In [10]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split



2023-08-01 02:55:20.230902: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-01 02:55:20.282918: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
(train_pool_X, train_pool_y), (test_X, test_y) = tf.keras.datasets.mnist.load_data()
train_X, Pool_X, train_y,  Pool_y = train_test_split(train_pool_X, train_pool_y, train_size=50, random_state=42)
print(train_X.shape, Pool_X.shape, train_y.shape, Pool_y.shape, test_X.shape, test_y.shape)
train_pool_X = train_pool_X.reshape(train_pool_X.shape[0], -1)
train_X = train_X.reshape(train_X.shape[0], -1)
test_X = test_X.reshape(test_X.shape[0], -1)
Pool_X = Pool_X.reshape(Pool_X.shape[0], -1)
print(train_pool_X.shape, train_X.shape, test_X.shape, Pool_X.shape)

(50, 28, 28) (59950, 28, 28) (50,) (59950,) (10000, 28, 28) (10000,)
(60000, 784) (50, 784) (10000, 784) (59950, 784)


In [12]:
class SimpleNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_classes):
        super(SimpleNN,self).__init__()
        self.fc1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size,num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self,x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out


In [13]:
def mc_dropout_predictions(model, X, num_samples=10):
    model.train()
    predictions = []
    with torch.no_grad():
        for _ in range(num_samples):
            output = model(X)
            predictions.append(output.softmax(dim=1).cpu().numpy())
    return np.array(predictions)

In [14]:
def entropy(p):
    return -np.sum(p * np.log2(p), axis=1)

In [15]:
def calculate_accuracy(model, test_X, test_y):
    model.eval() 
    with torch.no_grad():
        test_X_tensor = torch.Tensor(test_X).to(torch.float32)
        outputs = model(test_X_tensor)
        _, predicted_labels = torch.max(outputs, 1)
        correct = (predicted_labels == torch.Tensor(test_y)).sum().item()
        total = len(test_y)
        accuracy = correct / total
        print('Accuracy: %.2f' % (accuracy*100))
    return accuracy

In [16]:
def active_learning_mc_dropout(train_X, train_y, pool_X_train, pool_y_train, pool_X_test, y_test, num_iterations, num_samples=10):
    model=SimpleNN(dropout_prob=0.3)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    accuracy_list = []
    new_indices = []
    for i in range(num_iterations):
        model.train()
        optimizer.zero_grad() 
        output = mc_dropout_predictions(model,torch.Tensor(pool_X_train), num_samples)
        uncertainty = np.var(output, axis=0).sum(axis=1)
        new_index = np.argsort(uncertainty)[-1:]
        new_indices.append(new_index)
        label_X = pool_X_train[new_index]
        label_y = pool_y_train[new_index]
        train_X = np.concatenate((train_X, label_X), axis=0)
        train_y = np.concatenate((train_y, label_y), axis=0)
        pool_X_train = np.delete(pool_X_train, new_index, axis=0)
        pool_y_train = np.delete(pool_y_train, new_index)
        train_X_tensor = torch.Tensor(train_X)
        train_y_tensor = torch.Tensor(train_y)
        output = model(train_X_tensor)
        loss = criterion(output, train_y_tensor)
        loss.backward()
        optimizer.step()
        accuracy = calculate_accuracy(model, pool_X_test, y_test)
        accuracy_list.append(accuracy)
    
    return  accuracy_list,new_indices       

In [17]:
accuracy,new_indices_list_mc_dropout = active_learning_mc_dropout(train_X, train_y, pool_X, pool_y, test_X, test_y, num_iterations, num_samples_mc_dropout)

NameError: name 'pool_X' is not defined