In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Subset

In [2]:
data = np.load('./Mnist_10000._samples.npy')

In [3]:
# Convert the structured array to PyTorch tensors
images = torch.tensor([item['image'] for item in data], dtype=torch.float32)
labels = torch.tensor([item['label'] for item in data], dtype=torch.long)

# Flatten the images for a simple fully connected network
images = images.view(images.shape[0], -1)

print(len(images))

# Create a dataset and data loader
dataset = TensorDataset(images, labels)
#loader = DataLoader(dataset, batch_size=10, shuffle=False)

10000


  images = torch.tensor([item['image'] for item in data], dtype=torch.float32)


In [4]:
class_indices = {k: [] for k in range(10)}  # Assuming 10 classes (0-9)
for idx, (image, label) in enumerate(dataset):
    class_indices[label.item()].append(idx)

# Step 2: Randomly select 10 samples from each class
train_indices = []
for indices in class_indices.values():
    train_indices.extend(np.random.choice(indices, 10, replace=False))

# Create a mask for the rest of the data for testing
mask = np.ones(len(dataset), dtype=bool)
mask[train_indices] = False
test_indices = np.arange(len(dataset))[mask]

# Step 3: Create training and testing subsets
train_subset = Subset(dataset, train_indices)
test_subset = Subset(dataset, test_indices)

In [5]:
# Step 4: Create DataLoaders
train_loader = DataLoader(train_subset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=10, shuffle=False)


print("Number of samples in test set:", len(test_subset))
print("Number of batches in test loader:", len(test_loader))

Number of samples in test set: 9900
Number of batches in test loader: 990


In [6]:
class Net(nn.Module):

    def __init__(self):

        super(Net,self).__init__()

        self.fullyConnectedLayer = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(),
            nn.Linear(200,10)
        )

    def forward(self, input):

        output = self.fullyConnectedLayer(input)
        activatedOutput = F.log_softmax(output, dim = 1)

        return activatedOutput
    

In [7]:
baseModel = Net()

In [8]:
import torch.optim as optim


# Loss function and optimizer
criterion = nn.NLLLoss()
optimizer = optim.SGD(baseModel.parameters(), lr=0.01)


def train(model, loader, criterion, optimizer, epochs = 1000):

    # Training loop
    for epoch in range(epochs):
        for images, labels in loader:
            
            optimizer.zero_grad()      # Zero the gradient buffers
            output = model(images)     # Pass the batch through the network
            loss = criterion(output, labels) # Calculate the loss
            loss.backward()            # Backpropagate
            optimizer.step()           # Update weights

        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train(baseModel, train_loader, criterion, optimizer, 10)


Epoch 1, Loss: 2.303950786590576
Epoch 2, Loss: 2.3042502403259277
Epoch 3, Loss: 2.298736095428467
Epoch 4, Loss: 2.299814462661743
Epoch 5, Loss: 2.297729730606079
Epoch 6, Loss: 2.305091619491577
Epoch 7, Loss: 2.2942588329315186
Epoch 8, Loss: 2.2795193195343018
Epoch 9, Loss: 2.3095154762268066
Epoch 10, Loss: 2.314253330230713


In [19]:
def calculate_accuracy(loader, model):
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

train_accuracy = calculate_accuracy(train_loader, baseModel)
test_accuracy = calculate_accuracy(test_loader, baseModel)

print(f'Training Accuracy: {train_accuracy}%')
print(f'Testing Accuracy: {test_accuracy}%')

Training Accuracy: 13.0%
Testing Accuracy: 10.181818181818182%


## Baseline Performance for Two Moons Dataset

In [37]:
selected_samples = np.load('./selected_samples.npy')

remaining_samples = np.load('./remaining_dataset.npy')

# Converting the selected samples and remaining samples into PyTorch tensors
selected_samples_tensor = torch.tensor(selected_samples, dtype=torch.float32)
remaining_samples_tensor = torch.tensor(remaining_samples, dtype=torch.float32)

# Extracting features and labels for both datasets
features_selected = selected_samples_tensor[:, :2]
labels_selected = selected_samples_tensor[:, 2].long()  # converting labels to long for classification

features_remaining = remaining_samples_tensor[:, :2]
labels_remaining = remaining_samples_tensor[:, 2].long()

# Creating TensorDatasets
selected_dataset = TensorDataset(features_selected, labels_selected)
remaining_dataset = TensorDataset(features_remaining, labels_remaining)

# Creating DataLoaders
selected_loader = DataLoader(selected_dataset, batch_size=1)  # small batch size for the small dataset
remaining_loader = DataLoader(remaining_dataset, batch_size=10)  # larger batch size for the larger dataset

In [42]:
class TwoMoonsNet(nn.Module):

    def __init__(self):

        super(TwoMoonsNet,self).__init__()

        self.fullyConnectedLayer = nn.Sequential(
            nn.Linear(2, 10),
            nn.ReLU(),
            nn.Linear(10,2)
        )

    def forward(self, input):

        output = self.fullyConnectedLayer(input)
        activatedOutput = F.log_softmax(output, dim = 1)

        return activatedOutput

In [46]:
model2 = TwoMoonsNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model2.parameters(), lr=0.01)

In [47]:
train(model2,selected_loader, criterion, optimizer, 5)

Epoch 1, Loss: 0.5281192064285278
Epoch 2, Loss: 0.5223095417022705
Epoch 3, Loss: 0.5162363648414612
Epoch 4, Loss: 0.509925901889801
Epoch 5, Loss: 0.5034029483795166


In [48]:
train_accuracy = calculate_accuracy(selected_loader, model2)
test_accuracy = calculate_accuracy(remaining_loader, model2)

print(f'Training Accuracy: {train_accuracy}%')
print(f'Testing Accuracy: {test_accuracy}%')


Training Accuracy: 83.33333333333333%
Testing Accuracy: 71.27659574468085%
