# Challenge

## Instructions

The goal of the challenge is to classify a tabular dataset with *1024 input features* and separated in *10 classes*. The train and test set are provided.

The classification score is computed on the test set using the Categorical Cross Entropy loss. An implementation is provided in the `classification_score` function. Of course this function should be used after the model training finished!

The best score in the litterature for this dataset is **Loss = 0.15** and **Accuracy = 0.95**. 

The objective is to qualify your methodology, expertise and creativity.

You will present your results within the jupyter notebook.

Your code should be written in **Pytorch**.

Tips: Tabular data are unstructured by definition. Finding the underlying structure will strongly enhance your results.

## Common Functions

In [1]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import networkx as nx
import random
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import torch.nn.functional as F



In [27]:
def train(trainloader, model):

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # Training loop
    for epoch in range(50):
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print("Training loss ",epoch,": ",running_loss/len(trainloader))
        
    return model

def save(model, model_path):
    
    torch.save(model, model_path)
        
def get_train_accuracy(model, trainloader):
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in trainloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct/total 

def get_test_accuracy(model, testloader):
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct/total

def classification_score(model, x_test, y_test):
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    x_test = x_test.to(device) 
    y_test = y_test.to(device)

    # Get predictions
    y_pred = model(x_test)

    # Calculate loss
    criterion = nn.CrossEntropyLoss()
    loss = criterion(y_pred, y_test)

    # Calculate accuracy
    acc = (y_pred.argmax(dim=1) == y_test).float().mean()

    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(loss.item(), acc))

    return loss.item(), acc.item()


## PyTorch Classes

In [3]:
class MLP(nn.Module):
    
    def __init__(self, input_dim=1024, num_classes=10):
        
        super().__init__()
        
        self.layer1 = nn.Linear(input_dim, 512) 
        self.bn1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(p=0.2)
        
        self.layer2 = nn.Linear(512, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(p=0.2)
        
        self.layer3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(p=0.2)

        self.layer4 = nn.Linear(64, 32)
        self.bn4 = nn.BatchNorm1d(32)
        self.dropout4 = nn.Dropout(p=0.2)
        
        self.layer5 = nn.Linear(32, num_classes)
        
    def forward(self, x):
        
        x = F.relu(self.layer1(x)) 
        x = self.bn1(x)
        x = self.dropout1(x)
        
        x = F.relu(self.layer2(x))
        x = self.bn2(x)
        x = self.dropout2(x)
        
        x = F.relu(self.layer3(x))
        x = self.bn3(x)
        x = self.dropout3(x)
        
        x = F.relu(self.layer4(x))
        x = self.bn4(x)
        x = self.dropout4(x)

        x = F.softmax(self.layer5(x), dim=1)
        
        return x

In [4]:
class CNN(nn.Module):
    
    def __init__(self):
        
        super(CNN, self).__init__()
    
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.pool1 = nn.AvgPool2d(2, 2) 
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1) 
        self.pool2 = nn.AvgPool2d(2, 2)
        
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)
        

    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = self.pool1(x) 
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = x.view(-1, 32 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x
    

In [5]:
class CNNPermInvTrial(nn.Module):
    
    def __init__(self):

        super(CNNPermInvTrial, self).__init__()
       
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.pool1 = nn.AvgPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.pool2 = nn.AvgPool2d(2, 2)

        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc1 = nn.Linear(32, 128)
        self.fc2 = nn.Linear(128, 10)
        

    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = self.gap(x)
        
        x = x.view(-1, 32)
        
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x

## Experiments Part 1: Baseline

**MLP MODEL - DATASET AND MODEL PREPARATION**

In [6]:
x_train = np.load('X_train.npy')
X_train = np.reshape(x_train, (50000, 1024))
y_train = np.load('Y_train.npy')
y_train = np.reshape(y_train, (50000, ))
x_test = np.load('X_test.npy')
X_test = np.reshape(x_test, (10000, 1024))
y_test = np.load('Y_test.npy')
y_test = np.reshape(y_test, (10000, ))

X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long() 
    
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32)

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

model = MLP() 
model = torch.load('Models/original_50.pth')

**MLP MODEL - EVALUATION**

In [7]:
train_acc = get_train_accuracy(model, trainloader)
test_acc = get_test_accuracy(model, testloader)
print('\nTrain Accuracy: ',train_acc,'\nTest Accuracy: ',test_acc)


Train Accuracy:  0.32166 
Test Accuracy:  0.3134


**CNN MODEL - DATASET AND MODEL PREPARATION**

In [8]:
x_train = np.load('X_train.npy')
X_train = np.reshape(x_train, (50000, 32, 32))
y_train = np.load('Y_train.npy')
y_train = np.reshape(y_train, (50000, ))
x_test = np.load('X_test.npy')
X_test = np.reshape(x_test, (10000, 32, 32))
y_test = np.load('Y_test.npy')
y_test = np.reshape(y_test, (10000, ))

X_train = np.expand_dims(X_train, axis=1)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long() 
    
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32)

X_test = np.expand_dims(X_test, axis=1)
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

model_100epoch, model_250epoch = CNN(), CNN()
model_100epoch = torch.load('Models/collective_100.pth')
model_250epoch = torch.load('Models/collective_250.pth')

**CNN MODEL - EVALUATION**

In [9]:
train_acc1 = get_train_accuracy(model_100epoch, trainloader)
test_acc1 = get_test_accuracy(model_100epoch, testloader)
print('\nTrain Accuracy - 100 Epochs: ',train_acc1,'\nTest Accuracy - 100 Epochs: ',test_acc1)

train_acc2 = get_train_accuracy(model_250epoch, trainloader)
test_acc2 = get_test_accuracy(model_250epoch, testloader)
print('\nTrain Accuracy - 250 Epochs: ',train_acc2,'\nTest Accuracy - 250 Epochs: ',test_acc2)


Train Accuracy - 100 Epochs:  0.35866 
Test Accuracy - 100 Epochs:  0.2735

Train Accuracy - 250 Epochs:  0.4528 
Test Accuracy - 250 Epochs:  0.3179


**CNN (GLOBAL AVERAGE POOLING) - TRAINING DEMONSTRATED**

In [12]:
model = CNNPermInvTrial()
train(trainloader, model)
save(model, 'Models/perminv_50.pth')

Training loss  0 :  2.177089918369066
Training loss  1 :  2.132926947172071
Training loss  2 :  2.126747092831539
Training loss  3 :  2.1203662581117353
Training loss  4 :  2.1056951384321665
Training loss  5 :  2.0836199610491097
Training loss  6 :  2.0721624943970567
Training loss  7 :  2.066424259373719
Training loss  8 :  2.0622528930238175
Training loss  9 :  2.058015494032388
Training loss  10 :  2.0535742544014335
Training loss  11 :  2.048881127829744
Training loss  12 :  2.043821997087275
Training loss  13 :  2.038518985265047
Training loss  14 :  2.0328484679824332
Training loss  15 :  2.026864065166017
Training loss  16 :  2.020539047850757
Training loss  17 :  2.014266214001583
Training loss  18 :  2.0076106168181944
Training loss  19 :  2.0010501175298954
Training loss  20 :  1.9941367603461864
Training loss  21 :  1.9873234507981128
Training loss  22 :  1.9804210754365243
Training loss  23 :  1.9738631937188058
Training loss  24 :  1.9669915147874115
Training loss  25 :  

**CNN (GLOBAL AVERAGE POOLING) - MODEL EVALUATION**

In [14]:
model = torch.load('Models/perminv_50.pth')
train_acc = get_train_accuracy(model, trainloader)
test_acc = get_test_accuracy(model, testloader)
print('\nTrain Accuracy: ',train_acc,'\nTest Accuracy: ',test_acc)


Train Accuracy:  0.33 
Test Accuracy:  0.3238


## Experiments Part 2: Failed but Different Approaches

**DATA EXPLORATION AND PREPARATION UTILITY FUNCTIONS**

In [16]:
np.random.seed(42)

def standardize_dataset(dataset, mean=None, std=None):
    
    # Calculate mean and standard deviation across the entire dataset
    if mean==None and std==None:
        mean = np.mean(dataset)
        std = np.std(dataset)
    
    # Standardize the dataset
    standardized_dataset = (dataset - mean) / std
    
    return standardized_dataset, mean, std


def reassign(x_train):
    
    x_train[(x_train < -1.0)] = -2.0
    x_train[(x_train > -1.0) & (x_train < 1)] = 0.0
    x_train[(x_train > 1)] = 2.0
    
    return x_train

def plot(data, index):
    
    plt.imshow(data[index], cmap='gray')  
    plt.axis('off')  
    plt.show()
    
def plot_single(image):
    
    plt.imshow(image, cmap='gray')  
    plt.axis('off')  
    plt.show()
    
def get_separate_classes(data, target):
    
    max_element = target.max()
    dict_ind = {i:[] for i in range(max_element+1)}
    for i in range(max_element+1):
        indices = np.where(target == i)[0]
        dict_ind[i] = data[indices]
        
    return dict_ind


def plot_sequence_distribution(dataset, num_buckets=10):
    
    # Determine the range of values across all sequences
    min_value = min(min(seq) for seq in dataset)
    max_value = max(max(seq) for seq in dataset)
    
    # Define bucket edges
    bucket_edges = np.linspace(min_value, max_value, num_buckets + 1)
    
    # Initialize histogram counts for each bucket
    hist_counts = np.zeros((len(dataset), num_buckets))
    
    # Count values falling into each bucket for each sequence
    for i, seq in enumerate(dataset):
        hist, _ = np.histogram(seq, bins=bucket_edges)
        hist_counts[i] = hist
    
    # Plot distribution of sequence values
    for hist in hist_counts:
        plt.plot(bucket_edges[:-1], hist, marker='o')
    
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Distribution of Sequence Values')
    plt.grid(True)
    plt.show()
    
    
def image_to_graph(image):
    
    # Create an empty graph
    graph = nx.Graph()
    
    # Get image dimensions
    height, width = image.shape
    
    # Add nodes to the graph
    for i in range(height):
        for j in range(width):
            node_id = i * width + j
            graph.add_node(node_id, pixel_value=image[i, j])
    
    # Add edges between adjacent pixels
    for i in range(height):
        for j in range(width):
            current_node_id = i * width + j
            # Check neighboring pixels
            for ni in range(i - 1, i + 2):
                for nj in range(j - 1, j + 2):
                    # Ensure neighbor is within image boundaries and not the same as the current pixel
                    if 0 <= ni < height and 0 <= nj < width and (ni != i or nj != j):
                        neighbor_node_id = ni * width + nj
                        # Calculate edge weight as absolute difference between pixel values
                        edge_weight = abs(image[i, j] - image[ni, nj])
                        # Add edge to the graph
                        graph.add_edge(current_node_id, neighbor_node_id, weight=edge_weight)
    
    return graph

def display_graph(graph):
    
    # Draw the graph
    plt.figure(figsize=(8, 8))
    pos = nx.spring_layout(graph)  # Position nodes using the spring layout algorithm
    nx.draw(graph, pos, with_labels=False, node_size=10, edge_color='gray', alpha=0.7)
    
    # Display edge weights
    labels = nx.get_edge_attributes(graph, 'weight')
    nx.draw_networkx_edge_labels(graph, pos, edge_labels=labels)
    
    # Show the plot
    plt.title('Graph Representation of Image')
    plt.show()    
    
def select_coordinates(image, c_start, c_end, c_size, threshold):
    
    # Get coordinates of pixels with values less than the threshold
    filtered_indices = np.where(image < threshold)
    filtered_x, filtered_y = filtered_indices
    
    # Randomly select c_size coordinates from the filtered coordinates
    selected_indices = np.random.choice(len(filtered_x), size=c_size, replace=False)
    selected_x = filtered_x[selected_indices]
    selected_y = filtered_y[selected_indices]
    
    return selected_x, selected_y


def local_permute_pixels(image):

    img_height, img_width = image.shape[:2]
  
    c_size = 20 #img_height//2
    
    c_start = 6 #img_height//2 - c_size//2
    c_end = 26   #c_start + c_size
  
    b_top = 0
    b_bottom = img_height
    b_left = 0
    b_right = img_width
    
    #central_x_coordinates, central_y_coordinates = select_coordinates(image, c_start, c_end, c_size, 0)

    central_x_coordinates = np.random.choice(np.arange(c_start, c_end), size=c_size, replace=False)
    central_y_coordinates = np.random.choice(np.arange(c_start, c_end), size=c_size, replace=False)
    
    leftperi_x_coordinates = np.random.choice(np.arange(b_left, c_start), size=c_size//4, replace=False)
    leftperi_y_coordinates = np.random.choice(np.arange(b_top, b_bottom), size=c_size//4, replace=False)
    
    rightperi_x_coordinates = np.random.choice(np.arange(c_end, b_right), size=c_size//4, replace=False)
    rightperi_y_coordinates = np.random.choice(np.arange(b_top, b_bottom), size=c_size//4, replace=False)
    
    topperi_x_coordinates = np.random.choice(np.arange(b_left, b_right), size=c_size//4, replace=False)
    topperi_y_coordinates = np.random.choice(np.arange(b_top, c_start), size=c_size//4, replace=False)
    
    bottomperi_x_coordinates = np.random.choice(np.arange(b_left, b_right), size=c_size//4, replace=False)
    bottomperi_y_coordinates = np.random.choice(np.arange(c_end, b_bottom), size=c_size//4, replace=False)
    
    central = np.array([central_x_coordinates, central_y_coordinates]).T
    
    left_peripheral = np.array([leftperi_x_coordinates, leftperi_y_coordinates]).T
    right_peripheral = np.array([rightperi_x_coordinates, rightperi_y_coordinates]).T
    top_peripheral = np.array([topperi_x_coordinates, topperi_y_coordinates]).T
    bottom_peripheral = np.array([bottomperi_x_coordinates, bottomperi_y_coordinates]).T
    
    peripheral_stacked = np.stack((left_peripheral, right_peripheral, top_peripheral, bottom_peripheral),axis=0)
    peripheral = np.reshape(peripheral_stacked, (c_size, 2))
    
    return central, peripheral


def get_new_image(image, central, peripheral):
    
    img_height, img_width = image.shape[:2]
  
    c_size = 20 #img_height//2
    
    c_start = 6 #img_height//2 - c_size//2
    c_end = 26   #c_start + c_size
  
    new_image = copy.deepcopy(image)
    new_image[peripheral] = image[central]
    new_image[c_start:c_end, c_start:c_end] = 0.0

    return new_image

def assign_pixel_values_closest_match(image, central_coords, peripheral_coords):
    
    img_height, img_width = image.shape[:2]
  
    c_size = 20 #img_height//2
    
    c_start = 6 #img_height//2 - c_size//2
    c_end = 26   #c_start + c_size
    
    new_image = np.copy(image)  # Create a copy of the peripheral image
    
    for p_coord in peripheral_coords:
        min_distance = float('inf')
        closest_c_coord = None
        
        # Calculate Euclidean distance between the peripheral coordinate and all central coordinates
        for c_coord in central_coords:
            distance = np.linalg.norm(p_coord - c_coord)
            if distance < min_distance:
                min_distance = distance
                closest_c_coord = c_coord
        
        # Assign pixel value from the closest central coordinate to the peripheral coordinate
        p_x, p_y = p_coord
        c_x, c_y = closest_c_coord
        new_image[p_x, p_y] = image[c_x, c_y]
        new_image[c_start:c_end, c_start:c_end] = 0
    
    return new_image

def perturb_dataset(original_X):

    X_train_smallprt = {i:[] for i in range(10)}

    for i in range(10):

        unperturbed_matrices = original_X[i]
        perturbed_matrices = []

        for image_no in range(unperturbed_matrices.shape[0]):

            img = unperturbed_matrices[image_no]
            new_img = get_new_image(img, central, peripheral)
            perturbed_matrices.append(new_img)

        X_train_smallprt[i] = np.array(perturbed_matrices)
        
    return X_train_smallprt


def prepare_dataset_for_CNN(X_separated):
    
    X_list = []
    y_list = []
    
    for key in range(10):
        
        for image_no in range(X_separated[key].shape[0]):
            
            img = np.reshape(X_separated[key][image_no], (1024,))
            X_list.append(img)
            y_list.append(key)
        
    X_arr = np.array(X_list)
    y_arr = np.array(y_list)
    
    total_num = len(y_list)
    X = np.reshape(X_arr, (total_num, 32, 32))
    y = np.reshape(y_arr, (total_num,))
    
    return X, y
    

**STANDARDIZED AND PERTURBED DATA LOADING AND BEST BASELINE MODEL PREPARATION**

In [19]:
X_train = np.load('X_train_smallprt0.npy')
X_test = np.load('X_test_smallprt0.npy')
y_train = np.load('y_train_smallprt0.npy')
y_test = np.load('y_test_smallprt0.npy')

X_train = np.expand_dims(X_train, axis=1)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long() 
    
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32)

X_test = np.expand_dims(X_test, axis=1)
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

model = CNN()
model = torch.load('Models/collective_250.pth')

**CNN 250 EPOCH - MODEL EVALUATION ON PERTURBED DATA**

In [20]:
train_acc = get_train_accuracy(model, trainloader)
test_acc = get_test_accuracy(model, testloader)
print('\nTrain Accuracy: ',train_acc,'\nTest Accuracy: ',test_acc)


Train Accuracy:  0.4582 
Test Accuracy:  0.3179


**DATA AND MODEL LOADING FOR MoE TRIAL (MIXTURE OF EXPERTS)**

In [23]:
x_train = np.load('X_train.npy')
X_train = np.reshape(x_train, (50000, 32, 32))
y_train = np.load('Y_train.npy')
y_train = np.reshape(y_train, (50000, ))
x_test = np.load('X_test.npy')
X_test = np.reshape(x_test, (10000, 32, 32))
y_test = np.load('Y_test.npy')
y_test = np.reshape(y_test, (10000, ))

X_train = np.expand_dims(X_train, axis=1)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long() 
    
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32)

X_test = np.expand_dims(X_test, axis=1)
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

loaded_models = []
for m in range(10):
    
    model = CNN()
    mname = 'Models/m'+str(m)+'.npy'
    model.load_state_dict(torch.load(mname))
    loaded_models.append(model)
    
def classify_image(image, model):
    
    # Convert the image to torch tensor and add batch dimension
    image = torch.tensor(image).unsqueeze(0)
    
    # Set model to evaluation mode
    model.eval()
    
    # Forward pass
    with torch.no_grad():
        
        output = model(image)
        starkness = torch.max(output) - torch.min(output)
        max_logit = torch.max(output)
        idx = torch.where(output == max_logit)[1]
        predicted_label = idx[0]
        
    return starkness, max_logit, predicted_label

**MoE - EVALUATION**

In [24]:
#training evaluation
train_length = X_train.shape[0]
train_correct = 0
for image_no in range(train_length):
    
    image = X_train[image_no]
    actual_label = y_train[image_no]
    
    logits = []
    predicted_labels = []
    
    for model in loaded_models:
        
        _, max_logit, pred = classify_image(image, model)
        stk_item = max_logit.item()
        pred_item = pred.item()
        
        logits.append(stk_item)
        predicted_labels.append(pred_item)
        
    most_certain_model = logits.index(max(logits))
    chosen_class = predicted_labels[most_certain_model]
    
    if int(chosen_class) == int(actual_label):
        
        train_correct += 1
        
        
print('Training Accuracy %: ', (train_correct/train_length)*100)    

# testing evaluation
test_length = X_test.shape[0]
test_correct = 0
for image_no in range(test_length):
    
    image = X_test[image_no]
    actual_label = y_test[image_no]
    
    starkness = []
    predicted_labels = []
    
    for model in loaded_models:
        
        stk, _, pred = classify_image(image, model)
        stk_item = stk.item()
        pred_item = pred.item()
        
        starkness.append(stk_item)
        predicted_labels.append(pred_item)
        
    most_certain_model = starkness.index(max(starkness))
    chosen_class = predicted_labels[most_certain_model]
    
    if chosen_class == actual_label:
        
        test_correct += 1        
        
print('Testing Accuracy %: ', (test_correct/test_length)*100) 

  image = torch.tensor(image).unsqueeze(0)
[W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.


Training Accuracy %:  10.0
Testing Accuracy %:  10.03


## Thoughts behind attempted approaches and what might've worked better...

In [28]:
#Summarising best model so far (which, tbh, seriously needs improvment)

x_train = np.load('X_train.npy')
X_train = np.reshape(x_train, (50000, 32, 32))
y_train = np.load('Y_train.npy')
y_train = np.reshape(y_train, (50000, ))
x_test = np.load('X_test.npy')
X_test = np.reshape(x_test, (10000, 32, 32))
y_test = np.load('Y_test.npy')
y_test = np.reshape(y_test, (10000, ))

X_train = np.expand_dims(X_train, axis=1)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long() 

X_test = np.expand_dims(X_test, axis=1)
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

model = CNN()
model = torch.load('Models/collective_250.pth')
loss, acc = classification_score(model, X_test, y_test)

Loss: 11.733, Accuracy: 0.318


**NOTE:**
In these series of approaches, I have not put my focus on the neurel network architecture, but rather focused on pre-processing of the data, exploring the effect of permutation and the failure of pooling functions (both max and average) due to the image pixel scrambling. And i've also focus one approach on attempting a mixture of experts system, which failed. 

**Approaches from Part I -** 

The best approach from Part I was from a simple CNN model trained over data restructured into images. it performed better than the MLP model - translated from the TF code to PyTorch code, and surprisingly, better than than the CNN model with global average pooling which I believed would yield better results due to increased ability of maintaining permutation equivariance. 

Loss and Accuracy of the best model - 

    Loss: 11.733
    Accuracy: 0.318

**Approaches from Part II -** 

The first approach from part II was a data pre-processing approach where the images were extremized to bring more discrete sections of sub-images. The nest part of the approach was perturbing some of the pixel values in such a way that at least one perturbation may bring the images closer to the original unciphered images. In retrospect, I doubt this was a good use of time since the whole point of the cipher was to protect the data and classify without unscrambling it.  

The second approach was to train ten models (for each class) on their own corresponding set of images with a few added from the other nine classes (this is also more computationally efficient). And each of these models are treated as experts in their class. Then during evaluation, these ten models are each given a vote on which class a specific image belongs to, and the prediction with the maximum logit-based confidence is chosen. This method also failed with an unacceptably low accuracy. 

**WHAT WAS LEARNT...**

Thinking about this from first-principles - when an image has it's pixels permuted, the aggregate high-level patterns disappear, and what remains constant is the overall distribution of the pixels in the image. Now if the permutation is the same across all images, then per sub-block of image, there should still be similar sub-distributions of pixel values. However, when a closer look was taken at the images, there was no tangible similarity in many of these images. And upon reading a bit about the original CIFAR data, this is my conclusion; the objects in the pictures are in quite different orientations, different levels of zoomification, and different contrast settings. While I haven't thought about a way to get around the orientation and zoomification problem yet, the third part - contrast settings perhaps be worked with. If all images were normalised to a low contrast standardised between -0.5 and 0.5 (-1 and 1, whichever yields better) and then two models were trained - one on this described tranform and another on the inverse, and then a higher confidenec score was selected, it may yield better results (but then again it may not, from last seven hour's experience). 

**PAST OPEN-SOURCE IMPLEMENTED RESEARCH I WOULD ATTEMPT IF TIME PERMITTED...**

1. https://openreview.net/pdf?id=eL1iX7DMnPI 
2. https://arxiv.org/pdf/2001.07761.pdf

## IMPLEMENTATION IN TF KERAS
**This was not simulated on my system, since I ran into the problem of my Jupyter kernel crashing each time.**

In [21]:
# Exemple in Keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras import layers 
from tensorflow.keras.callbacks import EarlyStopping 

def simple_mlp_exemple_in_keras(x_train, y_train):
    
    model = Sequential() 
    model.add(layers.Dense(512, kernel_initializer = 'normal', activation = 'relu', input_shape = (1024,))) 
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(128, activation = 'relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(64, activation = 'relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(32, activation = 'relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(10, activation = 'softmax'))

    model.compile(
       loss = 'categorical_crossentropy', 
       optimizer = "adam", 
       metrics = ['accuracy']
    )
    
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
    
    history = model.fit(
       x_train, y_train,    
       batch_size=128, 
       epochs = 50, 
       verbose = 1, 
       validation_split = 0.2,
       callbacks=[es]
    )
    return model

model = simple_mlp_exemple_in_keras(x_train, y_train)

'\n# Exemple in Keras\nfrom tensorflow.keras.models import Sequential \nfrom tensorflow.keras import layers \nfrom tensorflow.keras.callbacks import EarlyStopping \n\ndef simple_mlp_exemple_in_keras(x_train, y_train):\n    \n    model = Sequential() \n    model.add(layers.Dense(512, kernel_initializer = \'normal\', activation = \'relu\', input_shape = (1024,))) \n    model.add(layers.BatchNormalization())\n    model.add(layers.Dropout(0.2))\n    model.add(layers.Dense(128, activation = \'relu\'))\n    model.add(layers.BatchNormalization())\n    model.add(layers.Dropout(0.2))\n    model.add(layers.Dense(64, activation = \'relu\'))\n    model.add(layers.BatchNormalization())\n    model.add(layers.Dropout(0.2))\n    model.add(layers.Dense(32, activation = \'relu\'))\n    model.add(layers.BatchNormalization())\n    model.add(layers.Dropout(0.2))\n    model.add(layers.Dense(10, activation = \'softmax\'))\n\n    model.compile(\n       loss = \'categorical_crossentropy\', \n       optimizer =

## Score

In [22]:
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy

def classification_score(model, ohe, scaler):
    
    Categorical Cross Entropy Score function
    
        Parameters
        ----------
        model : sklearn-like class of the classification model.
                It requires a predict method.
        ohe : One Hot Encoder model for label encoding 
        scaler : Normalisation function
  
    # Load data
    x_test = np.load('X_test.npy')
    y_test = np.load('Y_test.npy')
    
    # Label Encoder
    y_test = ohe.transform(y_test).toarray()
    
    # Samples Normalisation
    x_test = scaler.transform(x_test)
    
    # Predict
    y_pred = model.predict(x_test)
    
    # Score
    cce = CategoricalCrossentropy()
    ac = CategoricalAccuracy()
    
    loss = cce(y_test, y_pred).numpy()
    acc = ac(y_test, y_pred).numpy()
    
    print('Loss: %.3f, Accuracy: %.3f' % (loss, acc))
    return (loss, acc)

(loss, acc) = classification_score(model, ohe, scaler)


"\nfrom tensorflow.keras.losses import CategoricalCrossentropy\nfrom tensorflow.keras.metrics import CategoricalAccuracy\n\ndef classification_score(model, ohe, scaler):\n    \n    Categorical Cross Entropy Score function\n    \n        Parameters\n        ----------\n        model : sklearn-like class of the classification model.\n                It requires a predict method.\n        ohe : One Hot Encoder model for label encoding \n        scaler : Normalisation function\n  \n    # Load data\n    x_test = np.load('X_test.npy')\n    y_test = np.load('Y_test.npy')\n    \n    # Label Encoder\n    y_test = ohe.transform(y_test).toarray()\n    \n    # Samples Normalisation\n    x_test = scaler.transform(x_test)\n    \n    # Predict\n    y_pred = model.predict(x_test)\n    \n    # Score\n    cce = CategoricalCrossentropy()\n    ac = CategoricalAccuracy()\n    \n    loss = cce(y_test, y_pred).numpy()\n    acc = ac(y_test, y_pred).numpy()\n    \n    print('Loss: %.3f, Accuracy: %.3f' % (loss