In [11]:
import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from tqdm.notebook import tqdm

#from sklearn.metrics import confusion_matrix
import sklearn.metrics as sk_m
from sklearn.metrics import plot_confusion_matrix
import pandas as pd
import seaborn as sn

from IPython import embed

import os
import pickle
import shutil

In [2]:
# Load the pre-built datasets and cross-validation folds from disk.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run elsewhere until it is pointed at the local data directory.
path_save = "/Users/andyvarner/Documents/CI_Fall_2022/CI_Projects/NN Project/data"

# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load pickles that were produced by a trusted source.
d_filename = os.path.join(path_save, "datasets.pkl")
with open(d_filename, "rb") as datasets_file:
    # The context manager closes the handle even if unpickling fails.
    all_datasets = pickle.load(datasets_file)

f_filename = os.path.join(path_save, "folds.pkl")
with open(f_filename, "rb") as folds_file:
    all_folds = pickle.load(folds_file)

In [3]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset of ``(sample, label)`` tensor pairs.

    Every sample is converted to a float tensor and every label to a long
    tensor once, at construction time; indexing simply returns the stored pair.
    """

    def __init__(self, samples, labels):
        super().__init__()
        self.create_dataset(samples, labels)

    def create_dataset(self, all_samples, all_labels):
        """Build ``self.dataset`` from two parallel iterables.

        The iterables are paired with ``zip``, so the result is as long as the
        shorter of the two.
        """
        self.dataset = [
            (torch.tensor(features).float(), torch.tensor(target).long())
            for features, target in zip(all_samples, all_labels)
        ]

    def __getitem__(self, index):
        """Return the stored ``(sample, label)`` pair at ``index``."""
        pair = self.dataset[index]
        return pair

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.dataset)

In [37]:
def train(fold, model, batch_size, title, num_classes, num_epochs = 200, rate = 10):
    """Train ``model`` on one fold and periodically evaluate it on the fold's test split.

    Parameters
    ----------
    fold : mapping
        ``fold["train"]`` and ``fold["test"]`` must each unpack to
        ``(samples, labels)``.
    model : callable
        Called on a batch of samples to produce predictions. It must also
        provide ``init_optimizer()``, ``objective(preds, labels)``, an
        ``optimizer`` attribute (after ``init_optimizer``), ``eval()`` and
        ``train()``.
    batch_size : int
        Mini-batch size for the training loader (validation uses batch size 1).
    title : str
        Appended to ``"Training "`` in the progress-bar description.
    num_classes : int
        Not used by this function; kept so existing callers keep working.
    num_epochs : int
        Number of passes over the training split.
    rate : int
        Validation runs on every epoch whose index is a multiple of ``rate``.

    Returns
    -------
    training_loss : list of float
        Mean mini-batch loss for each epoch.
    training_metrics
        Confusion matrix from the most recent validation pass, or an empty
        list if no validation pass ran. Because validation only happens on
        multiples of ``rate``, this may predate the final epoch.

    Raises
    ------
    ValueError
        If the training split contains no samples.
    """

    # Make datasets pytorchy

    all_train_samples, all_train_labels = fold["train"]
    all_test_samples, all_test_labels = fold["test"]

    train_set = Dataset(all_train_samples, all_train_labels)
    valid_set = Dataset(all_test_samples, all_test_labels)

    if len(train_set) == 0:
        # Fail with a clear message instead of an obscure error when the
        # per-epoch mean loss is computed over zero batches.
        raise ValueError("fold['train'] contains no samples")

    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True)
    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size = 1, shuffle = False)

    # Init optimizer

    model.init_optimizer()

    # Train and validate network

    training_loss, training_metrics = [], []

    for epoch in tqdm(range(num_epochs), desc = "Training " + title):

        # Train network

        running_loss, num_batches = 0.0, 0
        for sample, label in train_loader:

            preds = model(sample)
            loss = model.objective(preds, label)

            running_loss += loss.item()
            num_batches += 1

            model.optimizer.zero_grad()
            loss.backward()
            model.optimizer.step()

        training_loss.append(running_loss / num_batches)

        # Validate network

        if epoch % rate == 0:

            model.eval()
            try:
                all_labels, all_preds = [], []

                # No gradients are needed for evaluation; skipping autograd
                # bookkeeping saves time and memory.
                with torch.no_grad():
                    for sample, label in valid_loader:
                        # The validation loader uses batch size 1, so the flat
                        # argmax is the predicted class index.
                        pred = torch.argmax(model(sample))

                        all_preds.append(pred.item())
                        all_labels.append(label.item())

                # Only the latest confusion matrix is kept.
                training_metrics = sk_m.confusion_matrix(all_labels, all_preds)
            finally:
                # Always return to training mode, even if validation failed.
                model.train()

    return training_loss, training_metrics

In [38]:
class MLP(torch.nn.Module):
    """Fully connected classifier with configurable activation, loss and optimizer.

    Architecture::

        Linear(num_features, num_neurons) -> act
        -> Linear(num_neurons, 64) -> act
        -> Linear(64, num_classes)

    Parameters
    ----------
    learning_rate : float
        Stored as ``self.alpha`` and passed to the optimizer as ``lr``.
    num_features : int
        Size of each input sample.
    num_classes : int
        Size of the output layer.
    num_neurons : int
        Width of the first hidden layer.
    loss_choice : str
        ``"CrossEntropyLoss"`` or ``"MSE"`` (case-insensitive).
    activation_choice : str
        ``"tanh"``, ``"sigmoid"`` or ``"relu"`` (case-insensitive).
    optimizer_choice : str
        ``"SGD"`` or ``"Adam"`` (case-insensitive).

    Raises
    ------
    ValueError
        From ``__init__`` for an unsupported activation, and from
        ``init_optimizer`` / ``objective`` for an unsupported optimizer / loss.
    """

    # Width of the second hidden layer.
    _SECOND_HIDDEN_WIDTH = 64

    # Lower-cased option name -> constructor.
    _ACTIVATIONS = {
        "tanh": torch.nn.Tanh,
        "sigmoid": torch.nn.Sigmoid,
        "relu": torch.nn.ReLU,
    }
    _OPTIMIZERS = {
        "sgd": torch.optim.SGD,
        "adam": torch.optim.Adam,
    }
    _LOSSES = {
        "crossentropyloss": torch.nn.CrossEntropyLoss,
        "mse": torch.nn.MSELoss,
    }

    def __init__(self, learning_rate, num_features, num_classes, num_neurons, loss_choice, activation_choice, optimizer_choice):

        super().__init__()

        self.alpha = learning_rate
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_neurons = num_neurons
        self.loss_choice = loss_choice
        self.activation_choice = activation_choice
        self.optimizer_choice = optimizer_choice

        activation_key = self.activation_choice.lower()
        if activation_key not in self._ACTIVATIONS:
            # Without this check the model would be built with no network at
            # all and only fail later, inside forward().
            raise ValueError(
                "Unsupported activation_choice %r; expected one of %s"
                % (activation_choice, sorted(self._ACTIVATIONS))
            )
        activation = self._ACTIVATIONS[activation_key]

        self.network = torch.nn.Sequential(
            torch.nn.Linear(num_features, num_neurons),
            activation(),
            torch.nn.Linear(num_neurons, self._SECOND_HIDDEN_WIDTH),
            activation(),
            torch.nn.Linear(self._SECOND_HIDDEN_WIDTH, num_classes),
        )

    def init_optimizer(self):
        """Create ``self.optimizer`` over this model's parameters."""

        # Use the choice stored on the instance, not a module-level variable
        # that happens to share the name.
        optimizer_key = self.optimizer_choice.lower()
        if optimizer_key not in self._OPTIMIZERS:
            raise ValueError(
                "Unsupported optimizer_choice %r; expected one of %s"
                % (self.optimizer_choice, sorted(self._OPTIMIZERS))
            )

        self.optimizer = self._OPTIMIZERS[optimizer_key](self.parameters(), lr = self.alpha)

    def objective(self, preds, labels):
        """Return the configured loss between ``preds`` and ``labels``.

        For the MSE loss, integer class-index labels with one dimension fewer
        than ``preds`` are one-hot encoded to the width of ``preds`` first, so
        the same labels work for both supported losses. Labels that are
        already floating point are passed through unchanged.
        """

        loss_key = self.loss_choice.lower()
        if loss_key not in self._LOSSES:
            raise ValueError(
                "Unsupported loss_choice %r; expected one of %s"
                % (self.loss_choice, sorted(self._LOSSES))
            )

        if (loss_key == "mse"
                and not torch.is_floating_point(labels)
                and labels.dim() == preds.dim() - 1
                and preds.shape[-1] > 1):
            # MSELoss needs a target shaped like the prediction.
            labels = torch.nn.functional.one_hot(
                labels.long(), num_classes = preds.shape[-1]
            ).to(preds.dtype)

        obj = self._LOSSES[loss_key]()

        return obj(preds, labels)

    def forward(self, x):
        """Run ``x`` through the network and return its raw outputs."""

        return self.network(x)
    

In [39]:
def create_folder(path):
    """Create an empty directory at ``path``, discarding anything already there.

    If ``path`` exists, its whole directory tree is deleted first, so previous
    contents are lost. Missing parent directories are created as needed.
    """
    path_in_use = os.path.exists(path)
    if path_in_use:
        # Remove the old tree so the caller always starts from an empty folder.
        shutil.rmtree(path)

    os.makedirs(path)

In [40]:
# Hyper-parameters shared by every task and fold in this experiment.
loss_choice = "CrossEntropyLoss"
activation_choice = "ReLu"
optimizer_choice = "SGD"
num_neurons = 128

batch_size = 16
num_features = 2

alpha = 1e-3

exp_name = "Experiment_test"
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
path_save = "/Users/andyvarner/Documents/CI_Fall_2022/CI_Projects/NN Project/results"

# create_folder deletes an existing directory first, so re-running this cell
# discards the previous results of the same experiment.
path = os.path.join(path_save, exp_name)
create_folder(path)

for i, fold_set in enumerate(all_folds):

    all_keys = fold_set.keys()

    for j, current_key in enumerate(all_keys):

        title = "task_%s_fold_%s" % (i, j)

        fold = fold_set[current_key]

        # The output layer is sized from the distinct labels in this fold's
        # training split.
        _, all_train_labels = fold["train"]
        num_classes = len(np.unique(all_train_labels))

        # A fresh model is built for every fold so no weights carry over.
        model = MLP(alpha, num_features, num_classes, num_neurons, loss_choice, activation_choice, optimizer_choice)
        train_results, valid_results = train(fold, model, batch_size, title, num_classes)

        results = {}
        results["train"] = train_results
        results["test"] = valid_results

        # Dump the results of this fold under <experiment>/task_XXX/fold_YYY.pkl.
        # Note: path_save is re-pointed at the per-task folder from here on.
        path_save = os.path.join(path, "task_%s" % (str(i).zfill(3)))

        if not os.path.exists(path_save):
            create_folder(path_save)

        title = "fold_%s.pkl" % (str(j).zfill(3))

        filename = os.path.join(path_save, title)

        # The context manager flushes and closes the file as soon as the dump
        # finishes, instead of leaving that to garbage collection.
        with open(filename, "wb") as results_file:
            pickle.dump(results, results_file)
        
        #dump test_results
    
        

Training task_0_fold_0:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_0_fold_1:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_0_fold_2:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_0_fold_3:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_0_fold_4:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_1_fold_0:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_1_fold_1:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_1_fold_2:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_1_fold_3:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_1_fold_4:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_2_fold_0:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_2_fold_1:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_2_fold_2:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_2_fold_3:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_2_fold_4:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_3_fold_0:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_3_fold_1:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_3_fold_2:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_3_fold_3:   0%|          | 0/200 [00:00<?, ?it/s]

Training task_3_fold_4:   0%|          | 0/200 [00:00<?, ?it/s]