# 1. Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import numpy as np
from torchvision import datasets, models, transforms

from torchvision.models import resnet18, ResNet18_Weights
import torch.optim as optim


import random
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split


from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedGroupKFold
from imblearn.over_sampling import RandomOverSampler



from codes.utils import stratified_train_test_group_kfold
from codes.utils import model_test_classification
from codes.utils import accuracy_classification
from codes.utils import mlp_nnrank_cross_val_final_test

from codes.regression_codes import mlp_nnrank_gridsearch


# Reproducibility: seed every RNG this notebook touches (Python, NumPy, PyTorch CPU and GPU).
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# manual_seed_all covers every visible GPU and is silently ignored without CUDA.
torch.cuda.manual_seed_all(SEED)

# cuDNN: ask for deterministic kernels AND keep the autotuner off. With
# benchmark=True cuDNN may select a different algorithm on each run, which
# defeats the deterministic flag set on the line above it.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Run on the first GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# 2. Data

In [2]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this dataset from a trusted location.
with open("Data/MoS2_Analysis_Data_trained2", "rb") as fp:   # Unpickling
    MoS2_Proj1_Class_Data = pickle.load(fp)

df = pd.DataFrame(MoS2_Proj1_Class_Data)
print(df.keys())

# Encode each value of the 'T' column as an integer class label (0, 1, 2).
# A value missing from T_dict raises KeyError here rather than being mislabelled.
T_dict = {900: 0, 950: 1, 1000: 2}
T_target = np.array([T_dict[item] for item in df['T']])

# One row per sample: the 'ImageNet' feature vectors and the 'sampleId' used for grouping.
features = np.array(list(df['ImageNet']))
sampleId = np.array(list(df['sampleId']))

X = features
Y = np.array(T_target)
groups = np.array(sampleId)

# Hold out fold 0 of a 10-way split as the final test set; the remaining data
# (with its group ids) is what the cross-validation cell works on.
# NOTE(review): presumably the helper keeps every sampleId inside a single
# fold — confirm against codes.utils.
train_val_groups, train_val_X, train_val_Y, test_X, test_Y = stratified_train_test_group_kfold(X, Y, groups, n_splits=10, test_fold=0)

# Oversample every class except the majority one. random_state is pinned so the
# resampled training sets do not depend on how much of the global NumPy RNG
# stream earlier cell executions have already consumed.
oversample = RandomOverSampler(sampling_strategy='not majority', random_state=1)

print(train_val_X.shape)
print(train_val_Y.shape)
print(test_X.shape)
print(test_Y.shape)






Index(['sampleId', 'sampleLabel', 'image', 'T', 'ImageNet', 'MicroNet'], dtype='object')
<class 'generator'>
(235, 100)
(235,)
(27, 100)
(27,)


In [3]:
# Sanity check: show the integer class label assigned to the first sample.
print(Y[0])

2


# 3. Running Cross-Validation

In [4]:
#!mkdir -p 'Models/nnrank/ImageNet/aug3/MLP'

In [5]:
# The parameters below were already optimized.
# This run only trains the production model, so each list holds just the
# selected value; the trailing comments keep the earlier candidate values.

Learning_rate = [0.01]
Drop_out = [0.35]  # earlier candidates: [0.0, 0.2, 0.35, 0.5]
Batch_size = [32]  # earlier candidates: [16, 32, 100]
L1 = [600]  # earlier candidates: [600, 400, 300, 200]
L2 = [150]  # earlier candidates: [150, 100, 50]

def cross_10_folds_mlp(train_val_X, train_val_Y, groups=None, n_splits=10,
                       model_dir='nnrank/ImageNet/aug3/MLP'):
    """Run the grid search once per cross-validation fold and collect the results.

    For every fold the data is split with `stratified_train_test_group_kfold`
    (the fold index is used as the held-out validation fold), the training part
    is resampled with the notebook-level `oversample` object, and
    `mlp_nnrank_gridsearch` is called with the notebook-level hyperparameter
    lists (`Learning_rate`, `L1`, `L2`, `Drop_out`, `Batch_size`).

    Args:
        train_val_X: features of the train+validation pool.
        train_val_Y: labels aligned with `train_val_X`.
        groups: group ids aligned with `train_val_X`; when omitted, the
            notebook-level `train_val_groups` is used (original behaviour).
        n_splits: number of folds; drives both the loop and the splitter so the
            two can no longer drift apart.
        model_dir: directory prefix of the per-fold `{fold}_model.pth` path
            handed to the grid search.

    Returns:
        Tuple `(best_train, best_val, best_variables, best_performance_record)`,
        each a list with one entry per fold.
    """
    if groups is None:
        groups = train_val_groups

    best_train = []
    best_val = []
    best_variables = []
    best_performance_record = []

    for fold in range(n_splits):
        model_path = f'{model_dir}/{fold}_model.pth'

        # The first return value (the groups of the training part) is not needed here.
        _, train_X, train_Y, val_X, val_Y = stratified_train_test_group_kfold(
            train_val_X, train_val_Y, groups, n_splits=n_splits, test_fold=fold
        )
        # Only the training split is oversampled; the validation split is left untouched.
        train_X, train_Y = oversample.fit_resample(train_X, train_Y)

        best_train_acc, best_val_acc, best_record, hyper = mlp_nnrank_gridsearch(
            train_X, train_Y, val_X, val_Y,
            Learning_rate, L1, L2, Drop_out, Batch_size, fold, model_path
        )

        best_train.append(best_train_acc)
        best_val.append(best_val_acc)
        best_variables.append(hyper)
        best_performance_record.append(best_record)

        print(f'fold: {fold} done!')

    return best_train, best_val, best_variables, best_performance_record


# Run the per-fold training on the train+validation pool; each returned list has one entry per fold.
best_train, best_val, best_variables, best_performance_record = cross_10_folds_mlp(train_val_X, train_val_Y)



<class 'generator'>
Epoch1: loss: 16.0200 val_loss: 10.8593
Epoch2: loss: 9.4872 val_loss: 5.5211
Epoch3: loss: 8.3409 val_loss: 5.5562
Epoch4: loss: 6.9869 val_loss: 4.9315
Epoch5: loss: 5.9337 val_loss: 4.5624
Epoch6: loss: 5.5023 val_loss: 5.0979
Epoch7: loss: 4.8424 val_loss: 4.6525
Epoch8: loss: 3.9628 val_loss: 5.4996
Epoch9: loss: 3.6023 val_loss: 4.7171
Epoch10: loss: 3.2741 val_loss: 4.8002
Epoch11: loss: 2.5940 val_loss: 6.1741
Epoch12: loss: 2.8674 val_loss: 4.3993
Epoch13: loss: 1.8128 val_loss: 5.8599
Epoch14: loss: 2.6041 val_loss: 5.6071
Epoch15: loss: 1.8053 val_loss: 5.5615
Epoch16: loss: 1.1959 val_loss: 6.8277
Epoch17: loss: 1.4660 val_loss: 4.2645
Epoch18: loss: 1.5836 val_loss: 5.3498
Epoch19: loss: 1.1581 val_loss: 5.5801
Epoch20: loss: 1.0777 val_loss: 5.7946
Epoch21: loss: 0.8902 val_loss: 5.6260
Epoch22: loss: 1.0068 val_loss: 5.8403
Epoch23: loss: 1.3318 val_loss: 5.1780
Epoch24: loss: 0.9607 val_loss: 5.4712
Epoch25: loss: 1.3151 val_loss: 5.1758
Epoch26: los

# 4. Test Models

In [7]:
class MLP(nn.Module):
    """Two-hidden-layer perceptron with sigmoid-activated outputs.

    Args:
        l1: width of the first hidden layer.
        l2: width of the second hidden layer.
        p: dropout probability applied after the second hidden layer.
        in_features: length of each input feature vector (default 100).
        out_features: number of sigmoid output units (default 3).
    """

    def __init__(self, l1=120, l2=84, p=0.2, in_features=100, out_features=3):
        super().__init__()
        # Attribute names and creation order are unchanged, so existing
        # state_dict checkpoints load and seeded initialisation is the same.
        self.fc1 = nn.Linear(in_features, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, out_features)
        self.activ = nn.Sigmoid()
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        """Map a batch whose last dimension is `in_features` to `out_features` values in (0, 1)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Dropout sits only between the second hidden layer and the output layer.
        x = self.dropout(x)
        return self.activ(self.fc3(x))

# Hidden-layer widths and dropout rate used to build the network for testing.
l1 = 600
l2 = 150
p = 0.35

# `model` and `trained_model` are two names for the same network instance on `device`.
model = MLP(l1, l2, p).to(device)
trained_model = model

In [8]:

#trained_model = pretrained_model()
# Same directory prefix that the cross-validation cell uses for its per-fold model paths.
root_path = 'nnrank/ImageNet/aug3/MLP'

# Evaluate on the held-out test split; the helper returns a collection of
# accuracies together with a matching collection of confusion matrices.
best_test, confusion_matrix_test = mlp_nnrank_cross_val_final_test(
    trained_model, test_X, test_Y, 'test', root_path
)

print(best_test)
print('test_acc_mean: {:.3f}, std: {:.3f}'.format(np.mean(best_test), np.std(best_test)))
print("......")

def cm_to_plot(best_test, confusion_matrices=None):
    """Print which entries reach the highest test accuracy and the first such confusion matrix.

    Args:
        best_test: non-empty sequence of test accuracies.
        confusion_matrices: sequence indexable in parallel with `best_test`;
            when omitted, the notebook-level `confusion_matrix_test` is used
            (original behaviour).

    Raises:
        ValueError: if `best_test` is empty.
    """
    if len(best_test) == 0:
        raise ValueError('best_test is empty: no accuracies to compare')
    if confusion_matrices is None:
        confusion_matrices = confusion_matrix_test

    max_test = np.max(best_test)
    # Plain Python ints keep the printed index list free of NumPy scalar reprs.
    idx_cm = [index for index, item in enumerate(best_test) if item == max_test]

    print('idx_cm: ', idx_cm)
    print('max_test_acc: ', max_test)

    # Several entries may tie for the maximum; only the first one is shown.
    print(confusion_matrices[idx_cm[0]])
# Report the best-scoring entry, then the matrix stored at index 4.
cm_to_plot(best_test)
print('5th confusion matrix: ', list(map(list, confusion_matrix_test[4])))

# Convert every confusion matrix into nested Python lists so they can be averaged together.
cm_list = []
for cm in confusion_matrix_test:
    cm_list.append(list(map(list, cm)))

print(len(cm_list))
# Element-wise mean over all collected confusion matrices.
print('avg cm: ', list(map(list, np.mean(cm_list, axis=0))))


Accuracy of the network on the 27 test images: 70.4 %
Accuracy of the network on the 27 test images: 59.3 %
Accuracy of the network on the 27 test images: 70.4 %
Accuracy of the network on the 27 test images: 77.8 %
Accuracy of the network on the 27 test images: 74.1 %
Accuracy of the network on the 27 test images: 77.8 %
Accuracy of the network on the 27 test images: 77.8 %
Accuracy of the network on the 27 test images: 77.8 %
Accuracy of the network on the 27 test images: 59.3 %
Accuracy of the network on the 27 test images: 66.7 %
[70.37037037037037, 59.25925925925926, 70.37037037037037, 77.77777777777777, 74.07407407407408, 77.77777777777777, 77.77777777777777, 77.77777777777777, 59.25925925925926, 66.66666666666667]
test_acc_mean: 71.111, std: 6.988
......
idx_cm:  [3, 5, 6, 7]
max_test_acc:  77.77777777777777
[[ 1  2  0]
 [ 0 10  3]
 [ 0  1 10]]
5th confusion matrix:  [[1, 2, 0], [2, 8, 3], [0, 0, 11]]
10
avg cm:  [[1.0, 1.9, 0.1], [1.6, 8.9, 2.5], [0.0, 1.7, 9.3]]
