# 1. Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import numpy as np
from torchvision import datasets, models, transforms

from torchvision.models import resnet18, ResNet18_Weights
import torch.optim as optim


import random
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split


from sklearn.model_selection import GroupKFold
from sklearn.model_selection import StratifiedGroupKFold


from codes.utils import stratified_train_test_group_kfold
from codes.utils import model_test_classification
from codes.utils import accuracy_nnrank
from codes.utils import cnn_nnrank_cross_val_final_test

from codes.regression_codes import cnn_nnrank_gridsearch


# Reproducibility: seed every RNG this notebook draws from (Python, NumPy,
# PyTorch CPU and CUDA).
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# Restrict cuDNN to deterministic kernels AND disable the autotuner.
# With benchmark=True cuDNN may select a different algorithm from run to run,
# which defeats the purpose of the fixed seeds above.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the first GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# 2. Data

In [2]:
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load this dataset from a trusted source.
with open("Data/MoS2_Analysis_Processed_Data2", "rb") as fp:   # Unpickling
    MoS2_data = pickle.load(fp)

df = pd.DataFrame(MoS2_data)
T = df['T']

# Map each value of the 'T' column to a class index (0, 1, 2).
# Iterating over the values directly avoids treating a positional counter as a
# Series label, which would break on a non-default index.
# A value missing from T_classes raises KeyError.
T_classes = {900.0: 0, 950.0: 1, 1000.0: 2}
T_target = [T_classes[value] for value in T]
sampleId = df['sampleId']
data_image = np.array([np.array(item) for item in df['image']])

print(len(T_target))

# Replicate the image stack along a new trailing axis (3 copies), then move
# that axis to position 1 and divide by 255.
# NOTE(review): the division assumes 0-255 pixel values - confirm against the data.
Data_CNN = data_image
print(Data_CNN.shape)
Data_CNN_rgb = np.repeat(Data_CNN[..., np.newaxis], 3, -1)
print(Data_CNN_rgb.shape)
Data_CNN_rgb = Data_CNN_rgb.transpose(0, 3, 1, 2)/255
print(Data_CNN_rgb.shape)

X = Data_CNN_rgb
Y = np.array(T_target)
groups = np.array(sampleId)

# Split off fold 0 of 10 via the project helper, passing sampleId as the groups.
# NOTE(review): presumably this keeps each sample's images on one side of the
# split and stratifies by class - confirm in codes.utils.
train_val_groups, train_val_X, train_val_Y, test_X, test_Y = stratified_train_test_group_kfold(
    X, Y, groups, n_splits=10, test_fold=0
)

print(train_val_X.shape)
print(train_val_Y.shape)
print(test_X.shape)
print(test_Y.shape)


def pretrained_model(drop_out):
    """Build a ResNet-18 with torchvision's default pretrained weights and a
    custom three-output head.

    Args:
        drop_out: Dropout probability used by both Dropout layers of the head.

    Returns:
        The network after being moved to the notebook-level ``device``.
    """
    backbone = models.resnet18(weights=ResNet18_Weights.DEFAULT)

    # The layer order is kept exactly as before so the Sequential indices (and
    # hence parameter names such as fc.2.* and fc.5.*) do not change.
    head_layers = [
        nn.ReLU(),
        nn.Dropout(p=drop_out),
        nn.Linear(512, 100),  # 512 = input width of the original ResNet-18 fc layer
        nn.ReLU(),
        nn.Dropout(p=drop_out),
        nn.Linear(100, 3),
        nn.Sigmoid(),  # three independent outputs in (0, 1)
    ]
    backbone.fc = nn.Sequential(*head_layers)

    # nn.Module.to returns the module itself, so this is the same object.
    return backbone.to(device)

262
(262, 224, 224)
(262, 224, 224, 3)
(262, 3, 224, 224)
<class 'generator'>
(235, 3, 224, 224)
(235,)
(27, 3, 224, 224)
(27,)


In [3]:
# Create the model output directory (portable replacement for `!mkdir -p`;
# exist_ok=True makes re-running this cell a no-op).
os.makedirs('Models/nnrank/ImageNet/aug3/CNN/', exist_ok=True)

# 3. Running Cross-Validation

In [4]:
# The hyperparameters below were optimized beforehand;
# only the optimum values for the production model are used here.
# Each is a single-element list passed to cnn_nnrank_gridsearch; the trailing
# commented lists are presumably the grids tried earlier (TODO confirm).

Learning_rate = [5e-5] #[1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 1e-5, 5e-6]
Drop_out = [0.2]#[0.2, 0.35, 0.5, 0.65, 0.75]
Batch_size = [100] #[24, 32, 100]

def cross_10_folds_cnn(train_val_X, train_val_Y, groups=None, n_splits=10):
    """Run grouped cross-validation, calling the grid search once per fold.

    For each fold the data is split with ``stratified_train_test_group_kfold``
    and handed to ``cnn_nnrank_gridsearch`` together with the notebook-level
    ``Learning_rate``, ``Drop_out`` and ``Batch_size`` lists.

    Args:
        train_val_X: Inputs to split into train/validation parts.
        train_val_Y: Labels aligned with ``train_val_X``.
        groups: Group id per sample, aligned with ``train_val_X``. Defaults to
            the notebook-level ``train_val_groups`` so existing calls keep
            working.
        n_splits: Number of folds; one grid search is run per fold.

    Returns:
        Tuple ``(best_train, best_val, best_variables, best_performance_record)``
        of four lists, each holding one entry per fold in fold order, exactly
        as returned by ``cnn_nnrank_gridsearch``.
    """
    if groups is None:
        groups = train_val_groups

    best_train = []
    best_val = []
    best_variables = []
    best_performance_record = []

    for fold in range(n_splits):
        # NOTE(review): the directory created earlier in the notebook is
        # 'Models/nnrank/...'; presumably the helper prepends 'Models/' - confirm.
        model_path = f'nnrank/ImageNet/aug3/CNN/{fold}_model.pth'

        # The first returned element (the remaining groups) is not needed here.
        _, train_X, train_Y, val_X, val_Y = stratified_train_test_group_kfold(
            train_val_X, train_val_Y, groups, n_splits=n_splits, test_fold=fold
        )
        best_train_acc, best_val_acc, best_record, hyper = cnn_nnrank_gridsearch(
            train_X, train_Y, val_X, val_Y,
            Learning_rate, Drop_out, Batch_size, fold, model_path
        )

        best_train.append(best_train_acc)
        best_val.append(best_val_acc)
        best_variables.append(hyper)
        best_performance_record.append(best_record)

        print(f'fold: {fold} done!')

    return best_train, best_val, best_variables, best_performance_record


# Run the cross-validation on the train/validation split; each returned list
# holds one entry per fold.
best_train, best_val, best_variables, best_performance_record = cross_10_folds_cnn(train_val_X, train_val_Y)



<class 'generator'>
Epoch1: loss: 49.9988 val_loss: 16.3455
Epoch2: loss: 44.5546 val_loss: 14.6204
Epoch3: loss: 40.0436 val_loss: 14.0150
Epoch4: loss: 36.6752 val_loss: 12.2559
Epoch5: loss: 32.9435 val_loss: 12.3110
Epoch6: loss: 31.6957 val_loss: 11.5994
Epoch7: loss: 27.0584 val_loss: 10.0417
Epoch8: loss: 24.7065 val_loss: 9.8446
Epoch9: loss: 23.5530 val_loss: 8.7414
Epoch10: loss: 21.2243 val_loss: 8.3128
Epoch11: loss: 20.8283 val_loss: 8.0188
Epoch12: loss: 17.5389 val_loss: 7.6006
Epoch13: loss: 18.6593 val_loss: 8.1850
Epoch14: loss: 16.2862 val_loss: 7.1365
Epoch15: loss: 16.2115 val_loss: 6.8154
Epoch16: loss: 14.0702 val_loss: 6.6374
Epoch17: loss: 12.1854 val_loss: 7.1589
Epoch18: loss: 13.8398 val_loss: 6.9241
Epoch19: loss: 13.0860 val_loss: 6.9984
Epoch20: loss: 13.0610 val_loss: 7.6753
Epoch21: loss: 11.2394 val_loss: 7.4670
Epoch22: loss: 9.9377 val_loss: 7.7166
Epoch23: loss: 10.1676 val_loss: 6.5994
Epoch24: loss: 9.9376 val_loss: 6.8313
Epoch25: loss: 9.8013 va

# 4. Test Models

In [5]:

# Build a network with dropout 0.2 (the same value as Drop_out[0] above) and
# pass it with the held-out test set to the project helper.
# NOTE(review): the helper presumably loads each fold's saved weights found
# under root_path - confirm in codes.utils.
trained_model = pretrained_model(0.2)
root_path = 'nnrank/ImageNet/aug3/CNN'

best_test, confusion_matrix_test = cnn_nnrank_cross_val_final_test(
    trained_model,
    test_X,
    test_Y,
    'test',
    root_path,
)

# Per-model test accuracies, then their mean and standard deviation.
print(best_test)
print(f'test_acc_mean: {np.mean(best_test) :.3f}, std: {np.std(best_test) :.3f}')
print("......")

def cm_to_plot(best_test, confusion_matrices=None):
    """Report which entries of ``best_test`` reach the highest accuracy.

    Prints the indices of every entry equal to the maximum, the maximum
    itself, and the confusion matrix belonging to the first such entry.

    Args:
        best_test: Sequence of accuracies, one per evaluated model.
        confusion_matrices: Sequence of confusion matrices aligned with
            ``best_test``. Defaults to the notebook-level
            ``confusion_matrix_test`` so existing calls keep working.

    Returns:
        list[int]: Indices of the best-scoring entries, in ascending order.

    Raises:
        ValueError: If ``best_test`` is empty.
    """
    if len(best_test) == 0:
        raise ValueError('best_test is empty; there is no best model to report')
    if confusion_matrices is None:
        confusion_matrices = confusion_matrix_test

    max_test = np.max(best_test)
    # Keep every index that ties for the maximum, not just the first one.
    idx_cm = [index for index, item in enumerate(best_test) if item == max_test]

    print('idx_cm: ', idx_cm)
    print('max_test_acc: ', max_test)

    print(confusion_matrices[idx_cm[0]])
    return idx_cm
# Print the best test accuracy and the confusion matrix of the first model that reached it.
cm_to_plot(best_test)
# Index 4 is the fifth entry of confusion_matrix_test.
print('5th confusion matrix: ', [list(item) for item in confusion_matrix_test[4]])

# Convert every confusion matrix into nested lists so they can be averaged
# element-wise across models.
cm_list = [[list(item) for item in matrix] for matrix in confusion_matrix_test]

print(len(cm_list))
print('avg cm: ', [list(item) for item in np.mean(cm_list, axis=0)])


Accuracy of the network on the 27 test images: 63.0 %
Accuracy of the network on the 27 test images: 59.3 %
Accuracy of the network on the 27 test images: 74.1 %
Accuracy of the network on the 27 test images: 55.6 %
Accuracy of the network on the 27 test images: 63.0 %
Accuracy of the network on the 27 test images: 66.7 %
Accuracy of the network on the 27 test images: 77.8 %
Accuracy of the network on the 27 test images: 70.4 %
Accuracy of the network on the 27 test images: 74.1 %
Accuracy of the network on the 27 test images: 74.1 %
[62.96296296296296, 59.25925925925926, 74.07407407407408, 55.55555555555556, 62.96296296296296, 66.66666666666667, 77.77777777777777, 70.37037037037037, 74.07407407407408, 74.07407407407408]
test_acc_mean: 67.778, std: 7.037
......
idx_cm:  [6]
max_test_acc:  77.77777777777777
[[ 2  1  0]
 [ 1 10  2]
 [ 0  2  9]]
5th confusion matrix:  [[1, 2, 0], [3, 7, 3], [1, 1, 9]]
10
avg cm:  [[1.3, 1.7, 0.0], [2.3, 8.8, 1.9], [0.1, 2.7, 8.2]]


In [None]:
# Leftover scratch cell (never executed): a bare string expression holding the
# model output directory. It has no effect beyond echoing the path when run.
'Models/nnrank/ImageNet/aug3/CNN/'