# 1. Imports

In [1]:
from sklearn.decomposition import PCA
import matplotlib as mpl 
import numpy as np
import os
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import f1_score
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import StandardScaler


from codes.utils import stratified_train_test_group_kfold
from codes.utils import model_test_classification

from codes.classification_codes import svc_gridsearch



0.10.1


# 2. Data

In [2]:

# Load the pickled dataset.
# NOTE: pickle.load can execute arbitrary code while unpickling, so this file
# must come from a trusted source.
with open("Data/MoS2_Analysis_Cropped_trained", "rb") as fp:
    MoS2_Proj1_Class_Data = pickle.load(fp)

df = pd.DataFrame(MoS2_Proj1_Class_Data)

# Pull the target column, the feature column and the per-row sample ids out
# of the frame.
T_target = np.array(list(df['T']))
features = np.array(list(df['ImageNet']))
sampleId = list(df['sampleId'])

X = features
Y = np.array(T_target)
groups = np.array(sampleId)

# Split using `groups` with test_fold=0 of a 10-way split. The helper's
# outputs are unpacked as (groups, train/val X, train/val Y, test X, test Y).
train_val_groups, train_val_X, train_val_Y, test_X, test_Y = stratified_train_test_group_kfold(
    X, Y, groups, n_splits=10, test_fold=0
)

# NOTE(review): the scaler is fitted on ALL of X, so every later
# validation/test fold contributes to the scaling statistics (data leakage).
# Only X is rescaled here; train_val_X / test_X above were split off before
# scaling and stay unscaled. Left unchanged so downstream results and saved
# models stay comparable -- consider fitting the scaler per training fold.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

Y = Y.flatten()

# Report the shapes of the splits created above. The earlier version printed
# train_X / train_Y / val_X / val_Y, which are never defined in this cell and
# raise NameError on a fresh kernel.
print(train_val_X.shape)
print(train_val_Y.shape)
print(test_X.shape)
print(test_Y.shape)


# 3. Running Cross-Validation

In [6]:
# Candidate values handed to svc_gridsearch, one list per hyperparameter.
Ce = [1.0, 0.8, 0.2]
# Only the polynomial kernel is searched ('rbf', 'sigmoid' and 'precomputed'
# are currently left out).
Kernel = ['poly']
Degree = list(range(2, 8))
# Either one of the named modes or a float; 0.34 was noted as the best value.
Gamma = ['scale', 'auto', 0.1, 0.2, 0.3, 0.34]
Coef0 = [0.8, 0.2, 0.05, 0.0]
Max_iter = [90, 100, 120, -1]

# All grids collected in one list, in the order listed above.
variables = [Ce, Kernel, Degree, Gamma, Coef0, Max_iter]


def cross_10_folds_svc(X, Y, n_splits=10):
    """Run the SVC grid search once per fold and collect each fold's best result.

    For every fold index the data is split with
    ``stratified_train_test_group_kfold`` and ``svc_gridsearch`` is run on the
    resulting train/validation parts.

    Relies on module-level state: ``groups`` (passed to the splitter) and the
    hyperparameter grids ``Ce``, ``Kernel``, ``Degree``, ``Gamma``, ``Coef0``
    and ``Max_iter`` (passed to ``svc_gridsearch``).

    Parameters
    ----------
    X : features, forwarded to the splitter.
    Y : targets, forwarded to the splitter.
    n_splits : int, default 10
        Number of folds. Used both for the loop and as the splitter's
        ``n_splits``, so the two can no longer drift apart. The default
        reproduces the previous hard-coded behaviour.

    Returns
    -------
    (best_train, best_val, best_variables) : tuple of lists
        One entry per fold: ``performance_best['accuracy_train']``,
        ``performance_best['accuracy_val']`` and ``variables_best`` as
        returned by ``svc_gridsearch``.
    """
    best_train = []
    best_val = []
    best_variables = []

    for fold in range(n_splits):
        # Forwarded to svc_gridsearch -- presumably the location where the
        # fold's model is saved; confirm in codes.classification_codes.
        model_path = f'classification/ImageNet/aug1/SVC_{fold}_model.sav'

        # The first returned value (the groups) is not needed here.
        _groups, train_X, train_Y, val_X, val_Y = stratified_train_test_group_kfold(
            X, Y, groups, n_splits=n_splits, test_fold=fold
        )

        variables_best, performance_best = svc_gridsearch(
            train_X, train_Y, val_X, val_Y,
            Ce, Kernel, Degree, Gamma, Coef0, Max_iter,
            fold, model_path,
        )

        best_train.append(performance_best['accuracy_train'])
        best_val.append(performance_best['accuracy_val'])
        best_variables.append(variables_best)
        print(f'fold: {fold} done!')

    return best_train, best_val, best_variables


# Run the per-fold grid search, then show the per-fold best accuracies
# followed by their mean and standard deviation.
best_train, best_val, best_variables = cross_10_folds_svc(X, Y)
print(best_train, best_val, sep='\n')
print(f'train_acc_mean: {np.mean(best_train) :.3f}, std: {np.std(best_train) :.3f}, val_acc_mean: {np.mean(best_val):.3f}, std: {np.std(best_val):.3f}')

<class 'generator'>
best_train_acc: 0.8208, best_val_acc: 0.35135135135135137




best_train_acc: 0.8176, best_val_acc: 0.43243243243243246




best_train_acc: 0.9728, best_val_acc: 0.44594594594594594




best_train_acc: 0.7248, best_val_acc: 0.4594594594594595




best_train_acc: 0.6112, best_val_acc: 0.47297297297297297




best_train_acc: 0.944, best_val_acc: 0.5135135135135135




fold: 0 done!
<class 'generator'>
best_train_acc: 0.8048, best_val_acc: 0.25675675675675674
best_train_acc: 0.9152, best_val_acc: 0.33783783783783783




best_train_acc: 0.7136, best_val_acc: 0.36486486486486486




best_train_acc: 0.7264, best_val_acc: 0.3918918918918919




best_train_acc: 0.7568, best_val_acc: 0.43243243243243246




best_train_acc: 0.5808, best_val_acc: 0.5




fold: 1 done!
<class 'generator'>
best_train_acc: 0.7942583732057417, best_val_acc: 0.25
best_train_acc: 0.8437001594896332, best_val_acc: 0.2916666666666667
best_train_acc: 0.9154704944178629, best_val_acc: 0.3055555555555556




best_train_acc: 0.9553429027113237, best_val_acc: 0.3333333333333333
best_train_acc: 0.69377990430622, best_val_acc: 0.4166666666666667




best_train_acc: 0.9696969696969697, best_val_acc: 0.4583333333333333
best_train_acc: 0.9808612440191388, best_val_acc: 0.4861111111111111




fold: 2 done!
<class 'generator'>
best_train_acc: 0.8389154704944178, best_val_acc: 0.2916666666666667




best_train_acc: 0.9649122807017544, best_val_acc: 0.3055555555555556
best_train_acc: 0.7001594896331739, best_val_acc: 0.3611111111111111
best_train_acc: 0.7304625199362041, best_val_acc: 0.3888888888888889
best_train_acc: 0.7575757575757576, best_val_acc: 0.4722222222222222




best_train_acc: 0.6602870813397129, best_val_acc: 0.5277777777777778
best_train_acc: 0.7192982456140351, best_val_acc: 0.5833333333333334




best_train_acc: 0.7017543859649122, best_val_acc: 0.5972222222222222




fold: 3 done!
<class 'generator'>
best_train_acc: 0.7703349282296651, best_val_acc: 0.3333333333333333




best_train_acc: 0.7623604465709729, best_val_acc: 0.3611111111111111




best_train_acc: 0.9665071770334929, best_val_acc: 0.4166666666666667
best_train_acc: 0.9696969696969697, best_val_acc: 0.4722222222222222




fold: 4 done!
<class 'generator'>
best_train_acc: 0.8724082934609251, best_val_acc: 0.3194444444444444
best_train_acc: 0.8851674641148325, best_val_acc: 0.375




best_train_acc: 0.6682615629984051, best_val_acc: 0.5




best_train_acc: 0.7878787878787878, best_val_acc: 0.5555555555555556




best_train_acc: 0.9569377990430622, best_val_acc: 0.5833333333333334




fold: 5 done!
<class 'generator'>
best_train_acc: 0.8388625592417062, best_val_acc: 0.42424242424242425




best_train_acc: 0.8815165876777251, best_val_acc: 0.48484848484848486
best_train_acc: 0.8325434439178515, best_val_acc: 0.5303030303030303




best_train_acc: 0.9447077409162717, best_val_acc: 0.5454545454545454




fold: 6 done!
<class 'generator'>
best_train_acc: 0.8357030015797788, best_val_acc: 0.24242424242424243




best_train_acc: 0.8609794628751974, best_val_acc: 0.2878787878787879
best_train_acc: 0.8135860979462876, best_val_acc: 0.30303030303030304
best_train_acc: 0.7914691943127962, best_val_acc: 0.3333333333333333
best_train_acc: 0.8041074249605056, best_val_acc: 0.3484848484848485




best_train_acc: 0.7693522906793049, best_val_acc: 0.3787878787878788
best_train_acc: 0.8246445497630331, best_val_acc: 0.45454545454545453




best_train_acc: 0.65086887835703, best_val_acc: 0.4696969696969697
best_train_acc: 0.9605055292259084, best_val_acc: 0.5




best_train_acc: 1.0, best_val_acc: 0.6363636363636364




fold: 7 done!
<class 'generator'>
best_train_acc: 0.8104265402843602, best_val_acc: 0.21212121212121213
best_train_acc: 0.8293838862559242, best_val_acc: 0.30303030303030304




best_train_acc: 0.9557661927330173, best_val_acc: 0.3181818181818182
best_train_acc: 0.7298578199052133, best_val_acc: 0.36363636363636365
best_train_acc: 0.943127962085308, best_val_acc: 0.3939393939393939




best_train_acc: 0.9873617693522907, best_val_acc: 0.4090909090909091




best_train_acc: 0.9715639810426541, best_val_acc: 0.42424242424242425




fold: 8 done!
<class 'generator'>
best_train_acc: 0.8233438485804416, best_val_acc: 0.49230769230769234
best_train_acc: 0.8438485804416404, best_val_acc: 0.5230769230769231




best_train_acc: 0.9526813880126183, best_val_acc: 0.5538461538461539
best_train_acc: 0.943217665615142, best_val_acc: 0.5692307692307692




best_train_acc: 0.944794952681388, best_val_acc: 0.6153846153846154




fold: 9 done!
[0.944, 0.5808, 0.9808612440191388, 0.7017543859649122, 0.9696969696969697, 0.9569377990430622, 0.9447077409162717, 1.0, 0.9715639810426541, 0.944794952681388]
[0.5135135135135135, 0.5, 0.4861111111111111, 0.5972222222222222, 0.4722222222222222, 0.5833333333333334, 0.5454545454545454, 0.6363636363636364, 0.42424242424242425, 0.6153846153846154]
train_acc_mean: 0.900, std: 0.133, val_acc_mean: 0.537, std: 0.066


In [7]:
import numpy as np
from sklearn.metrics import f1_score


def f1score_fn(X, Y, n_splits=10):
    """Compute macro-averaged F1 scores of the saved per-fold models.

    For every fold index the data is split with
    ``stratified_train_test_group_kfold`` (using the module-level ``groups``),
    the model pickled at
    ``Model/classification/ImageNet/aug1/SVC_{fold}_model.sav`` is loaded, and
    its predictions on the train and validation parts are scored with
    ``f1_score(..., average='macro')``.

    Parameters
    ----------
    X : features, forwarded to the splitter.
    Y : targets, forwarded to the splitter.
    n_splits : int, default 10
        Number of folds / saved models to evaluate. The default reproduces
        the previous hard-coded behaviour.

    Returns
    -------
    (F1_score_train, F1_score_val) : tuple of lists
        One macro F1 score per fold for the train and validation parts.
    """
    F1_score_train = []
    F1_score_val = []

    for fold in range(n_splits):
        # The first returned value (the groups) is not needed here.
        _groups, train_X, train_Y, val_X, val_Y = stratified_train_test_group_kfold(
            X, Y, groups, n_splits=n_splits, test_fold=fold
        )

        model_path = f'classification/ImageNet/aug1/SVC_{fold}_model.sav'
        PATH = os.path.join('Model', model_path)

        # Use a context manager so the file handle is closed right after
        # loading (the bare open() used before left it open).
        # NOTE: pickle.load can execute arbitrary code; only load model files
        # produced by this project.
        with open(PATH, 'rb') as model_file:
            loaded_model = pickle.load(model_file)

        pred_val_Y = loaded_model.predict(val_X)
        pred_train_Y = loaded_model.predict(train_X)

        F1_score_train.append(f1_score(train_Y, pred_train_Y, average='macro'))
        F1_score_val.append(f1_score(val_Y, pred_val_Y, average='macro'))

    return F1_score_train, F1_score_val



# Score the saved per-fold models, then print the mean and standard deviation
# of the macro F1 score across folds for the train and validation parts.
F1_score_train, F1_score_val = f1score_fn(X, Y)


print(f'f1_train: {np.mean(F1_score_train)}, std: {np.std(F1_score_train)}, f1_val: {np.mean(F1_score_val)}, std: {np.std(F1_score_val)}')


<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
<class 'generator'>
f1_train: 0.8845822769556664, std: 0.1642292528108883, f1_val: 0.5034534017450101, std: 0.09598041240059832
