# Experiment 02: Scattering + PCA + SVM






In [1]:
import sys
import random
sys.path.append('../src')
import warnings
warnings.filterwarnings("ignore") 

from utils.compute_metrics import get_metrics, get_majority_vote,log_test_metrics
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GroupKFold
from tqdm import tqdm
from pprint import pprint

from itertools import product
import pickle
import pandas as pd
import numpy as np
import mlflow
import matplotlib.pyplot as plt


## Feature Reduction/Selection

#### Upload Scattering Features

In [2]:
with open('../data/03_features/inception_dict_tensor_avg_interpolation_pooling.pickle', 'rb') as handle:
    features_dict = pickle.load(handle)
    df_features = features_dict ['features']
    interpolation = features_dict ['Interpolation']

  and should_run_async(code)


# Cross Validation using SVM Classification

> Methods that exclude outliers were used to normalize the features. Patient-specific leave-one-out cross-validation (LOOCV) was applied to evaluate the classification. In each case, the test set consisted of10 images from the same patient and the training set contained 540 images from the remaining 54 patients. For each training set, fivefold cross-validation and grid search were applied to indicate the optimal SVM classifier hyperparameters and the best kernel. To address the problem of class imbalance, the SVM hyperparameter C of each class was adjusted inversely proportional to that class frequency in the training set. Label 1 indicated the image containing a fatty liver and label −1 otherwise. 


In [3]:
# Set the parameters by cross-validation
param_gamma = [1e-3, 1e-4]
param_C = [1, 10, 1000] 
svm_class_weight = [None, 'balanced']
rbf_params = list(product(['kernel'],param_gamma, param_C, svm_class_weight ))
linear_params = list(product(['linear'],param_C, svm_class_weight))
params = rbf_params + linear_params

In [4]:
def train_valid(param, X_train,X_valid,y_train, y_valid):
    if param[0] == 'kernel': 
        #The “balanced” mode uses the values of y to automatically adjust weights inversely
        #proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)).
        model = SVC(gamma=param[1], C=param[2], class_weight= param[3])
    if param[0] == 'linear': 
        #The “balanced” mode uses the values of y to automatically adjust weights inversely
        #proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)).
        model = LinearSVC(C=param[1], class_weight= param[2])

    model.fit(X_train, y_train)
    predictions = model.predict(X_valid)
    acc, auc, specificity, sensitivity = get_metrics(y_valid, predictions)
    return acc, auc, specificity, sensitivity , predictions


In [5]:
def log_val_metrics(params, metrics, test_n_splits, pca_n_components = 5, standardize=True):
    # Important !!!! Put Correct Experiment Name
    mlflow.set_experiment('val_scattering_svm_pca_experiment')
    # log mlflow params
    for param in params:
        with mlflow.start_run():
            #log params
            mlflow.log_param('pca_n',pca_n_components)
            mlflow.log_param('model',f'svm: {param[0]}')
            mlflow.log_param('test K fold', test_n_splits)
            if param[0] == 'kernel':
                mlflow.log_param('gamma',param[1])
                mlflow.log_param('C',param[2])
                mlflow.log_param('class weight svm', param[3])
            if param[0] == 'linear': 
                mlflow.log_param('C',param[1])
                mlflow.log_param('class weight svm', param[2])
            #log metrics
            mlflow.log_metric('accuracy',np.array(metrics[str(param)]['acc']).mean())
            mlflow.log_metric('AUC',np.array(metrics[str(param)]['auc']).mean())
            mlflow.log_metric('specificity',np.array(metrics[str(param)]['specificity']).mean())
            mlflow.log_metric('sensitivity',np.array(metrics[str(param)]['sensitivity']).mean())
    print("Done logging validation params in MLFlow")

In [6]:
df = df_features
pca_n_components = 5
standardize = True
test_metrics={}  
#majority vote results
test_metrics_mv={} 
test_n_splits = 11
group_kfold_test = GroupKFold(n_splits=test_n_splits)
seed= 11
df_pid = df['id']
df_y = df['labels']
fold_c =1 

for train_index, test_index in group_kfold_test.split(df, 
                                                  df_y, 
                                                  df_pid):
    random.seed(seed)
    random.shuffle(train_index)
    X_train, X_test = df.iloc[train_index], df.iloc[test_index]
    y_train, y_test = df_y.iloc[train_index], df_y.iloc[test_index]
    
    X_test = X_test.drop(columns=['id', 'labels'])
    X_train_pid = X_train.pop('id')
    X_train = X_train.drop(columns=['labels'])
    
    # Do cross-validation for hyperparam tuning
    group_kfold_val = GroupKFold(n_splits=5)
    metrics={}
    #X_train_y = df.pop('class')
    for subtrain_index, valid_index in group_kfold_val.split(X_train, 
                                                      y_train, 
                                                      X_train_pid):
                                   
        X_subtrain, X_valid = X_train.iloc[subtrain_index], X_train.iloc[valid_index]
        y_subtrain, y_valid = y_train.iloc[subtrain_index], y_train.iloc[valid_index]


        
        pca = PCA(n_components=pca_n_components,random_state = seed)           
        X_subtrain = pca.fit_transform(X_subtrain)
        X_valid = pca.transform(X_valid)
        
        #standardize
        if standardize:
            scaler = StandardScaler()
            X_subtrain = scaler.fit_transform(X_subtrain)
            X_valid = scaler.transform(X_valid)
        

        for param in tqdm(params):
            if str(param) not in metrics.keys() :
                metrics[str(param)] ={'acc':[], 'auc':[], 'sensitivity':[], 'specificity':[]}
                                   
            acc, auc, specificity, sensitivity,_ = train_valid(param, X_subtrain,X_valid,y_subtrain, y_valid)
            metrics[str(param)]['auc'].append(auc)
            metrics[str(param)]['acc'].append(acc)
            metrics[str(param)]['sensitivity'].append(sensitivity)
            metrics[str(param)]['specificity'].append(specificity)
    #log validation metrics for all combination of params
    log_val_metrics(params, metrics, test_n_splits, pca_n_components, standardize)
    
    #highest accuracy
    index_param_max = np.array([np.array(metrics[str(param)]['auc']).mean() for param in params]).argmax()
    print('From all the combinations, the highest accuracy was achieved with', params[index_param_max])
    #train and test with max param

    #standardize
    if standardize:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    
    pca = PCA(n_components=pca_n_components)           
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    acc, auc, specificity, sensitivity, predictions = train_valid(params[index_param_max], X_train, X_test, y_train, y_test)
    
    #compute majority vote metrics
    acc_mv, auc_mv, specificity_mv, sensitivity_mv = get_majority_vote(y_test, predictions)
    
    print('FOLD '+ str(fold_c) + ':  acc ' + str(acc) +  ', auc ' +  str(auc) +  ', specificity '+ str(specificity)
          + ', sensitivity ' + str(sensitivity))
    print('FOLD '+ str(fold_c) + ':  MV acc ' + str(acc_mv) +  ', MV auc ' +  str(auc_mv) +  ', MV specificity '+ str(specificity_mv)
          + ', MV sensitivity ' + str(sensitivity_mv))
    
    test_metrics[fold_c]=  {'acc':acc, 'auc':auc, 'sensitivity':sensitivity, 'specificity':specificity, 'param':params[index_param_max]}
    test_metrics_mv[fold_c]=  {'acc':acc_mv, 'auc':auc_mv, 'sensitivity':sensitivity_mv, 'specificity':specificity_mv, 'param':params[index_param_max]}
    
    fold_c +=1 

log_test_metrics(test_metrics, test_metrics_mv, test_n_splits, 'AVG Pooling Inception features + PCA + SVM',None, seed, pca_n_components)


100%|██████████| 18/18 [00:00<00:00, 38.45it/s]
100%|██████████| 18/18 [00:00<00:00, 48.31it/s]
100%|██████████| 18/18 [00:00<00:00, 67.68it/s]
100%|██████████| 18/18 [00:00<00:00, 87.37it/s]
100%|██████████| 18/18 [00:00<00:00, 48.13it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 1:  acc 0.86, auc 0.8833333333333333, specificity 1.0, sensitivity 0.7666666666666667
FOLD 1:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 79.60it/s]
100%|██████████| 18/18 [00:00<00:00, 80.27it/s]
100%|██████████| 18/18 [00:00<00:00, 75.26it/s]
100%|██████████| 18/18 [00:00<00:00, 101.55it/s]
100%|██████████| 18/18 [00:00<00:00, 80.54it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1000, None)
FOLD 2:  acc 0.78, auc 0.8083333333333333, specificity 0.6666666666666666, sensitivity 0.95
FOLD 2:  MV acc 0.8, MV auc 0.8333333333333334, MV specificity 0.6666666666666666, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 72.06it/s]
100%|██████████| 18/18 [00:00<00:00, 60.34it/s]
100%|██████████| 18/18 [00:00<00:00, 77.60it/s]
100%|██████████| 18/18 [00:00<00:00, 98.12it/s] 
100%|██████████| 18/18 [00:00<00:00, 94.28it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 3:  acc 0.56, auc 0.5333333333333333, specificity 0.4, sensitivity 0.6666666666666666
FOLD 3:  MV acc 0.8, MV auc 0.75, MV specificity 0.5, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 75.83it/s]
100%|██████████| 18/18 [00:00<00:00, 69.53it/s]
100%|██████████| 18/18 [00:00<00:00, 71.03it/s]
100%|██████████| 18/18 [00:00<00:00, 76.80it/s]
100%|██████████| 18/18 [00:00<00:00, 75.79it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1000, 'balanced')
FOLD 4:  acc 0.64, auc 0.475, specificity 0.2, sensitivity 0.75
FOLD 4:  MV acc 0.8, MV auc 0.5, MV specificity 0.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 84.42it/s]
100%|██████████| 18/18 [00:00<00:00, 84.65it/s]
100%|██████████| 18/18 [00:00<00:00, 90.92it/s] 
100%|██████████| 18/18 [00:00<00:00, 104.72it/s]
100%|██████████| 18/18 [00:00<00:00, 77.37it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 10, 'balanced')
FOLD 5:  acc 0.84, auc 0.8625, specificity 0.9, sensitivity 0.825
FOLD 5:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 79.19it/s]
100%|██████████| 18/18 [00:00<00:00, 76.38it/s]
100%|██████████| 18/18 [00:00<00:00, 78.87it/s]
100%|██████████| 18/18 [00:00<00:00, 88.45it/s]
100%|██████████| 18/18 [00:00<00:00, 76.86it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, 'balanced')
FOLD 6:  acc 0.96, auc 0.975, specificity 1.0, sensitivity 0.95
FOLD 6:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 73.26it/s]
100%|██████████| 18/18 [00:00<00:00, 73.92it/s]
100%|██████████| 18/18 [00:00<00:00, 82.98it/s] 
100%|██████████| 18/18 [00:00<00:00, 58.92it/s]
100%|██████████| 18/18 [00:00<00:00, 43.12it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 7:  acc 0.58, auc 0.5125, specificity 0.4, sensitivity 0.625
FOLD 7:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 80.05it/s]
100%|██████████| 18/18 [00:00<00:00, 89.56it/s]
100%|██████████| 18/18 [00:00<00:00, 95.37it/s] 
100%|██████████| 18/18 [00:00<00:00, 86.09it/s]
100%|██████████| 18/18 [00:00<00:00, 79.24it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 8:  acc 0.8, auc 0.875, specificity 1.0, sensitivity 0.75
FOLD 8:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 77.68it/s]
100%|██████████| 18/18 [00:00<00:00, 75.74it/s]
100%|██████████| 18/18 [00:00<00:00, 95.75it/s] 
100%|██████████| 18/18 [00:00<00:00, 101.67it/s]
100%|██████████| 18/18 [00:00<00:00, 101.13it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 9:  acc 0.18, auc 0.15, specificity 0.1, sensitivity 0.2
FOLD 9:  MV acc 0.2, MV auc 0.125, MV specificity 0.0, MV sensitivity 0.25


100%|██████████| 18/18 [00:00<00:00, 71.78it/s]
100%|██████████| 18/18 [00:00<00:00, 77.11it/s]
100%|██████████| 18/18 [00:00<00:00, 78.48it/s]
100%|██████████| 18/18 [00:00<00:00, 72.44it/s]
100%|██████████| 18/18 [00:00<00:00, 80.31it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 10:  acc 0.62, auc 0.425, specificity 0.1, sensitivity 0.75
FOLD 10:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 67.94it/s]
100%|██████████| 18/18 [00:00<00:00, 75.00it/s]
100%|██████████| 18/18 [00:00<00:00, 74.59it/s]
100%|██████████| 18/18 [00:00<00:00, 72.55it/s]
100%|██████████| 18/18 [00:00<00:00, 88.46it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, 'balanced')
FOLD 11:  acc 0.72, auc 0.7083333333333333, specificity 0.7666666666666667, sensitivity 0.65
FOLD 11:  MV acc 0.6, MV auc 0.5833333333333334, MV specificity 0.6666666666666666, MV sensitivity 0.5
0.6854545454545454 0.6553030303030303
0.7272727272727273 0.662878787878788
Experiment done


In [7]:
df = df_features
pca_n_components = 8
standardize = True
test_metrics={}  
#majority vote results
test_metrics_mv={} 
test_n_splits = 11
group_kfold_test = GroupKFold(n_splits=test_n_splits)
seed= 11
df_pid = df['id']
df_y = df['labels']
fold_c =1 

for train_index, test_index in group_kfold_test.split(df, 
                                                  df_y, 
                                                  df_pid):
    random.seed(seed)
    random.shuffle(train_index)
    X_train, X_test = df.iloc[train_index], df.iloc[test_index]
    y_train, y_test = df_y.iloc[train_index], df_y.iloc[test_index]
    
    X_test = X_test.drop(columns=['id', 'labels'])
    X_train_pid = X_train.pop('id')
    X_train = X_train.drop(columns=['labels'])
    
    # Do cross-validation for hyperparam tuning
    group_kfold_val = GroupKFold(n_splits=5)
    metrics={}
    #X_train_y = df.pop('class')
    for subtrain_index, valid_index in group_kfold_val.split(X_train, 
                                                      y_train, 
                                                      X_train_pid):
                                   
        X_subtrain, X_valid = X_train.iloc[subtrain_index], X_train.iloc[valid_index]
        y_subtrain, y_valid = y_train.iloc[subtrain_index], y_train.iloc[valid_index]


        #standardize
        if standardize:
            scaler = StandardScaler()
            X_subtrain = scaler.fit_transform(X_subtrain)
            X_valid = scaler.transform(X_valid)
        
        pca = PCA(n_components=pca_n_components,random_state = seed)           
        X_subtrain = pca.fit_transform(X_subtrain)
        X_valid = pca.transform(X_valid)
        

        for param in tqdm(params):
            if str(param) not in metrics.keys() :
                metrics[str(param)] ={'acc':[], 'auc':[], 'sensitivity':[], 'specificity':[]}
                                   
            acc, auc, specificity, sensitivity,_ = train_valid(param, X_subtrain,X_valid,y_subtrain, y_valid)
            metrics[str(param)]['auc'].append(auc)
            metrics[str(param)]['acc'].append(acc)
            metrics[str(param)]['sensitivity'].append(sensitivity)
            metrics[str(param)]['specificity'].append(specificity)
    #log validation metrics for all combination of params
    log_val_metrics(params, metrics, test_n_splits, pca_n_components, standardize)
    
    #highest accuracy
    index_param_max = np.array([np.array(metrics[str(param)]['auc']).mean() for param in params]).argmax()
    print('From all the combinations, the highest accuracy was achieved with', params[index_param_max])
    #train and test with max param

    #standardize
    if standardize:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    
    pca = PCA(n_components=pca_n_components)           
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    acc, auc, specificity, sensitivity, predictions = train_valid(params[index_param_max], X_train, X_test, y_train, y_test)
    
    #compute majority vote metrics
    acc_mv, auc_mv, specificity_mv, sensitivity_mv = get_majority_vote(y_test, predictions)
    
    print('FOLD '+ str(fold_c) + ':  acc ' + str(acc) +  ', auc ' +  str(auc) +  ', specificity '+ str(specificity)
          + ', sensitivity ' + str(sensitivity))
    print('FOLD '+ str(fold_c) + ':  MV acc ' + str(acc_mv) +  ', MV auc ' +  str(auc_mv) +  ', MV specificity '+ str(specificity_mv)
          + ', MV sensitivity ' + str(sensitivity_mv))
    
    test_metrics[fold_c]=  {'acc':acc, 'auc':auc, 'sensitivity':sensitivity, 'specificity':specificity, 'param':params[index_param_max]}
    test_metrics_mv[fold_c]=  {'acc':acc_mv, 'auc':auc_mv, 'sensitivity':sensitivity_mv, 'specificity':specificity_mv, 'param':params[index_param_max]}
    
    fold_c +=1 

log_test_metrics(test_metrics, test_metrics_mv, test_n_splits, 'AVG Pooling Inception features + PCA + SVM',None, seed, pca_n_components)


  and should_run_async(code)
100%|██████████| 18/18 [00:00<00:00, 61.76it/s]
100%|██████████| 18/18 [00:00<00:00, 80.38it/s]
100%|██████████| 18/18 [00:00<00:00, 71.36it/s]
100%|██████████| 18/18 [00:00<00:00, 90.63it/s] 
100%|██████████| 18/18 [00:00<00:00, 84.73it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 1:  acc 0.9, auc 0.9166666666666667, specificity 1.0, sensitivity 0.8333333333333334
FOLD 1:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:00<00:00, 91.34it/s] 
100%|██████████| 18/18 [00:00<00:00, 93.14it/s] 
100%|██████████| 18/18 [00:00<00:00, 90.71it/s] 
100%|██████████| 18/18 [00:00<00:00, 96.04it/s] 
100%|██████████| 18/18 [00:00<00:00, 102.45it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')
FOLD 2:  acc 0.66, auc 0.6416666666666667, specificity 0.7333333333333333, sensitivity 0.55
FOLD 2:  MV acc 0.6, MV auc 0.5833333333333334, MV specificity 0.6666666666666666, MV sensitivity 0.5


100%|██████████| 18/18 [00:00<00:00, 45.69it/s]
100%|██████████| 18/18 [00:00<00:00, 80.06it/s] 
100%|██████████| 18/18 [00:00<00:00, 40.75it/s]
100%|██████████| 18/18 [00:00<00:00, 56.22it/s]
100%|██████████| 18/18 [00:00<00:00, 66.39it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 3:  acc 0.46, auc 0.44166666666666665, specificity 0.35, sensitivity 0.5333333333333333
FOLD 3:  MV acc 0.4, MV auc 0.3333333333333333, MV specificity 0.0, MV sensitivity 0.6666666666666666


100%|██████████| 18/18 [00:00<00:00, 77.08it/s]
100%|██████████| 18/18 [00:00<00:00, 79.44it/s]
100%|██████████| 18/18 [00:00<00:00, 81.27it/s]
100%|██████████| 18/18 [00:00<00:00, 100.44it/s]
100%|██████████| 18/18 [00:00<00:00, 99.88it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 10, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 4:  acc 0.68, auc 0.65, specificity 0.6, sensitivity 0.7
FOLD 4:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 84.76it/s] 
100%|██████████| 18/18 [00:00<00:00, 66.11it/s]
100%|██████████| 18/18 [00:00<00:00, 86.47it/s] 
100%|██████████| 18/18 [00:00<00:00, 86.21it/s]
100%|██████████| 18/18 [00:00<00:00, 76.30it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, 'balanced')
FOLD 5:  acc 0.72, auc 0.675, specificity 0.6, sensitivity 0.75
FOLD 5:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 67.44it/s]
100%|██████████| 18/18 [00:00<00:00, 77.59it/s] 
100%|██████████| 18/18 [00:00<00:00, 68.47it/s]
100%|██████████| 18/18 [00:00<00:00, 103.42it/s]
100%|██████████| 18/18 [00:00<00:00, 91.76it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 6:  acc 0.8, auc 0.875, specificity 1.0, sensitivity 0.75
FOLD 6:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 77.24it/s]
100%|██████████| 18/18 [00:00<00:00, 90.12it/s] 
100%|██████████| 18/18 [00:00<00:00, 98.07it/s] 
100%|██████████| 18/18 [00:00<00:00, 87.90it/s] 
100%|██████████| 18/18 [00:00<00:00, 88.76it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 7:  acc 0.6, auc 0.525, specificity 0.4, sensitivity 0.65
FOLD 7:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 65.69it/s] 
100%|██████████| 18/18 [00:00<00:00, 42.41it/s]
100%|██████████| 18/18 [00:00<00:00, 80.24it/s]
100%|██████████| 18/18 [00:00<00:00, 92.99it/s] 
100%|██████████| 18/18 [00:00<00:00, 71.76it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 8:  acc 0.82, auc 0.8875, specificity 1.0, sensitivity 0.775
FOLD 8:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 86.13it/s]
100%|██████████| 18/18 [00:00<00:00, 118.70it/s]
100%|██████████| 18/18 [00:00<00:00, 93.86it/s] 
100%|██████████| 18/18 [00:00<00:00, 91.07it/s]
100%|██████████| 18/18 [00:00<00:00, 88.47it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 9:  acc 0.06, auc 0.075, specificity 0.1, sensitivity 0.05
FOLD 9:  MV acc 0.0, MV auc 0.0, MV specificity 0.0, MV sensitivity 0.0


100%|██████████| 18/18 [00:00<00:00, 38.81it/s]
100%|██████████| 18/18 [00:00<00:00, 59.46it/s]
100%|██████████| 18/18 [00:00<00:00, 45.74it/s]
100%|██████████| 18/18 [00:00<00:00, 43.67it/s]
100%|██████████| 18/18 [00:00<00:00, 52.83it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')
FOLD 10:  acc 0.64, auc 0.5125, specificity 0.3, sensitivity 0.725
FOLD 10:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:00<00:00, 82.07it/s]
100%|██████████| 18/18 [00:00<00:00, 85.15it/s]
100%|██████████| 18/18 [00:00<00:00, 71.89it/s]
100%|██████████| 18/18 [00:00<00:00, 81.72it/s]
100%|██████████| 18/18 [00:00<00:00, 90.02it/s] 


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')
FOLD 11:  acc 0.58, auc 0.5666666666666667, specificity 0.6333333333333333, sensitivity 0.5
FOLD 11:  MV acc 0.6, MV auc 0.5833333333333334, MV specificity 0.6666666666666666, MV sensitivity 0.5
0.629090909090909 0.6151515151515152
0.6363636363636364 0.6136363636363636
Experiment done


# ALL features

In [6]:
df = df_features
pca_n_components = None
standardize = True
test_metrics={}  
#majority vote results
test_metrics_mv={} 
test_n_splits = 11
group_kfold_test = GroupKFold(n_splits=test_n_splits)
seed= 11
df_pid = df['id']
df_y = df['labels']
fold_c =1 

for train_index, test_index in group_kfold_test.split(df, 
                                                  df_y, 
                                                  df_pid):
    random.seed(seed)
    random.shuffle(train_index)
    X_train, X_test = df.iloc[train_index], df.iloc[test_index]
    y_train, y_test = df_y.iloc[train_index], df_y.iloc[test_index]
    
    X_test = X_test.drop(columns=['id', 'labels'])
    X_train_pid = X_train.pop('id')
    X_train = X_train.drop(columns=['labels'])
    
    # Do cross-validation for hyperparam tuning
    group_kfold_val = GroupKFold(n_splits=5)
    metrics={}
    #X_train_y = df.pop('class')
    for subtrain_index, valid_index in group_kfold_val.split(X_train, 
                                                      y_train, 
                                                      X_train_pid):
                                   
        X_subtrain, X_valid = X_train.iloc[subtrain_index], X_train.iloc[valid_index]
        y_subtrain, y_valid = y_train.iloc[subtrain_index], y_train.iloc[valid_index]


        #standardize
        if standardize:
            scaler = StandardScaler()
            X_subtrain = scaler.fit_transform(X_subtrain)
            X_valid = scaler.transform(X_valid)
                

        for param in tqdm(params):
            if str(param) not in metrics.keys() :
                metrics[str(param)] ={'acc':[], 'auc':[], 'sensitivity':[], 'specificity':[]}
                                   
            acc, auc, specificity, sensitivity,_ = train_valid(param, X_subtrain,X_valid,y_subtrain, y_valid)
            metrics[str(param)]['auc'].append(auc)
            metrics[str(param)]['acc'].append(acc)
            metrics[str(param)]['sensitivity'].append(sensitivity)
            metrics[str(param)]['specificity'].append(specificity)
    #log validation metrics for all combination of params
    log_val_metrics(params, metrics, test_n_splits, pca_n_components, standardize)
    
    #highest accuracy
    index_param_max = np.array([np.array(metrics[str(param)]['auc']).mean() for param in params]).argmax()
    print('From all the combinations, the highest accuracy was achieved with', params[index_param_max])
    #train and test with max param

    #standardize
    if standardize:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
    
    acc, auc, specificity, sensitivity, predictions = train_valid(params[index_param_max], X_train, X_test, y_train, y_test)
    
    #compute majority vote metrics
    acc_mv, auc_mv, specificity_mv, sensitivity_mv = get_majority_vote(y_test, predictions)
    
    print('FOLD '+ str(fold_c) + ':  acc ' + str(acc) +  ', auc ' +  str(auc) +  ', specificity '+ str(specificity)
          + ', sensitivity ' + str(sensitivity))
    print('FOLD '+ str(fold_c) + ':  MV acc ' + str(acc_mv) +  ', MV auc ' +  str(auc_mv) +  ', MV specificity '+ str(specificity_mv)
          + ', MV sensitivity ' + str(sensitivity_mv))
    
    test_metrics[fold_c]=  {'acc':acc, 'auc':auc, 'sensitivity':sensitivity, 'specificity':specificity, 'param':params[index_param_max]}
    test_metrics_mv[fold_c]=  {'acc':acc_mv, 'auc':auc_mv, 'sensitivity':sensitivity_mv, 'specificity':specificity_mv, 'param':params[index_param_max]}
    
    fold_c +=1 

log_test_metrics(test_metrics, test_metrics_mv, test_n_splits, 'AVG Pooling Inception features + PCA + SVM',None, seed, pca_n_components, standardize)


100%|██████████| 18/18 [00:03<00:00,  4.97it/s]
100%|██████████| 18/18 [00:04<00:00,  3.68it/s]
100%|██████████| 18/18 [00:05<00:00,  3.25it/s]
100%|██████████| 18/18 [00:06<00:00,  2.83it/s]
100%|██████████| 18/18 [00:03<00:00,  4.63it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 1:  acc 1.0, auc 1.0, specificity 1.0, sensitivity 1.0
FOLD 1:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:07<00:00,  2.43it/s]
100%|██████████| 18/18 [00:07<00:00,  2.46it/s]
100%|██████████| 18/18 [00:03<00:00,  4.54it/s]
100%|██████████| 18/18 [00:05<00:00,  3.15it/s]
100%|██████████| 18/18 [00:05<00:00,  3.05it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 2:  acc 0.8, auc 0.8166666666666667, specificity 0.7333333333333333, sensitivity 0.9
FOLD 2:  MV acc 0.8, MV auc 0.8333333333333334, MV specificity 0.6666666666666666, MV sensitivity 1.0


100%|██████████| 18/18 [00:05<00:00,  3.17it/s]
100%|██████████| 18/18 [00:05<00:00,  3.14it/s]
100%|██████████| 18/18 [00:03<00:00,  5.31it/s]
100%|██████████| 18/18 [00:02<00:00,  7.16it/s]
100%|██████████| 18/18 [00:02<00:00,  6.02it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)
FOLD 3:  acc 0.42, auc 0.4166666666666667, specificity 0.4, sensitivity 0.43333333333333335
FOLD 3:  MV acc 0.4, MV auc 0.41666666666666663, MV specificity 0.5, MV sensitivity 0.3333333333333333


100%|██████████| 18/18 [00:07<00:00,  2.51it/s]
100%|██████████| 18/18 [00:06<00:00,  2.93it/s]
100%|██████████| 18/18 [00:04<00:00,  3.79it/s]
100%|██████████| 18/18 [00:05<00:00,  3.09it/s]
100%|██████████| 18/18 [00:05<00:00,  3.57it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 4:  acc 0.88, auc 0.7, specificity 0.4, sensitivity 1.0
FOLD 4:  MV acc 0.8, MV auc 0.5, MV specificity 0.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:04<00:00,  3.79it/s]
100%|██████████| 18/18 [00:06<00:00,  2.71it/s]
100%|██████████| 18/18 [00:09<00:00,  1.96it/s]
100%|██████████| 18/18 [00:03<00:00,  4.94it/s]
100%|██████████| 18/18 [00:07<00:00,  2.39it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')
FOLD 5:  acc 0.96, auc 0.975, specificity 1.0, sensitivity 0.95
FOLD 5:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:03<00:00,  5.36it/s]
100%|██████████| 18/18 [00:03<00:00,  5.15it/s]
100%|██████████| 18/18 [00:02<00:00,  6.67it/s]
100%|██████████| 18/18 [00:02<00:00,  6.66it/s]
100%|██████████| 18/18 [00:06<00:00,  2.66it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 6:  acc 0.66, auc 0.5625, specificity 0.4, sensitivity 0.725
FOLD 6:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:03<00:00,  5.49it/s]
100%|██████████| 18/18 [00:04<00:00,  4.11it/s]
100%|██████████| 18/18 [00:05<00:00,  3.42it/s]
100%|██████████| 18/18 [00:04<00:00,  3.77it/s]
100%|██████████| 18/18 [00:03<00:00,  4.86it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 10, None)


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 7:  acc 0.6, auc 0.44999999999999996, specificity 0.2, sensitivity 0.7
FOLD 7:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:05<00:00,  3.21it/s]
100%|██████████| 18/18 [00:04<00:00,  4.27it/s]
100%|██████████| 18/18 [00:02<00:00,  6.65it/s]
100%|██████████| 18/18 [00:02<00:00,  7.03it/s]
100%|██████████| 18/18 [00:03<00:00,  4.76it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1000, None)
FOLD 8:  acc 0.76, auc 0.475, specificity 0.0, sensitivity 0.95
FOLD 8:  MV acc 0.8, MV auc 0.5, MV specificity 0.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:05<00:00,  3.10it/s]
100%|██████████| 18/18 [00:06<00:00,  2.90it/s]
100%|██████████| 18/18 [00:05<00:00,  3.59it/s]
100%|██████████| 18/18 [00:03<00:00,  4.81it/s]
100%|██████████| 18/18 [00:03<00:00,  5.03it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 9:  acc 0.36, auc 0.26249999999999996, specificity 0.1, sensitivity 0.425
FOLD 9:  MV acc 0.4, MV auc 0.25, MV specificity 0.0, MV sensitivity 0.5


100%|██████████| 18/18 [00:08<00:00,  2.09it/s]
100%|██████████| 18/18 [00:09<00:00,  1.98it/s]
100%|██████████| 18/18 [00:04<00:00,  3.64it/s]
100%|██████████| 18/18 [00:05<00:00,  3.06it/s]
100%|██████████| 18/18 [00:07<00:00,  2.56it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 10:  acc 0.56, auc 0.3875, specificity 0.1, sensitivity 0.675
FOLD 10:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:03<00:00,  5.45it/s]
100%|██████████| 18/18 [00:03<00:00,  5.16it/s]
100%|██████████| 18/18 [00:03<00:00,  5.80it/s]
100%|██████████| 18/18 [00:02<00:00,  6.63it/s]
100%|██████████| 18/18 [00:05<00:00,  3.22it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1, None)
FOLD 11:  acc 0.98, auc 0.9833333333333334, specificity 0.9666666666666667, sensitivity 1.0
FOLD 11:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0
0.7254545454545455 0.6390151515151516
0.7272727272727273 0.6022727272727273
Experiment done


In [7]:
df = df_features
pca_n_components = None
standardize = False
test_metrics={}  
#majority vote results
test_metrics_mv={} 
test_n_splits = 11
group_kfold_test = GroupKFold(n_splits=test_n_splits)
seed= 11
df_pid = df['id']
df_y = df['labels']
fold_c =1 

for train_index, test_index in group_kfold_test.split(df, 
                                                  df_y, 
                                                  df_pid):
    random.seed(seed)
    random.shuffle(train_index)
    X_train, X_test = df.iloc[train_index], df.iloc[test_index]
    y_train, y_test = df_y.iloc[train_index], df_y.iloc[test_index]
    
    X_test = X_test.drop(columns=['id', 'labels'])
    X_train_pid = X_train.pop('id')
    X_train = X_train.drop(columns=['labels'])
    
    # Do cross-validation for hyperparam tuning
    group_kfold_val = GroupKFold(n_splits=5)
    metrics={}
    #X_train_y = df.pop('class')
    for subtrain_index, valid_index in group_kfold_val.split(X_train, 
                                                      y_train, 
                                                      X_train_pid):
                                   
        X_subtrain, X_valid = X_train.iloc[subtrain_index], X_train.iloc[valid_index]
        y_subtrain, y_valid = y_train.iloc[subtrain_index], y_train.iloc[valid_index]
                

        for param in tqdm(params):
            if str(param) not in metrics.keys() :
                metrics[str(param)] ={'acc':[], 'auc':[], 'sensitivity':[], 'specificity':[]}
                                   
            acc, auc, specificity, sensitivity,_ = train_valid(param, X_subtrain,X_valid,y_subtrain, y_valid)
            metrics[str(param)]['auc'].append(auc)
            metrics[str(param)]['acc'].append(acc)
            metrics[str(param)]['sensitivity'].append(sensitivity)
            metrics[str(param)]['specificity'].append(specificity)
    #log validation metrics for all combination of params
    log_val_metrics(params, metrics, test_n_splits, pca_n_components, standardize)
    
    #highest accuracy
    index_param_max = np.array([np.array(metrics[str(param)]['auc']).mean() for param in params]).argmax()
    print('From all the combinations, the highest accuracy was achieved with', params[index_param_max])
    #train and test with max param
    
    acc, auc, specificity, sensitivity, predictions = train_valid(params[index_param_max], X_train, X_test, y_train, y_test)
    
    #compute majority vote metrics
    acc_mv, auc_mv, specificity_mv, sensitivity_mv = get_majority_vote(y_test, predictions)
    
    print('FOLD '+ str(fold_c) + ':  acc ' + str(acc) +  ', auc ' +  str(auc) +  ', specificity '+ str(specificity)
          + ', sensitivity ' + str(sensitivity))
    print('FOLD '+ str(fold_c) + ':  MV acc ' + str(acc_mv) +  ', MV auc ' +  str(auc_mv) +  ', MV specificity '+ str(specificity_mv)
          + ', MV sensitivity ' + str(sensitivity_mv))
    
    test_metrics[fold_c]=  {'acc':acc, 'auc':auc, 'sensitivity':sensitivity, 'specificity':specificity, 'param':params[index_param_max]}
    test_metrics_mv[fold_c]=  {'acc':acc_mv, 'auc':auc_mv, 'sensitivity':sensitivity_mv, 'specificity':specificity_mv, 'param':params[index_param_max]}
    
    fold_c +=1 

log_test_metrics(test_metrics, test_metrics_mv, test_n_splits, 'AVG Pooling Inception features + PCA + SVM',None, seed, pca_n_components, standardize)


  and should_run_async(code)
100%|██████████| 18/18 [00:04<00:00,  3.78it/s]
100%|██████████| 18/18 [00:04<00:00,  3.77it/s]
100%|██████████| 18/18 [00:06<00:00,  2.90it/s]
100%|██████████| 18/18 [00:13<00:00,  1.32it/s]
100%|██████████| 18/18 [00:10<00:00,  1.75it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 1:  acc 0.96, auc 0.9666666666666667, specificity 1.0, sensitivity 0.9333333333333333
FOLD 1:  MV acc 1.0, MV auc 1.0, MV specificity 1.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:06<00:00,  2.91it/s]
100%|██████████| 18/18 [00:04<00:00,  3.82it/s]
100%|██████████| 18/18 [00:04<00:00,  3.99it/s]
100%|██████████| 18/18 [00:03<00:00,  4.58it/s]
100%|██████████| 18/18 [00:04<00:00,  4.07it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 2:  acc 0.86, auc 0.8416666666666667, specificity 0.9333333333333333, sensitivity 0.75
FOLD 2:  MV acc 0.8, MV auc 0.75, MV specificity 1.0, MV sensitivity 0.5


100%|██████████| 18/18 [00:08<00:00,  2.12it/s]
100%|██████████| 18/18 [00:07<00:00,  2.25it/s]
100%|██████████| 18/18 [00:04<00:00,  4.07it/s]
100%|██████████| 18/18 [00:04<00:00,  4.08it/s]
100%|██████████| 18/18 [00:08<00:00,  2.03it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 3:  acc 0.32, auc 0.275, specificity 0.05, sensitivity 0.5
FOLD 3:  MV acc 0.2, MV auc 0.16666666666666666, MV specificity 0.0, MV sensitivity 0.3333333333333333


100%|██████████| 18/18 [00:08<00:00,  2.01it/s]
100%|██████████| 18/18 [00:06<00:00,  2.80it/s]
100%|██████████| 18/18 [00:04<00:00,  3.76it/s]
100%|██████████| 18/18 [00:04<00:00,  4.10it/s]
100%|██████████| 18/18 [00:04<00:00,  3.88it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('linear', 1000, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 4:  acc 0.8, auc 0.5375, specificity 0.1, sensitivity 0.975
FOLD 4:  MV acc 0.8, MV auc 0.5, MV specificity 0.0, MV sensitivity 1.0


100%|██████████| 18/18 [00:04<00:00,  3.74it/s]
100%|██████████| 18/18 [00:04<00:00,  3.88it/s]
100%|██████████| 18/18 [00:04<00:00,  4.08it/s]
100%|██████████| 18/18 [00:04<00:00,  4.28it/s]
100%|██████████| 18/18 [00:04<00:00,  3.69it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 5:  acc 0.8, auc 0.875, specificity 1.0, sensitivity 0.75
FOLD 5:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:13<00:00,  1.37it/s]
100%|██████████| 18/18 [00:07<00:00,  2.43it/s]
100%|██████████| 18/18 [00:04<00:00,  3.96it/s]
100%|██████████| 18/18 [00:04<00:00,  4.00it/s]
100%|██████████| 18/18 [00:05<00:00,  3.57it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 6:  acc 0.84, auc 0.9, specificity 1.0, sensitivity 0.8
FOLD 6:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:09<00:00,  1.95it/s]
100%|██████████| 18/18 [00:12<00:00,  1.41it/s]
100%|██████████| 18/18 [00:09<00:00,  1.82it/s]
100%|██████████| 18/18 [00:04<00:00,  4.01it/s]
100%|██████████| 18/18 [00:04<00:00,  3.68it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 7:  acc 0.68, auc 0.6125, specificity 0.5, sensitivity 0.725
FOLD 7:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:08<00:00,  2.07it/s]
100%|██████████| 18/18 [00:06<00:00,  2.88it/s]
100%|██████████| 18/18 [00:04<00:00,  4.31it/s]
100%|██████████| 18/18 [00:04<00:00,  4.28it/s]
100%|██████████| 18/18 [00:04<00:00,  3.66it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 8:  acc 0.82, auc 0.8875, specificity 1.0, sensitivity 0.775
FOLD 8:  MV acc 0.8, MV auc 0.875, MV specificity 1.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:11<00:00,  1.53it/s]
100%|██████████| 18/18 [00:08<00:00,  2.11it/s]
100%|██████████| 18/18 [00:09<00:00,  1.94it/s]
100%|██████████| 18/18 [00:07<00:00,  2.51it/s]
100%|██████████| 18/18 [00:06<00:00,  2.89it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 9:  acc 0.42, auc 0.375, specificity 0.3, sensitivity 0.45
FOLD 9:  MV acc 0.4, MV auc 0.25, MV specificity 0.0, MV sensitivity 0.5


100%|██████████| 18/18 [00:09<00:00,  1.92it/s]
100%|██████████| 18/18 [00:07<00:00,  2.35it/s]
100%|██████████| 18/18 [00:07<00:00,  2.48it/s]
100%|██████████| 18/18 [00:04<00:00,  4.32it/s]
100%|██████████| 18/18 [00:04<00:00,  4.02it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.001, 1, 'balanced')


  0%|          | 0/18 [00:00<?, ?it/s]

FOLD 10:  acc 0.62, auc 0.425, specificity 0.1, sensitivity 0.75
FOLD 10:  MV acc 0.6, MV auc 0.375, MV specificity 0.0, MV sensitivity 0.75


100%|██████████| 18/18 [00:08<00:00,  2.02it/s]
100%|██████████| 18/18 [00:08<00:00,  2.12it/s]
100%|██████████| 18/18 [00:09<00:00,  1.88it/s]
100%|██████████| 18/18 [00:07<00:00,  2.41it/s]
100%|██████████| 18/18 [00:10<00:00,  1.74it/s]


Done logging validation params in MLFlow
From all the combinations, the highest accuracy was achieved with ('kernel', 0.0001, 10, 'balanced')
FOLD 11:  acc 0.8, auc 0.75, specificity 1.0, sensitivity 0.5
FOLD 11:  MV acc 0.8, MV auc 0.75, MV specificity 1.0, MV sensitivity 0.5
0.72 0.6768939393939394
0.6909090909090909 0.6174242424242423
Experiment done
