### LibLinear SVM Cross-Validation, Training, Testing, and Evaluation

In [3]:
import os, joblib, utils, json, csv, ast
from utils import *
import pandas as pd
import numpy as np
from scipy.special import softmax
from joblib import Parallel, delayed
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import ParameterSampler
from sklearn.calibration import CalibratedClassifierCV
from scipy.stats import uniform, loguniform

In [31]:
# These lists are used to determine imputation strategies for the specified features (inherited from LoGoFunc)
NEGONE_FEATURES = ['MOD_RES','REGION','INTERACTION_REGION','REQUIRED_FOR_INTER','ATP_binding_gbind','Ca2+_binding_gbind','DNA_binding_gbind','HEME_binding_gbind','Mg2+_binding_gbind','Mn2+_binding_gbind','RNA_binding_gbind','Dist2Mutation','BLOSUM62','ProteinLengthChange','TSSDistance','1000Gp3_AF','UK10K_AF','gnomAD_exomes_AF','gnomAD_genomes_AF','MSC_95CI','rel_cDNA_pos','rel_CDS_pos','rel_prot_pos','GDI','Selective_pressure','Clarks_distance','CDS_len','Number_of_paralogs','denovo_Zscore','RVIS','Indispensability_score','RSA','ASA','RSA_Zfit','before_RSA_3','before_RSA_8','before_RSA_15','after_RSA_3','after_RSA_8','after_RSA_15','before_ASA_3','before_ASA_8','before_ASA_15','after_ASA_3','after_ASA_8','after_ASA_15','Phosphorylation','Acetylation','Methylation','Ubiquitination','Glycosylation','PTM','AF_Relative_ASA','IUPRED2','ANCHOR2','before_IUPRED_3','before_IUPRED_8','before_IUPRED_15','after_IUPRED_3','after_IUPRED_8','after_IUPRED_15','before_ANCHOR_3','before_ANCHOR_8','before_ANCHOR_15','after_ANCHOR_3','after_ANCHOR_8','after_ANCHOR_15','A3D_SCORE','n_contacts','distance_com','concavity_score','S_DDG[SEQ]','S_DDG[3D]','hgmd_mutcount','gnomsingle_mutcount','gnom_mutcount','AF_confidence','isHomomultimer','num_interactions','ppi_combined_0','ppi_combined_1','ppi_combined_2','ppi_combined_3','ppi_combined_4','ppi_combined_5','ppi_combined_6','ppi_combined_7','ppi_combined_8','ppi_combined_9','ppi_combined_10','ppi_combined_11','ppi_combined_12','ppi_combined_13','ppi_combined_14','ppi_combined_15','ppi_combined_16','ppi_combined_17','ppi_combined_18','ppi_combined_19','ppi_combined_20','ppi_combined_21','ppi_combined_22','ppi_combined_23','ppi_combined_24','ppi_combined_25','ppi_combined_26','ppi_combined_27','ppi_combined_28','ppi_combined_29','ppi_combined_30','ppi_combined_31','ppi_combined_32','ppi_combined_33','ppi_combined_34','ppi_combined_35','ppi_combined_36','ppi_combined_37','ppi_combined_38','ppi_combined_39','ppi_combined_40','ppi_combined_41
','ppi_combined_42','ppi_combined_43','ppi_combined_44','ppi_combined_45','ppi_combined_46','ppi_combined_47','ppi_combined_48','ppi_combined_49','ppi_combined_50','ppi_combined_51','ppi_combined_52','ppi_combined_53','ppi_combined_54','ppi_combined_55','ppi_combined_56','ppi_combined_57','ppi_combined_58','ppi_combined_59','ppi_combined_60','ppi_combined_61','ppi_combined_62','ppi_combined_63','s_het','DRNApredDNAscore_aa_window_3_prev','DRNApredDNAscore_aa_window_8_prev','DRNApredDNAscore_aa_window_15_prev','DRNApredDNAscore_aa_window_3_next','DRNApredDNAscore_aa_window_8_next','DRNApredDNAscore_aa_window_15_next','DRNApredDNAscore_aa','ASAquick_normscore_aa_window_3_prev','ASAquick_normscore_aa_window_8_prev','ASAquick_normscore_aa_window_15_prev','ASAquick_normscore_aa_window_3_next','ASAquick_normscore_aa_window_8_next','ASAquick_normscore_aa_window_15_next','ASAquick_normscore_aa','ASAquick_rawscore_aa_window_3_prev','ASAquick_rawscore_aa_window_8_prev','ASAquick_rawscore_aa_window_15_prev','ASAquick_rawscore_aa_window_3_next','ASAquick_rawscore_aa_window_8_next','ASAquick_rawscore_aa_window_15_next','ASAquick_rawscore_aa','DFLpredScore_aa_window_3_prev','DFLpredScore_aa_window_8_prev','DFLpredScore_aa_window_15_prev','DFLpredScore_aa_window_3_next','DFLpredScore_aa_window_8_next','DFLpredScore_aa_window_15_next','DFLpredScore_aa','DRNApredRNAscore_aa_window_3_prev','DRNApredRNAscore_aa_window_8_prev','DRNApredRNAscore_aa_window_15_prev','DRNApredRNAscore_aa_window_3_next','DRNApredRNAscore_aa_window_8_next','DRNApredRNAscore_aa_window_15_next','DRNApredRNAscore_aa','DisoDNAscore_aa_window_3_prev','DisoDNAscore_aa_window_8_prev','DisoDNAscore_aa_window_15_prev','DisoDNAscore_aa_window_3_next','DisoDNAscore_aa_window_8_next','DisoDNAscore_aa_window_15_next','DisoDNAscore_aa','DisoPROscore_aa_window_3_prev','DisoPROscore_aa_window_8_prev','DisoPROscore_aa_window_15_prev','DisoPROscore_aa_window_3_next','DisoPROscore_aa_window_8_next','DisoPROscore_aa_window_15_n
ext','DisoPROscore_aa','DisoRNAscore_aa_window_3_prev','DisoRNAscore_aa_window_8_prev','DisoRNAscore_aa_window_15_prev','DisoRNAscore_aa_window_3_next','DisoRNAscore_aa_window_8_next','DisoRNAscore_aa_window_15_next','DisoRNAscore_aa','MMseq2_conservation_level_aa_window_3_prev','MMseq2_conservation_level_aa_window_8_prev','MMseq2_conservation_level_aa_window_15_prev','MMseq2_conservation_level_aa_window_3_next','MMseq2_conservation_level_aa_window_8_next','MMseq2_conservation_level_aa_window_15_next','MMseq2_conservation_level_aa','MMseq2_conservation_score_aa_window_3_prev','MMseq2_conservation_score_aa_window_8_prev','MMseq2_conservation_score_aa_window_15_prev','MMseq2_conservation_score_aa_window_3_next','MMseq2_conservation_score_aa_window_8_next','MMseq2_conservation_score_aa_window_15_next','MMseq2_conservation_score_aa','MoRFchibiScore_aa_window_3_prev','MoRFchibiScore_aa_window_8_prev','MoRFchibiScore_aa_window_15_prev','MoRFchibiScore_aa_window_3_next','MoRFchibiScore_aa_window_8_next','MoRFchibiScore_aa_window_15_next','MoRFchibiScore_aa','PSIPRED_helix_aa_window_3_prev','PSIPRED_helix_aa_window_8_prev','PSIPRED_helix_aa_window_15_prev','PSIPRED_helix_aa_window_3_next','PSIPRED_helix_aa_window_8_next','PSIPRED_helix_aa_window_15_next','PSIPRED_helix_aa','PSIPRED_strand_aa_window_3_prev','PSIPRED_strand_aa_window_8_prev','PSIPRED_strand_aa_window_15_prev','PSIPRED_strand_aa_window_3_next','PSIPRED_strand_aa_window_8_next','PSIPRED_strand_aa_window_15_next','PSIPRED_strand_aa','SCRIBERscore_aa_window_3_prev','SCRIBERscore_aa_window_8_prev','SCRIBERscore_aa_window_15_prev','SCRIBERscore_aa_window_3_next','SCRIBERscore_aa_window_8_next','SCRIBERscore_aa_window_15_next','SCRIBERscore_aa','SignalP_score_aa_window_3_prev','SignalP_score_aa_window_8_prev','SignalP_score_aa_window_15_prev','SignalP_score_aa_window_3_next','SignalP_score_aa_window_8_next','SignalP_score_aa_window_15_next','SignalP_score_aa','gtex_Adipose_-_Subcutaneous','gtex_Adipose_-_Visceral_(O
mentum)','gtex_Adrenal_Gland','gtex_Artery_-_Aorta','gtex_Artery_-_Coronary','gtex_Artery_-_Tibial','gtex_Bladder','gtex_Brain_-_Amygdala','gtex_Brain_-_Anterior_cingulate_cortex_(BA24)','gtex_Brain_-_Caudate_(basal_ganglia)','gtex_Brain_-_Cerebellar_Hemisphere','gtex_Brain_-_Cerebellum','gtex_Brain_-_Cortex','gtex_Brain_-_Frontal_Cortex_(BA9)','gtex_Brain_-_Hippocampus','gtex_Brain_-_Hypothalamus','gtex_Brain_-_Nucleus_accumbens_(basal_ganglia)','gtex_Brain_-_Putamen_(basal_ganglia)','gtex_Brain_-_Spinal_cord_(cervical_c-1)','gtex_Brain_-_Substantia_nigra','gtex_Breast_-_Mammary_Tissue','gtex_Cells_-_Cultured_fibroblasts','gtex_Cells_-_EBV-transformed_lymphocytes','gtex_Cervix_-_Ectocervix','gtex_Cervix_-_Endocervix','gtex_Colon_-_Sigmoid','gtex_Colon_-_Transverse','gtex_Esophagus_-_Gastroesophageal_Junction','gtex_Esophagus_-_Mucosa','gtex_Esophagus_-_Muscularis','gtex_Fallopian_Tube','gtex_Heart_-_Atrial_Appendage','gtex_Heart_-_Left_Ventricle','gtex_Kidney_-_Cortex','gtex_Kidney_-_Medulla','gtex_Liver','gtex_Lung','gtex_Minor_Salivary_Gland','gtex_Muscle_-_Skeletal','gtex_Nerve_-_Tibial','gtex_Ovary','gtex_Pancreas','gtex_Pituitary','gtex_Prostate','gtex_Skin_-_Not_Sun_Exposed_(Suprapubic)','gtex_Skin_-_Sun_Exposed_(Lower_leg)','gtex_Small_Intestine_-_Terminal_Ileum','gtex_Spleen','gtex_Stomach','gtex_Testis','gtex_Thyroid','gtex_Uterus','gtex_Vagina','gtex_Whole_Blood','haplo','haplo_imputed','PHOSPHORYLATION','ACETYLATION','UBIQUITINATION','S-NITROSYLATION','N-GLYCOSYLATION','METHYLATION','O-GLYCOSYLATION','MYRISTOYLATION','C-GLYCOSYLATION','SUMOYLATION','S-GLYCOSYLATION','polyphen_nobs','polyphen_normasa','polyphen_dvol','polyphen_dprop','polyphen_bfact','polyphen_hbonds','polyphen_avenhet','polyphen_mindhet','polyphen_avenint','polyphen_mindint','polyphen_avensit','polyphen_mindsit','polyphen_idpmax','polyphen_idpsnp','polyphen_idqmin','motifECount','motifEHIPos','motifEScoreChng','Dst2Splice','motifDist','EncodeH3K4me1-sum','EncodeH3K4me1-max','EncodeH3K4me
2-sum','EncodeH3K4me2-max','EncodeH3K4me3-sum','EncodeH3K4me3-max','EncodeH3K9ac-sum','EncodeH3K9ac-max','EncodeH3K9me3-sum','EncodeH3K9me3-max','EncodeH3K27ac-sum','EncodeH3K27ac-max','EncodeH3K27me3-sum','EncodeH3K27me3-max','EncodeH3K36me3-sum','EncodeH3K36me3-max','EncodeH3K79me2-sum','EncodeH3K79me2-max','EncodeH4K20me1-sum','EncodeH4K20me1-max','EncodeH2AFZ-sum','EncodeH2AFZ-max','EncodeDNase-sum','EncodeDNase-max','EncodetotalRNA-sum','EncodetotalRNA-max','Grantham_x','Freq100bp','Rare100bp','Sngl100bp','Freq1000bp','Rare1000bp','Sngl1000bp','Freq10000bp','Rare10000bp','Sngl10000bp','RemapOverlapTF','RemapOverlapCL','Charge','Volume','Hydrophobicity','Polarity','Ex','PAM250','JM','HGMD2003','VB','Transition','COSMIC','COSMICvsSWISSPROT','HAPMAP','COSMICvsHAPMAP',]
MEDIAN_FEATURES = ['CADD_raw','Conservation','MaxEntScan_alt','MaxEntScan_diff','MaxEntScan_ref','ada_score','rf_score','FATHMM_score','GERPplus_plus_NR','GERPplus_plus_RS','GM12878_fitCons_score','GenoCanyon_score','H1_hESC_fitCons_score','HUVEC_fitCons_score','LINSIGHT','LIST_S2_score','LRT_score','M_CAP_score','MPC_score','MVP_score','MutationAssessor_score','MutationTaster_score','PROVEAN_score','SiPhy_29way_logOdds','VEST4_score','fathmm_MKL_coding_score','fathmm_XF_coding_score','integrated_fitCons_score','phastCons100way_vertebrate','phastCons17way_primate','phastCons30way_mammalian','phyloP100way_vertebrate','phyloP17way_primate','phyloP30way_mammalian','Condel_score','SIFT_score','NearestExonJB_distance','NearestExonJB_len','Dominant_probability','Recessive_probability','polyphen_dscore','polyphen_score1','polyphen_score2','ConsScore','GC','CpG','minDistTSS','minDistTSE','priPhCons','mamPhCons','verPhCons','priPhyloP','mamPhyloP','verPhyloP','bStatistic_y','targetScan','mirSVR-Score','mirSVR-E','mirSVR-Aln','cHmm_E1','cHmm_E2','cHmm_E3','cHmm_E4','cHmm_E5','cHmm_E6','cHmm_E7','cHmm_E8','cHmm_E9','cHmm_E10','cHmm_E11','cHmm_E12','cHmm_E13','cHmm_E14','cHmm_E15','cHmm_E16','cHmm_E17','cHmm_E18','cHmm_E19','cHmm_E20','cHmm_E21','cHmm_E22','cHmm_E23','cHmm_E24','cHmm_E25','GerpRS','GerpRSpval','GerpN','GerpS','tOverlapMotifs','SpliceAI-acc-gain','SpliceAI-acc-loss','SpliceAI-don-gain','SpliceAI-don-loss','MMSp_acceptorIntron','MMSp_acceptor','MMSp_exon','MMSp_donor','MMSp_donorIntron','dbscSNV-ada_score','dbscSNV-rf_score',]

### Preprocessing and Validation Split

In [4]:
# Run identifier; every artefact location below is derived from it.
model_type = 'linearSVC'
model_path = f'../models/{model_type}-validation'
output_path = f'../results/{model_type}-validation.csv'
metrics_file = f'../metrics/{model_type}-validation-metrics.csv'

# Load the correct training data (the version missing `Protein_dom`):
# features first, then the label table.
X_train, y_train = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('../data/X_train_500.csv', '../data/y_train_id.csv')
)

In [33]:
# This is used to drop columns that only contain NaN values.
def drop_allnan(data):
    """Return ``data`` without the columns whose every value is missing.

    Args:
        data: pandas DataFrame to filter. It is not modified.

    Returns:
        A DataFrame that keeps, in their original order, only the columns
        holding at least one non-missing value.
    """
    # A single vectorised pass replaces the previous per-column drop, which
    # copied the frame once for every removed column and raised ValueError when
    # a column label occurred more than once (``data[col]`` is then a frame).
    return data.dropna(axis=1, how='all')

# Remove the columns that contain nothing but NaN.
X_train = drop_allnan(X_train)

# Numeric encoding of the IMPACT categories. The lookup is a plain dict access,
# so a category that is not listed here raises KeyError.
impact_vals = {
    'LOW': 0,
    'MODIFIER': 1,
    'MODERATE': 1.5,
    'HIGH': 2,
}
encoded_impacts = list(map(impact_vals.__getitem__, X_train['IMPACT']))

# Swap the raw column for the encoded one; IMPACT becomes the last column.
X_train = X_train.drop(columns=['IMPACT']).assign(IMPACT=encoded_impacts)

# Split the column labels by dtype; the preprocessor imputes conditionally on
# feature type.
numeric_features = X_train.select_dtypes(include=['number']).columns
categorical_features = X_train.select_dtypes(include=['object']).columns

# Build the preprocessor. The eight positional settings are tunable; their
# meaning is defined by utils.generate_preprocessor (not visible here) and the
# values are the same ones LoGoFunc used.
preprocessor = utils.generate_preprocessor(
    numeric_features,
    categorical_features,
    40, 0, 1, 0, 2, 0, 0, 1,
    prefix='light0',
    do_feature_subset=True,
    max_features=1,
)

# Integer-encode the labels: 'GOF' -> 1, 'LOF' -> 2, every other value -> 0.
y_train_enc = [
    1 if lab == 'GOF' else 2 if lab == 'LOF' else 0
    for lab in y_train['label']
]
y_train = y_train_enc

# Run the preprocessor on the features and the encoded labels; both names are
# rebound to whatever utils.preprocess returns.
# NOTE(review): the cell output logs an "oversampling" step, which presumably
# runs inside this call, i.e. before the train/validation split below. If so,
# resampled rows (and any statistics fitted here) are shared by both splits and
# the validation scores may be optimistic — confirm in utils.preprocess.
X_train, y_train = utils.preprocess(preprocessor, X_train, y_train)

# This creates a directory for the preprocessor and trained models.
os.makedirs(model_path, exist_ok=True)
# Persist the preprocessor object in the same directory as the models.
joblib.dump(preprocessor, f'{model_path}/preprocessor.joblib')

# This is where the train-val split happens in an 80-20 ratio.
# The split is seeded (random_state=41) and is not stratified.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=41)

initial took 0.4806478023529053 to run.
(25546, 498)
(25546, 472)
removeba took 0.017195701599121094 to run.
variance_threshold took 0.0758810043334961 to run.
oversampling took 0.13650774955749512 to run.


### Training and Validation

In [34]:
# Base LinearSVC estimator handed to the randomized search below; its
# random_state is fixed at 42.
svm = LinearSVC(random_state=42)

# This is the search space of parameters that will be assessed.
def param_dist(dual=None):
    """Build the hyper-parameter distributions for the LinearSVC random search.

    The loss/penalty options depend on the formulation, so the returned space
    is conditional on ``dual``.

    Args:
        dual: ``True`` selects the dual formulation (hinge or squared-hinge
            loss), ``False`` the primal one (l1 or l2 penalty, squared-hinge
            loss). ``None`` (the default) keeps the historical behaviour: the
            branch is chosen by one unseeded ``np.random.random()`` draw, so the
            returned space can differ from run to run. Pass an explicit value
            to make the search space reproducible.

    Returns:
        dict mapping parameter names to lists or scipy distributions, usable as
        ``param_distributions``.
    """
    params = {
        # scipy's uniform(loc, scale) covers [loc, loc + scale] = [0.1, 1000.1].
        'C': uniform(0.1, 1000),
        # NOTE(review): the dict options name only classes 0 and 1, while the
        # labels in this notebook are encoded as 0/1/2 — presumably class 2 is
        # meant to keep the default weight; confirm.
        'class_weight': [None, 'balanced', {0: 1, 1: 2}, {0: 2, 1: 1}],
        'max_iter': [1000, 2000, 5000, 10000],
        'tol': loguniform(1e-6, 1e-2),
    }

    # Parameter options vary based on whether 'dual' is True or False.
    if dual is None:
        dual = np.random.random() < 0.5

    if dual:
        params['dual'] = [True]
        params['loss'] = ['hinge', 'squared_hinge']
    else:
        params['dual'] = [False]
        params['penalty'] = ['l1', 'l2']
        params['loss'] = ['squared_hinge']

    return params

# Randomized search: 64 sampled candidates, each scored with 3-fold
# cross-validation on all available cores. The sampling is seeded
# (random_state=42).
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_dist(),
    n_iter=64,
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

# Run the search on the training split.
random_search.fit(X_train, y_train)

# Best-scoring parameter set found by the search.
best_params = random_search.best_params_
print(f'Best parameters found: {best_params}')

# Search results: one entry per sampled parameter set.
cv_results = random_search.cv_results_
mean_scores = np.asarray(cv_results['mean_test_score'], dtype=float)

# This saves the total number of parameter sets that were explored to a variable.
n_params = len(cv_results['params'])

# This is where the best params (27 at most) are pooled.
# np.argsort sorts NaN to the end, so candidates without a valid mean score
# (NaN) are removed first; otherwise the "top" slice would pick exactly those.
ranked_indices = np.argsort(mean_scores)
ranked_indices = ranked_indices[~np.isnan(mean_scores[ranked_indices])]
top_n = min(27, len(ranked_indices))
# Explicit start index: a plain [-top_n:] would return everything for top_n == 0.
top_n_indices = ranked_indices[len(ranked_indices) - top_n:]
top_n_params = [cv_results['params'][i] for i in top_n_indices]

# This prints the pooled params in ascending score order (the last set is the best one).
print(f"Top {top_n} parameter sets:")
for i, params in enumerate(top_n_params):
    print(f"Set {i+1}:")
    for key, value in params.items():
        print(f"  {key}: {value}")
    print()

# This spits out the params to the Models directory.
# Note: JSON object keys are always strings, so the integer keys of a
# dict-valued class_weight come back as strings when this file is reloaded.
os.makedirs(model_path, exist_ok=True)
with open(f'{model_path}/top_27_params.json', 'w') as f:
    json.dump(top_n_params, f, indent=2)

# Fits a single member of the model ensemble.
def train_model(params, X_train, y_train):
    """Fit one sigmoid-calibrated LinearSVC and return it.

    Args:
        params: keyword arguments forwarded to ``LinearSVC`` (in addition to
            ``random_state=42``).
        X_train: training features.
        y_train: training labels.

    Returns:
        The fitted ``CalibratedClassifierCV`` (3-fold, sigmoid method) that
        wraps the LinearSVC.
    """
    calibrated = CalibratedClassifierCV(
        LinearSVC(random_state=42, **params),
        cv=3,
        method='sigmoid',
    )
    calibrated.fit(X_train, y_train)
    return calibrated

# Fit one calibrated model per pooled parameter set, on all available cores.
models = Parallel(n_jobs=-1)(
    delayed(train_model)(params, X_train, y_train)
    for params in top_n_params
)

# Store every model in the Models directory together with the parameter set it
# was built from (same index in both file names).
for i, model in enumerate(models):
    params = top_n_params[i]
    joblib.dump(model, f'{model_path}/svm_model_{i}.joblib')
    with open(f'{model_path}/svm_params_{i}.json', 'w') as f:
        f.write(json.dumps(params, indent=2))

def predict_model(model, data):
    """Return whatever ``model.predict_proba`` yields for ``data``.

    Kept as a module-level function so it can be wrapped with ``delayed``.
    """
    probabilities = model.predict_proba(data)
    return probabilities

# Class-probability estimates of every ensemble member on the validation set;
# once stacked, the array is indexed as (model, sample, class column).
all_preds = Parallel(n_jobs=-1)(delayed(predict_model)(model, X_val) for model in models)
all_preds = np.array(all_preds)

# Soft voting: average the probabilities over the models.
avg_preds = np.mean(all_preds, axis=0)

# argmax yields a column position, not a label. Translate it through the fitted
# classes_ so the predictions stay correct even if the labels are not exactly
# 0..n-1. Every model was fitted on the same y_train, so the first model's
# class order is used for all of them.
class_labels = models[0].classes_
final_predictions = class_labels[np.argmax(avg_preds, axis=1)]

# This performs final validation of the model.
val_accuracy = accuracy_score(y_val, final_predictions)
val_precision = precision_score(y_val, final_predictions, average='weighted')
val_recall = recall_score(y_val, final_predictions, average='weighted')
val_f1 = f1_score(y_val, final_predictions, average='weighted')
# The averaged probabilities are passed as scores for the one-vs-one AUC.
val_roc_auc = roc_auc_score(y_val, avg_preds, multi_class='ovo', average='weighted')
val_conf_matrix = confusion_matrix(y_val, final_predictions)

print(f'Validation Accuracy: {val_accuracy}')
print(f'Validation Precision: {val_precision}')
print(f'Validation Recall: {val_recall}')
print(f'Validation F1 Score: {val_f1}')
print(f'Validation ROC AUC Score: {val_roc_auc}')
print(f'Confusion Matrix:\n{val_conf_matrix}')

Fitting 3 folds for each of 64 candidates, totalling 192 fits


[CV] END C=866.2761457749352, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.00040137830238462545; total time= 1.8min
[CV] END C=866.2761457749352, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.00040137830238462545; total time= 1.9min
[CV] END C=866.2761457749352, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.00040137830238462545; total time= 2.2min
[CV] END C=181.92496720710062, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.00027964859516062485; total time= 2.0min
[CV] END C=181.92496720710062, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.00027964859516062485; total time= 2.5min
[CV] END C=181.92496720710062, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.00027964859516062485; total time= 2.7min




[CV] END C=7.1663052197174055, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0002801635158716264; total time= 2.4min




[CV] END C=7.1663052197174055, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0002801635158716264; total time= 2.4min




[CV] END C=7.1663052197174055, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0002801635158716264; total time= 2.6min
[CV] END C=56.51157902710026, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=7.068974950624607e-06; total time=10.2min
[CV] END C=56.51157902710026, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=7.068974950624607e-06; total time=10.8min
[CV] END C=56.51157902710026, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=7.068974950624607e-06; total time=13.0min
[CV] END C=90.7064345328208, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00011400863701127338; total time= 4.4min




[CV] END C=374.6401188473625, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0013145103232150123; total time=14.8min




[CV] END C=596.950157946487, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=1.7073967431528114e-06; total time=14.8min
[CV] END C=374.6401188473625, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0013145103232150123; total time=14.8min
[CV] END C=596.950157946487, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=1.7073967431528114e-06; total time=14.8min




[CV] END C=596.950157946487, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=1.7073967431528114e-06; total time=15.5min




[CV] END C=374.6401188473625, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0013145103232150123; total time=15.7min
[CV] END C=90.7064345328208, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00011400863701127338; total time= 5.2min
[CV] END C=90.7064345328208, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00011400863701127338; total time= 4.5min




[CV] END C=592.5145688620424, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0005262961031076744; total time= 3.0min




[CV] END C=592.5145688620424, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0005262961031076744; total time= 2.9min




[CV] END C=592.5145688620424, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.0005262961031076744; total time= 3.1min
[CV] END C=450.599251969543, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.007286653737491046; total time= 3.3min
[CV] END C=450.599251969543, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.007286653737491046; total time= 3.4min
[CV] END C=450.599251969543, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.007286653737491046; total time= 3.0min




[CV] END C=808.4973481164611, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.386394780402564e-06; total time= 6.6min




[CV] END C=808.4973481164611, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.386394780402564e-06; total time= 6.6min
[CV] END C=139.59386065204183, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=8.532678095658724e-06; total time=16.5min




[CV] END C=808.4973481164611, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.386394780402564e-06; total time= 6.7min
[CV] END C=139.59386065204183, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=8.532678095658724e-06; total time=19.3min
[CV] END C=241.1254660260117, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=9.565499215943819e-05; total time= 8.2min
[CV] END C=241.1254660260117, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=9.565499215943819e-05; total time= 9.7min
[CV] END C=755.4614103176525, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=0.00015375920235481777; total time= 5.1min
[CV] END C=241.1254660260117, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=9.565499215943819e-05; total time=10.6min
[CV] END C=755.4614103176525, class_w



[CV] END C=395.25023600181447, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0048696409415209; total time= 3.4min
[CV] END C=34.4885211152184, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=5.357280069601829e-06; total time=12.4min




[CV] END C=395.25023600181447, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0048696409415209; total time= 3.4min
[CV] END C=88.5925020519195, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.006993395623207717; total time= 2.9min




[CV] END C=395.25023600181447, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0048696409415209; total time= 3.4min
[CV] END C=88.5925020519195, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.006993395623207717; total time= 3.2min
[CV] END C=88.5925020519195, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.006993395623207717; total time= 3.0min




[CV] END C=296.3735057040824, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.008862326508576254; total time= 1.5min




[CV] END C=296.3735057040824, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.008862326508576254; total time= 1.7min




[CV] END C=296.3735057040824, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.008862326508576254; total time= 1.6min




[CV] END C=844.6338486781514, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0020651425578959264; total time= 4.5min




[CV] END C=844.6338486781514, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0020651425578959264; total time= 4.5min




[CV] END C=844.6338486781514, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0020651425578959264; total time= 4.6min
[CV] END C=184.95445552552704, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=6.295301484516133e-05; total time=13.2min
[CV] END C=184.95445552552704, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=6.295301484516133e-05; total time=11.4min
[CV] END C=184.95445552552704, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=6.295301484516133e-05; total time=12.2min




[CV] END C=74.14465173409036, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.004569184576834549; total time= 3.4min




[CV] END C=74.14465173409036, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.004569184576834549; total time= 3.5min




[CV] END C=74.14465173409036, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.004569184576834549; total time= 3.4min




[CV] END C=850.1385777897993, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=1.7956984225677624e-06; total time= 4.3min




[CV] END C=850.1385777897993, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=1.7956984225677624e-06; total time= 4.3min




[CV] END C=198.94240408880515, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0012164139351417069; total time=10.9min




[CV] END C=198.94240408880515, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0012164139351417069; total time=10.9min




[CV] END C=850.1385777897993, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=1.7956984225677624e-06; total time= 4.4min




[CV] END C=198.94240408880515, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=0.0012164139351417069; total time=11.2min




[CV] END C=311.0823217156622, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.00023184148549922914; total time= 2.1min




[CV] END C=311.0823217156622, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.00023184148549922914; total time= 2.1min




[CV] END C=311.0823217156622, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.00023184148549922914; total time= 2.1min




[CV] END C=274.8217929900642, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0086868214458464e-06; total time= 2.1min




[CV] END C=274.8217929900642, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0086868214458464e-06; total time= 2.1min




[CV] END C=274.8217929900642, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0086868214458464e-06; total time= 2.1min




[CV] END C=256.16832276132396, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.1305517605898387e-05; total time= 2.0min
[CV] END C=256.16832276132396, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.1305517605898387e-05; total time= 2.0min




[CV] END C=256.16832276132396, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.1305517605898387e-05; total time= 2.1min
[CV] END C=356.8533266935893, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=1.2705645329288717e-05; total time=19.8min
[CV] END C=475.4702231821118, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.004268407710065497; total time=  24.8s
[CV] END C=475.4702231821118, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.004268407710065497; total time=  18.1s
[CV] END C=475.4702231821118, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.004268407710065497; total time=  29.9s
[CV] END C=356.8533266935893, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=1.2705645329288717e-05; total time=23.3min
[CV] END C=25.51912674409519, class_weight=Non



[CV] END C=249.39222914887495, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=6.490003020716533e-06; total time=11.9min




[CV] END C=249.39222914887495, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=6.490003020716533e-06; total time=12.0min




[CV] END C=942.953570557981, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.414536876494481e-06; total time=11.6min




[CV] END C=929.7976523425731, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=2.6422690597255402e-06; total time= 4.7min




[CV] END C=942.953570557981, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.414536876494481e-06; total time=11.6min
[CV] END C=456.63457048291025, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.0037173717478250516; total time=  22.3s




[CV] END C=249.39222914887495, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=6.490003020716533e-06; total time=12.8min
[CV] END C=456.63457048291025, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.0037173717478250516; total time=  30.3s
[CV] END C=456.63457048291025, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=0.0037173717478250516; total time=  28.0s




[CV] END C=942.953570557981, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.414536876494481e-06; total time=12.0min




[CV] END C=929.7976523425731, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=2.6422690597255402e-06; total time= 4.6min




[CV] END C=906.928441545754, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.110120656497168e-05; total time= 2.2min
[CV] END C=713.344787222995, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=8.78900434140548e-06; total time=24.8min




[CV] END C=929.7976523425731, class_weight=None, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=2.6422690597255402e-06; total time= 4.6min




[CV] END C=906.928441545754, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.110120656497168e-05; total time= 2.2min




[CV] END C=906.928441545754, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=5.110120656497168e-05; total time= 2.2min
[CV] END C=713.344787222995, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=8.78900434140548e-06; total time=26.9min
[CV] END C=713.344787222995, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=8.78900434140548e-06; total time=27.1min
[CV] END C=772.3447692966574, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=1.1384639705304647e-06; total time=41.4min




[CV] END C=534.189419375442, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0162092627967784e-06; total time= 2.2min




[CV] END C=534.189419375442, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0162092627967784e-06; total time= 2.2min




[CV] END C=534.189419375442, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=3.0162092627967784e-06; total time= 2.3min
[CV] END C=558.2020020173412, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.007711312991331112; total time=  16.9s
[CV] END C=558.2020020173412, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.007711312991331112; total time=  16.5s
[CV] END C=558.2020020173412, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.007711312991331112; total time=  14.8s
[CV] END C=962.5472949421113, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.0007064515131755324; total time= 1.2min
[CV] END C=962.5472949421113, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.0007064515131755324; total time= 1.5min
[CV] END C=772.3447692966574, class_weight=



[CV] END C=148.1869299533999, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.000274319913977967; total time= 2.1min
[CV] END C=539.4422419156507, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.661897646793291e-05; total time=14.0min




[CV] END C=148.1869299533999, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.000274319913977967; total time= 2.2min




[CV] END C=818.1147659224931, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.556416465223591e-06; total time=12.0min




[CV] END C=818.1147659224931, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.556416465223591e-06; total time=12.0min




[CV] END C=148.1869299533999, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=0.000274319913977967; total time= 2.2min
[CV] END C=539.4422419156507, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.661897646793291e-05; total time=16.5min




[CV] END C=818.1147659224931, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.556416465223591e-06; total time=12.4min
[CV] END C=539.4422419156507, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.661897646793291e-05; total time=17.4min




[CV] END C=634.4513447013638, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=9.074256288983866e-05; total time= 4.7min




[CV] END C=634.4513447013638, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=9.074256288983866e-05; total time= 4.7min
[CV] END C=772.3447692966574, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=1.1384639705304647e-06; total time=56.6min




[CV] END C=634.4513447013638, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=9.074256288983866e-05; total time= 4.8min




[CV] END C=502.7790232288615, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=2.400407594352162e-05; total time=12.0min




[CV] END C=502.7790232288615, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=2.400407594352162e-05; total time=12.0min




[CV] END C=502.7790232288615, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=2.400407594352162e-05; total time=12.4min




[CV] END C=632.4058305935795, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0018438580889418843; total time= 4.5min




[CV] END C=632.4058305935795, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0018438580889418843; total time= 4.6min




[CV] END C=632.4058305935795, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=0.0018438580889418843; total time= 4.7min
[CV] END C=369.7544560614045, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.9589152820953114e-05; total time=16.2min
[CV] END C=369.7544560614045, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.9589152820953114e-05; total time=17.3min
[CV] END C=369.7544560614045, class_weight=None, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.9589152820953114e-05; total time=17.3min
[CV] END C=337.715171403628, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=7.49992427239817e-06; total time=33.1min
[CV] END C=337.715171403628, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=7.49992427239817e-06; total time=32.0min
[CV] END C=337.715171403628, class_weight={0: 1, 1: 2}, d



[CV] END C=798.4451249845512, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=5.5728075000299646e-06; total time=24.6min




[CV] END C=798.4451249845512, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=5.5728075000299646e-06; total time=24.4min




[CV] END C=798.4451249845512, class_weight=None, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=5.5728075000299646e-06; total time=26.2min
[CV] END C=985.7504541106007, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.105781497027901e-06; total time=41.8min




[CV] END C=924.7936182785628, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.207295819490822e-06; total time= 4.7min




[CV] END C=40.87514155476392, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=7.6026358684559036e-06; total time=23.3min
[CV] END C=985.7504541106007, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.105781497027901e-06; total time=43.6min




[CV] END C=924.7936182785628, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.207295819490822e-06; total time= 4.7min




[CV] END C=40.87514155476392, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=7.6026358684559036e-06; total time=23.8min




[CV] END C=40.87514155476392, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=7.6026358684559036e-06; total time=23.8min




[CV] END C=924.7936182785628, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=8.207295819490822e-06; total time= 4.9min
[CV] END C=175.0549270959362, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.0001314021022620739; total time= 4.8min
[CV] END C=241.9522909004517, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=0.00017106998386865805; total time= 4.4min




[CV] END C=882.7363431893398, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=2.493666083446518e-05; total time= 2.2min
[CV] END C=175.0549270959362, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.0001314021022620739; total time= 5.5min




[CV] END C=882.7363431893398, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=2.493666083446518e-05; total time= 2.2min
[CV] END C=175.0549270959362, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=0.0001314021022620739; total time= 5.5min
[CV] END C=241.9522909004517, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=0.00017106998386865805; total time= 4.1min
[CV] END C=241.9522909004517, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l2, tol=0.00017106998386865805; total time= 5.0min




[CV] END C=882.7363431893398, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l1, tol=2.493666083446518e-05; total time= 2.3min
[CV] END C=985.7504541106007, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=2.105781497027901e-06; total time=46.1min




[CV] END C=837.810105907328, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.843767489444368e-06; total time=24.0min




[CV] END C=898.6541885270792, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=3.084118569618283e-05; total time= 4.5min




[CV] END C=837.810105907328, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.843767489444368e-06; total time=23.8min




[CV] END C=898.6541885270792, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=3.084118569618283e-05; total time= 4.6min




[CV] END C=898.6541885270792, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=2000, penalty=l1, tol=3.084118569618283e-05; total time= 4.7min
[CV] END C=548.8337893665862, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00950232435013485; total time=  14.9s
[CV] END C=548.8337893665862, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00950232435013485; total time=  16.4s
[CV] END C=548.8337893665862, class_weight=balanced, dual=False, loss=squared_hinge, max_iter=5000, penalty=l2, tol=0.00950232435013485; total time=  16.3s




[CV] END C=837.810105907328, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.843767489444368e-06; total time=25.1min




[CV] END C=887.8700987609599, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.43113372285762e-06; total time=11.6min




[CV] END C=887.8700987609599, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.43113372285762e-06; total time=11.6min




[CV] END C=887.8700987609599, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=5000, penalty=l1, tol=4.43113372285762e-06; total time=11.8min
[CV] END C=726.0556788702394, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=4.149851045848951e-05; total time=20.0min
[CV] END C=726.0556788702394, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=4.149851045848951e-05; total time=21.2min
[CV] END C=176.02525267734538, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=2.0026211229345322e-05; total time=18.5min
[CV] END C=726.0556788702394, class_weight={0: 1, 1: 2}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l2, tol=4.149851045848951e-05; total time=25.8min
[CV] END C=176.02525267734538, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=2.0026211229345322e-05; total time=19.5min
[CV] END C=176.0252526



[CV] END C=746.5914051180241, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.0007651731056383424; total time=20.7min




[CV] END C=746.5914051180241, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.0007651731056383424; total time=21.9min




[CV] END C=746.5914051180241, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=0.0007651731056383424; total time=21.8min
[CV] END C=940.2334424577784, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=4.3977668944839625e-06; total time=32.0min
[CV] END C=940.2334424577784, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=4.3977668944839625e-06; total time=32.9min




[CV] END C=308.16079185238925, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.95708094149435e-05; total time=19.3min




[CV] END C=308.16079185238925, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.95708094149435e-05; total time=19.0min
[CV] END C=940.2334424577784, class_weight=None, dual=False, loss=squared_hinge, max_iter=1000, penalty=l2, tol=4.3977668944839625e-06; total time=36.6min




[CV] END C=308.16079185238925, class_weight={0: 2, 1: 1}, dual=False, loss=squared_hinge, max_iter=10000, penalty=l1, tol=2.95708094149435e-05; total time=17.6min
Best parameters found: {'C': 539.4422419156507, 'class_weight': 'balanced', 'dual': False, 'loss': 'squared_hinge', 'max_iter': 5000, 'penalty': 'l2', 'tol': 2.661897646793291e-05}
Top 27 parameter sets:
Set 1:
  C: 356.8533266935893
  class_weight: {0: 2, 1: 1}
  dual: False
  loss: squared_hinge
  max_iter: 10000
  penalty: l2
  tol: 1.2705645329288717e-05

Set 2:
  C: 772.3447692966574
  class_weight: {0: 2, 1: 1}
  dual: False
  loss: squared_hinge
  max_iter: 10000
  penalty: l2
  tol: 1.1384639705304647e-06

Set 3:
  C: 139.59386065204183
  class_weight: {0: 2, 1: 1}
  dual: False
  loss: squared_hinge
  max_iter: 10000
  penalty: l2
  tol: 8.532678095658724e-06

Set 4:
  C: 176.02525267734538
  class_weight: {0: 2, 1: 1}
  dual: False
  loss: squared_hinge
  max_iter: 1000
  penalty: l2
  tol: 2.0026211229345322e-05

S



Validation Accuracy: 0.8076870469156067
Validation Precision: 0.8083990923261748
Validation Recall: 0.8076870469156067
Validation F1 Score: 0.8075617999973789
Validation ROC AUC Score: 0.9322615567514327
Confusion Matrix:
[[2064  155  213]
 [ 107 2030  303]
 [ 204  424 1811]]


### Testing the model after validation

In [35]:
# Reload the training table alongside the test table.
train_features_csv = '../data/X_train_500.csv'
test_features_csv = '../data/X_test_500.csv'
X_train = pd.read_csv(train_features_csv, low_memory=False)
X_test = pd.read_csv(test_features_csv, low_memory=False)

In [36]:
# Number of ensemble members; one svm_model_{i}.joblib file is loaded per member further down in this cell.
num_models = 27

# This is the soft-voting function used to aggregate the predictions.
def soft_vote(preds):
    """Combine per-model class scores into one normalised score vector per sample.

    Parameters
    ----------
    preds : array-like indexed as [model, sample, class]
        Scores produced by every ensemble member. Any number of classes is
        accepted; the class count is taken from the last axis.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per sample holding the scores summed over the models and
        rescaled so that each array sums to 1.

    Raises
    ------
    ValueError
        If `preds` is not three-dimensional.
    """
    preds = np.asarray(preds)
    if preds.ndim != 3:
        raise ValueError(
            f'preds must be indexed as [model, sample, class]; got {preds.ndim} dimension(s)'
        )

    # Sum the scores of all models for every (sample, class) pair in one pass.
    summed_preds = preds.sum(axis=0)

    # softmax(log(x)) rescales each row to x / sum(x). A zero score becomes
    # log(0) = -inf, which softmax maps back to 0, so the divide warning is noise.
    with np.errstate(divide='ignore'):
        log_scores = np.log(summed_preds)
    return list(softmax(log_scores, axis=1))

# Again, this checks for columns that only contain NaN values.
def drop_allnan(data):
    """Return `data` without the columns whose every entry is NaN.

    A column is removed when its NaN count equals the number of rows. When no
    column qualifies, the input frame itself is returned.
    """
    row_count = len(data)
    empty_cols = [name for name in data.columns if data[name].isna().sum() == row_count]
    if not empty_cols:
        return data
    return data.drop(columns=empty_cols)

# Strip the all-NaN columns from the training frame and remember which columns remain, in order.
X_train = drop_allnan(X_train)
columns = list(X_train.columns)

# Load the preprocessor saved during validation.
preprocessor = joblib.load('../models/linearSVC-validation/preprocessor.joblib')

# Load every model of the ensemble.
num_models = 27
models = [
    joblib.load(f'../models/linearSVC-validation/svm_model_{i}.joblib')
    for i in range(num_models)
]

# Load the test labels and replace the categorical IMPACT column of the test frame with fixed numeric codes.
y_test = pd.read_csv('../data/y_test_id.csv', low_memory=False) 
impact_vals = {'LOW': 0, 'MODIFIER': 1, 'MODERATE': 1.5, 'HIGH': 2}
# An IMPACT value that is not a key of impact_vals raises KeyError here.
encoded_impacts = [impact_vals[imp] for imp in X_test['IMPACT']]
X_test = X_test.drop(columns=['IMPACT'])
X_test['IMPACT'] = encoded_impacts
# Select the test columns by the list taken from the cleaned training frame, in that order.
X_test = X_test[columns]
# Keep the IDs aside for the output file, then remove ID from the feature frame.
ids = X_test['ID'].tolist()
X_test = X_test.drop(columns='ID')

# Cast each test column to the dtype of the same-named training column, then pass the frame
# and the loaded preprocessor to utils.transform.
# NOTE(review): X_train is not IMPACT-encoded in this cell, so its IMPACT dtype may be object
# while the test column is numeric — confirm the cast is intended.
for col in X_test.columns:
    X_test[col] = X_test[col].astype(X_train[col].dtype)
X_test = utils.transform(X_test, preprocessor)

# Collect the class-probability output of every ensemble member.
all_preds = [models[k].predict_proba(X_test) for k in range(num_models)]

# Soft-vote across the models, then take the highest-scoring class for each sample.
y_pred_proba = soft_vote(np.array(all_preds))
y_pred = list(map(np.argmax, y_pred_proba))

# Translate the string labels into the integer class codes.
label_mapping = {'Neutral': 0, 'GOF': 1, 'LOF': 2}
y_test_numeric = [label_mapping[name] for name in y_test['label']]

# Score the predictions with scikit-learn; precision, recall and F1 use weighted averaging.
accuracy = accuracy_score(y_test_numeric, y_pred)
precision, recall, f1 = (
    scorer(y_test_numeric, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)
roc_auc = roc_auc_score(y_test_numeric, y_pred_proba, multi_class='ovo')
conf_matrix = confusion_matrix(y_test_numeric, y_pred)

# Report the metrics.
print(f'Test Accuracy: {accuracy}')
print(f'Test Precision: {precision}')
print(f'Test Recall: {recall}')
print(f'Test F1 Score: {f1}')
print(f'Test ROC AUC Score: {roc_auc}')
print(f'Confusion Matrix:\n{conf_matrix}')

# One output row per sample: ID, predicted class name, then the three voted class scores.
out = pd.DataFrame(
    [
        [ids[k], ['Neutral', 'GOF', 'LOF'][y_pred[k]], *y_pred_proba[k]]
        for k in range(len(y_pred))
    ],
    columns=['ID', 'prediction', 'LoGoFunc_Neutral', 'LoGoFunc_GOF', 'LoGoFunc_LOF'],
)
out.to_csv(output_path, index=None)

(2831, 472)
Test Accuracy: 0.772165312610385
Test Precision: 0.8603888492984171
Test Recall: 0.772165312610385
Test F1 Score: 0.8046903332226987
Test ROC AUC Score: 0.8802155566601174
Confusion Matrix:
[[1117  128   94]
 [  18  102   32]
 [  91  282  967]]




### Evaluation against y_test

In [37]:
# Manual sanity check: re-score the predictions that were written to disk.
predictions_file = output_path

# Ground-truth labels.
y_test_path = '../data/y_test_id.csv'
y_test = pd.read_csv(y_test_path)
y_true = y_test['label']

# Saved predictions; the row count must agree with the labels.
predictions = pd.read_csv(predictions_file)
assert len(predictions) == len(y_test)
y_pred = predictions['prediction']

# Translate both label columns into integer class codes.
label_mapping = {'Neutral': 0, 'GOF': 1, 'LOF': 2}
y_true_numeric = [label_mapping[name] for name in y_true]
y_pred_numeric = [label_mapping[name] for name in y_pred]

# Score with scikit-learn; precision, recall and F1 use weighted averaging.
accuracy = accuracy_score(y_true_numeric, y_pred_numeric)
precision, recall, f1 = (
    scorer(y_true_numeric, y_pred_numeric, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_true_numeric, y_pred_numeric)

# A single-row table of the metrics, saved to the metrics file and echoed.
results = [{
    'accuracy': accuracy,
    'precision': precision,
    'recall': recall,
    'f1_score': f1,
    'confusion_matrix': conf_matrix.tolist()
}]
results_df = pd.DataFrame(results)
results_df.to_csv(metrics_file, index=False)
print(results_df)

   accuracy  precision    recall  f1_score  \
0  0.772165   0.860389  0.772165   0.80469   

                                   confusion_matrix  
0  [[1117, 128, 94], [18, 102, 32], [91, 282, 967]]  


### Compute Macro-REC

In [5]:
# Read from the metrics file referenced by `metrics_file`; write the extended table next to it
# under a name derived from `model_type` (both names are defined outside this cell).
input_file = metrics_file
output_file = f'../metrics/{model_type}-evaluation.csv'

def macro(confusion_matrix):
    """Return the unweighted mean of the per-class recalls.

    Each class recall is the diagonal entry of the confusion matrix divided by
    the sum of its row.
    """
    matrix = np.asarray(confusion_matrix)
    per_class_recall = matrix.diagonal() / matrix.sum(axis=1)
    return per_class_recall.mean()

def micro(confusion_matrix):
    """Return the micro-averaged recall: the diagonal total divided by the sum of all entries."""
    matrix = np.asarray(confusion_matrix)
    return np.trace(matrix) / matrix.sum()

# Read every metrics row; newline='' lets the csv module handle line endings itself.
with open(input_file, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)

for row in rows:
    # Use loop-local names that differ from the `macro` helper and from sklearn's
    # `confusion_matrix`: rebinding either one would break the second row of this
    # loop and any later call to those functions in the notebook.
    cm = ast.literal_eval(row['confusion_matrix'])
    macro_recall = macro(cm)
    micro_recall = micro(cm)
    row['micro_recall'] = f'{micro_recall:.4f}'
    row['macro'] = f'{macro_recall:.4f}'

    # Remove the original 'weighted' REC column.
    row.pop('recall', None)

# Write the rows back out with the two new recall columns in place of 'recall'.
with open(output_file, 'w', newline='') as csvfile:
    fieldnames = ['accuracy', 'precision', 'f1_score', 'micro_recall', 'macro', 'confusion_matrix']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

print(f"Saved to {output_file}")

Saved to ../metrics/linearSVC-evaluation.csv
