# Evaluate the cancer subtype classification SVM model using the main classification metrics

In [1]:
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from random import shuffle
import seaborn as sns
from tqdm import tqdm

from pathway_forte.multiclass_prediction import *

In [1]:
# Input file locations -- update these paths to match the local data layout.
# NOTE(review): DATA and the *_SSGSEA directory constants are not defined in this
# notebook; presumably they are provided by the star import from
# pathway_forte.multiclass_prediction -- confirm.
# NOTE(review): the output saved with this cell is "NameError: name 'os' is not
# defined" although `os` is imported in the first cell, so the cell was apparently
# run before the imports; re-run the notebook from the top.
CANCER_SUBTYPES = os.path.join(DATA, 'tcga_datasets', 'brca', 'brca_subtypes_matrix.txt')
brca_subtypes_df = pd.read_csv(CANCER_SUBTYPES, sep='\t')

# ssGSEA score files, one per pathway resource.
(
    kegg_ssgsea_path,
    wikipathways_ssgsea_path,
    reactome_ssgsea_path,
    merge_ssgsea_path,
    msigdb_kegg_ssgsea_path,
    msigdb_reactome_ssgsea_path,
) = (
    os.path.join(directory, file_name)
    for directory, file_name in (
        (KEGG_SSGSEA, 'kegg_brca.tsv'),
        (WIKIPATHWAYS_SSGSEA, 'wikipathways_brca.tsv'),
        (REACTOME_SSGSEA, 'reactome_brca.tsv'),
        (MERGE_SSGSEA, 'merge_brca.tsv'),
        (MSIG_SSGSEA, 'kegg_msig_brca.tsv'),
        (MSIG_SSGSEA, 'reactome_msig_brca.tsv'),
    )
)

NameError: name 'os' is not defined

In [3]:
# Get the sample IDs and corresponding cancer subtypes from the subtype matrix file
# (CANCER_SUBTYPES). The result is used in the following cells to filter the ssGSEA
# score tables via match_samples.
# NOTE(review): the exact return type is defined by the pathway_forte helper -- confirm.
patient_ids = get_sample_ids_with_cancer_subtypes(CANCER_SUBTYPES)

In [4]:
# Load the ssGSEA scores of every pathway resource with stabilize_ssgsea_scores_df,
# producing one scores DataFrame per score file.
(
    kegg_enrichment_score_df,
    wikipathways_enrichment_score_df,
    reactome_enrichment_score_df,
    merge_enrichment_score_df,
    msig_kegg_enrichment_score_df,
    msig_reactome_enrichment_score_df,
) = (
    stabilize_ssgsea_scores_df(scores_path)
    for scores_path in (
        kegg_ssgsea_path,
        wikipathways_ssgsea_path,
        reactome_ssgsea_path,
        merge_ssgsea_path,
        msigdb_kegg_ssgsea_path,
        msigdb_reactome_ssgsea_path,
    )
)

Match the sample IDs in each ssGSEA scores DataFrame against the list of samples with an annotated cancer subtype, so that only cancer patients with a specified subtype are retained in the scores DataFrame. This filters out all control samples as well as any cancer cases whose subtype is normal or NA. Of the complete set of 1215 samples, 1050 are retained. TCGA reports 5 cancer subtypes: normal, basal, Her2, LumA and LumB; because the normal subtype is filtered out, four classes remain for classification.

In [5]:
# Keep, for every resource, only the samples listed in patient_ids.
(
    kegg_pathway_features,
    reactome_pathway_features,
    wikipathways_pathway_features,
    merged_pathway_features,
    msig_kegg_pathway_features,
    msig_reactome_pathway_features,
) = (
    match_samples(enrichment_scores, patient_ids)
    for enrichment_scores in (
        kegg_enrichment_score_df,
        reactome_enrichment_score_df,
        wikipathways_enrichment_score_df,
        merge_enrichment_score_df,
        msig_kegg_enrichment_score_df,
        msig_reactome_enrichment_score_df,
    )
)

In [6]:
# Report the (samples, pathways) shape of each filtered feature table, one line per resource.
print('The number of samples by features/pathways for each resource are:')
print('\n'.join(
    line_template.format(features.shape)
    for line_template, features in (
        ('KEGG: {}', kegg_pathway_features),
        ('Reactome: {}', reactome_pathway_features),
        ('WikiPathways: {}', wikipathways_pathway_features),
        ('PathwayForte: {}', merged_pathway_features),
        ('MSigDB KEGG: {}', msig_kegg_pathway_features),
        ('MSigDB Reactome: {}', msig_reactome_pathway_features),
    )
))

The number of samples by features/pathways for each resource are:
KEGG: (1050, 311)
Reactome: (1050, 1170)
WikiPathways: (1050, 362)
PathwayForte: (1050, 1726)
MSigDB KEGG: (1050, 177)
MSigDB Reactome: (1050, 523)


In [7]:
# Get arrays of class labels ordered the same way as features
kegg_class_labels = get_class_labels(kegg_pathway_features, brca_subtypes_df)
reactome_class_labels = get_class_labels(reactome_pathway_features, brca_subtypes_df)
wikipathways_class_labels = get_class_labels(wikipathways_pathway_features, brca_subtypes_df)
merged_class_labels = get_class_labels(merged_pathway_features, brca_subtypes_df)

msig_kegg_class_labels = get_class_labels(msig_kegg_pathway_features, brca_subtypes_df)
msig_reactome_class_labels = get_class_labels(msig_reactome_pathway_features, brca_subtypes_df)

In [8]:
# Build a second set of class labels per resource (separate get_class_labels calls),
# stored under shuffle_* names.
# NOTE(review): nothing is shuffled in this cell -- the labels are built exactly like
# the unshuffled ones; presumably they are shuffled further down (random.shuffle is
# imported at the top) -- confirm before using them as a permuted baseline.
(
    shuffle_kegg_class_labels,
    shuffle_reactome_class_labels,
    shuffle_wikipathways_class_labels,
    shuffle_merged_class_labels,
    shuffle_msig_kegg_class_labels,
    shuffle_msig_reactome_class_labels,
) = (
    get_class_labels(features, brca_subtypes_df)
    for features in (
        kegg_pathway_features,
        reactome_pathway_features,
        wikipathways_pathway_features,
        merged_pathway_features,
        msig_kegg_pathway_features,
        msig_reactome_pathway_features,
    )
)

In [9]:
# Convert every filtered feature table into the features array passed to the SVM training.
(
    kegg_features_array,
    reactome_features_array,
    wikipathways_features_array,
    merged_features_array,
    msig_kegg_features_array,
    msig_reactome_features_array,
) = (
    convert_df_to_features_array(features)
    for features in (
        kegg_pathway_features,
        reactome_pathway_features,
        wikipathways_pathway_features,
        merged_pathway_features,
        msig_kegg_pathway_features,
        msig_reactome_pathway_features,
    )
)

In [13]:
# Train and evaluate the multiclass SVM on the KEGG features; returns the accuracies
# and F1 scores collected over the iterations.
# NOTE(review): presumably nested cross-validation (10 outer / 10 inner folds) with a
# PCA step keeping 95% of the variance -- confirm against train_multiclass_svm.
kegg_accuracies, kegg_f1_scores = train_multiclass_svm(
    kegg_features_array,
    kegg_class_labels,
    outer_cv=10,
    inner_cv=10,
    explained_variance=0.95,
    chain_pca=True,
)

1it [00:10, 10.66s/it]

For iteration 1:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8666666666666667
f1 score is 0.8630854640706856


              precision    recall  f1-score   support

     Class 0       0.84      0.93      0.88        55
     Class 1       0.79      0.65      0.71        23
     Class 2       1.00      0.71      0.83         7
     Class 3       1.00      1.00      1.00        20

   micro avg       0.87      0.87      0.87       105
   macro avg       0.91      0.82      0.86       105
weighted avg       0.87      0.87      0.86       105



2it [00:21, 10.50s/it]

For iteration 2:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9238095238095239
f1 score is 0.9251575174848831


              precision    recall  f1-score   support

     Class 0       0.95      0.93      0.94        59
     Class 1       0.78      0.88      0.82        16
     Class 2       0.86      1.00      0.92         6
     Class 3       1.00      0.92      0.96        24

   micro avg       0.92      0.92      0.92       105
   macro avg       0.90      0.93      0.91       105
weighted avg       0.93      0.92      0.93       105



3it [00:31, 10.34s/it]

For iteration 3:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8857142857142857
f1 score is 0.8819634910544002


              precision    recall  f1-score   support

     Class 0       0.90      0.98      0.94        58
     Class 1       0.86      0.67      0.75        18
     Class 2       0.67      0.67      0.67         9
     Class 3       0.95      0.90      0.92        20

   micro avg       0.89      0.89      0.89       105
   macro avg       0.84      0.80      0.82       105
weighted avg       0.88      0.89      0.88       105



4it [00:41, 10.26s/it]

For iteration 4:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9428571428571428
f1 score is 0.9428571428571428


              precision    recall  f1-score   support

     Class 0       0.96      0.96      0.96        54
     Class 1       0.88      0.83      0.86        18
     Class 2       0.82      0.90      0.86        10
     Class 3       1.00      1.00      1.00        23

   micro avg       0.94      0.94      0.94       105
   macro avg       0.92      0.92      0.92       105
weighted avg       0.94      0.94      0.94       105



5it [00:51, 10.31s/it]

For iteration 5:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.9047619047619048
f1 score is 0.902998866213152


              precision    recall  f1-score   support

     Class 0       0.92      0.95      0.94        63
     Class 1       0.85      0.65      0.73        17
     Class 2       0.62      0.83      0.71         6
     Class 3       1.00      1.00      1.00        19

   micro avg       0.90      0.90      0.90       105
   macro avg       0.85      0.86      0.85       105
weighted avg       0.91      0.90      0.90       105



6it [01:01, 10.24s/it]

For iteration 6:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.8285714285714286
f1 score is 0.8222532100413721


              precision    recall  f1-score   support

     Class 0       0.89      0.91      0.90        53
     Class 1       0.71      0.76      0.73        29
     Class 2       0.67      0.40      0.50        10
     Class 3       0.93      1.00      0.96        13

   micro avg       0.83      0.83      0.83       105
   macro avg       0.80      0.77      0.77       105
weighted avg       0.82      0.83      0.82       105



7it [01:11, 10.29s/it]

For iteration 7:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8857142857142857
f1 score is 0.8845972073039743


              precision    recall  f1-score   support

     Class 0       0.93      0.93      0.93        58
     Class 1       0.75      0.75      0.75        20
     Class 2       0.78      0.70      0.74        10
     Class 3       0.94      1.00      0.97        17

   micro avg       0.89      0.89      0.89       105
   macro avg       0.85      0.85      0.85       105
weighted avg       0.88      0.89      0.88       105



8it [01:22, 10.27s/it]

For iteration 8:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8666666666666667
f1 score is 0.856034308370757


              precision    recall  f1-score   support

     Class 0       0.81      1.00      0.90        48
     Class 1       0.90      0.64      0.75        28
     Class 2       0.75      0.43      0.55         7
     Class 3       1.00      1.00      1.00        22

   micro avg       0.87      0.87      0.87       105
   macro avg       0.87      0.77      0.80       105
weighted avg       0.87      0.87      0.86       105



9it [01:32, 10.27s/it]

For iteration 9:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8952380952380953
f1 score is 0.8924542337126367


              precision    recall  f1-score   support

     Class 0       0.92      0.95      0.94        61
     Class 1       0.77      0.62      0.69        16
     Class 2       0.75      0.82      0.78        11
     Class 3       1.00      1.00      1.00        17

   micro avg       0.90      0.90      0.90       105
   macro avg       0.86      0.85      0.85       105
weighted avg       0.89      0.90      0.89       105



10it [01:42, 10.24s/it]

For iteration 10:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8761904761904762
f1 score is 0.8751489322917892


              precision    recall  f1-score   support

     Class 0       0.90      0.91      0.91        58
     Class 1       0.75      0.68      0.71        22
     Class 2       0.75      1.00      0.86         6
     Class 3       1.00      0.95      0.97        19

   micro avg       0.88      0.88      0.88       105
   macro avg       0.85      0.89      0.86       105
weighted avg       0.88      0.88      0.88       105






In [14]:
# Train and evaluate the multiclass SVM on the Reactome features; returns the accuracies
# and F1 scores collected over the iterations.
# NOTE(review): presumably nested cross-validation (10 outer / 10 inner folds) with a
# PCA step keeping 95% of the variance -- confirm against train_multiclass_svm.
reactome_accuracies, reactome_f1_scores = train_multiclass_svm(
    reactome_features_array,
    reactome_class_labels,
    outer_cv=10,
    inner_cv=10,
    explained_variance=0.95,
    chain_pca=True,
)

1it [00:10, 10.55s/it]

For iteration 1:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8761904761904762
f1 score is 0.8781855802296439


              precision    recall  f1-score   support

     Class 0       0.93      0.93      0.93        54
     Class 1       0.73      0.76      0.74        21
     Class 2       0.70      0.78      0.74         9
     Class 3       1.00      0.90      0.95        21

   micro avg       0.88      0.88      0.88       105
   macro avg       0.84      0.84      0.84       105
weighted avg       0.88      0.88      0.88       105



2it [00:20, 10.44s/it]

For iteration 2:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8761904761904762
f1 score is 0.8772274867393767


              precision    recall  f1-score   support

     Class 0       0.89      0.89      0.89        53
     Class 1       0.71      0.74      0.72        23
     Class 2       1.00      0.89      0.94         9
     Class 3       1.00      1.00      1.00        20

   micro avg       0.88      0.88      0.88       105
   macro avg       0.90      0.88      0.89       105
weighted avg       0.88      0.88      0.88       105



3it [00:31, 10.38s/it]

For iteration 3:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9238095238095239
f1 score is 0.9233446570117408


              precision    recall  f1-score   support

     Class 0       0.92      0.96      0.94        50
     Class 1       0.95      0.77      0.85        26
     Class 2       0.70      1.00      0.82         7
     Class 3       1.00      1.00      1.00        22

   micro avg       0.92      0.92      0.92       105
   macro avg       0.89      0.93      0.90       105
weighted avg       0.93      0.92      0.92       105



4it [00:42, 10.52s/it]

For iteration 4:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9052769452769452


              precision    recall  f1-score   support

     Class 0       0.94      0.92      0.93        53
     Class 1       0.76      0.89      0.82        18
     Class 2       0.90      0.75      0.82        12
     Class 3       0.95      0.95      0.95        22

   micro avg       0.90      0.90      0.90       105
   macro avg       0.89      0.88      0.88       105
weighted avg       0.91      0.90      0.91       105



5it [00:53, 10.61s/it]

For iteration 5:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8952380952380953
f1 score is 0.8882313572655851


              precision    recall  f1-score   support

     Class 0       0.89      0.97      0.93        61
     Class 1       0.90      0.56      0.69        16
     Class 2       0.50      0.50      0.50         4
     Class 3       0.96      1.00      0.98        24

   micro avg       0.90      0.90      0.90       105
   macro avg       0.81      0.76      0.78       105
weighted avg       0.89      0.90      0.89       105



6it [01:03, 10.54s/it]

For iteration 6:
best parameter is {'estimator__C': 1, 'estimator__kernel': 'linear'}
test accuracy is 0.9142857142857143
f1 score is 0.9162144723510905


              precision    recall  f1-score   support

     Class 0       0.94      0.92      0.93        48
     Class 1       0.88      0.88      0.88        26
     Class 2       0.67      0.86      0.75         7
     Class 3       1.00      0.96      0.98        24

   micro avg       0.91      0.91      0.91       105
   macro avg       0.87      0.90      0.88       105
weighted avg       0.92      0.91      0.92       105



7it [01:13, 10.48s/it]

For iteration 7:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9142857142857143
f1 score is 0.9142305037957212


              precision    recall  f1-score   support

     Class 0       0.93      0.91      0.92        58
     Class 1       0.83      0.83      0.83        24
     Class 2       0.88      1.00      0.93         7
     Class 3       1.00      1.00      1.00        16

   micro avg       0.91      0.91      0.91       105
   macro avg       0.91      0.94      0.92       105
weighted avg       0.91      0.91      0.91       105



8it [01:24, 10.50s/it]

For iteration 8:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9078176725235549


              precision    recall  f1-score   support

     Class 0       0.97      0.92      0.94        61
     Class 1       0.67      0.80      0.73        15
     Class 2       0.80      0.80      0.80        10
     Class 3       1.00      1.00      1.00        19

   micro avg       0.90      0.90      0.90       105
   macro avg       0.86      0.88      0.87       105
weighted avg       0.91      0.90      0.91       105



9it [01:35, 10.59s/it]

For iteration 9:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8952380952380953
f1 score is 0.8942680583683854


              precision    recall  f1-score   support

     Class 0       0.94      0.95      0.95        65
     Class 1       0.76      0.72      0.74        18
     Class 2       0.70      0.70      0.70        10
     Class 3       1.00      1.00      1.00        12

   micro avg       0.90      0.90      0.90       105
   macro avg       0.85      0.84      0.85       105
weighted avg       0.89      0.90      0.89       105



10it [01:46, 10.65s/it]

For iteration 10:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9028708000182488


              precision    recall  f1-score   support

     Class 0       0.91      0.95      0.93        64
     Class 1       0.78      0.70      0.74        20
     Class 2       1.00      0.86      0.92         7
     Class 3       1.00      1.00      1.00        14

   micro avg       0.90      0.90      0.90       105
   macro avg       0.92      0.88      0.90       105
weighted avg       0.90      0.90      0.90       105






In [15]:
# Train and evaluate the multiclass SVM on the WikiPathways features; returns the
# accuracies and F1 scores collected over the iterations.
# NOTE(review): presumably nested cross-validation (10 outer / 10 inner folds) with a
# PCA step keeping 95% of the variance -- confirm against train_multiclass_svm.
wp_accuracies, wp_f1_scores = train_multiclass_svm(
    wikipathways_features_array,
    wikipathways_class_labels,
    outer_cv=10,
    inner_cv=10,
    explained_variance=0.95,
    chain_pca=True,
)

1it [00:11, 11.99s/it]

For iteration 1:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9333333333333333
f1 score is 0.9295858694354935


              precision    recall  f1-score   support

     Class 0       0.95      0.98      0.96        56
     Class 1       0.82      0.90      0.86        20
     Class 2       1.00      0.56      0.71         9
     Class 3       1.00      1.00      1.00        20

   micro avg       0.93      0.93      0.93       105
   macro avg       0.94      0.86      0.88       105
weighted avg       0.94      0.93      0.93       105



2it [00:23, 11.78s/it]

For iteration 2:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.9142857142857143
f1 score is 0.9135207000223874


              precision    recall  f1-score   support

     Class 0       0.92      0.97      0.94        62
     Class 1       0.94      0.73      0.82        22
     Class 2       0.62      0.83      0.71         6
     Class 3       1.00      1.00      1.00        15

   micro avg       0.91      0.91      0.91       105
   macro avg       0.87      0.88      0.87       105
weighted avg       0.92      0.91      0.91       105



3it [00:34, 11.62s/it]

For iteration 3:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9047619047619048
f1 score is 0.9010407374368123


              precision    recall  f1-score   support

     Class 0       0.90      0.96      0.93        57
     Class 1       0.82      0.67      0.74        21
     Class 2       0.89      0.89      0.89         9
     Class 3       1.00      1.00      1.00        18

   micro avg       0.90      0.90      0.90       105
   macro avg       0.90      0.88      0.89       105
weighted avg       0.90      0.90      0.90       105



4it [00:46, 11.57s/it]

For iteration 4:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9142857142857143
f1 score is 0.9117153957831926


              precision    recall  f1-score   support

     Class 0       0.89      0.98      0.93        56
     Class 1       1.00      0.68      0.81        22
     Class 2       0.67      1.00      0.80         4
     Class 3       1.00      0.96      0.98        23

   micro avg       0.91      0.91      0.91       105
   macro avg       0.89      0.91      0.88       105
weighted avg       0.93      0.91      0.91       105



5it [00:57, 11.49s/it]

For iteration 5:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8761904761904762
f1 score is 0.8764256056715221


              precision    recall  f1-score   support

     Class 0       0.98      0.93      0.95        56
     Class 1       0.56      0.88      0.68        16
     Class 2       0.83      0.42      0.56        12
     Class 3       1.00      1.00      1.00        21

   micro avg       0.88      0.88      0.88       105
   macro avg       0.84      0.81      0.80       105
weighted avg       0.90      0.88      0.88       105



6it [01:08, 11.41s/it]

For iteration 6:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9047619047619048
f1 score is 0.9047619047619048


              precision    recall  f1-score   support

     Class 0       0.92      0.92      0.92        50
     Class 1       0.83      0.83      0.83        23
     Class 2       0.93      0.93      0.93        14
     Class 3       0.94      0.94      0.94        18

   micro avg       0.90      0.90      0.90       105
   macro avg       0.90      0.90      0.90       105
weighted avg       0.90      0.90      0.90       105



7it [01:19, 11.39s/it]

For iteration 7:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9142857142857143
f1 score is 0.9149088938562623


              precision    recall  f1-score   support

     Class 0       0.95      0.95      0.95        55
     Class 1       0.82      0.82      0.82        22
     Class 2       0.80      0.89      0.84         9
     Class 3       1.00      0.95      0.97        19

   micro avg       0.91      0.91      0.91       105
   macro avg       0.89      0.90      0.89       105
weighted avg       0.92      0.91      0.91       105



8it [01:31, 11.39s/it]

For iteration 8:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9333333333333333
f1 score is 0.9294886233910624


              precision    recall  f1-score   support

     Class 0       0.92      1.00      0.96        56
     Class 1       0.95      0.79      0.86        24
     Class 2       1.00      0.60      0.75         5
     Class 3       0.95      1.00      0.98        20

   micro avg       0.93      0.93      0.93       105
   macro avg       0.96      0.85      0.89       105
weighted avg       0.94      0.93      0.93       105



9it [01:42, 11.39s/it]

For iteration 9:
best parameter is {'estimator__C': 1, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9060015741833923


              precision    recall  f1-score   support

     Class 0       0.95      0.93      0.94        61
     Class 1       0.67      0.80      0.73        15
     Class 2       0.86      0.67      0.75         9
     Class 3       1.00      1.00      1.00        20

   micro avg       0.90      0.90      0.90       105
   macro avg       0.87      0.85      0.85       105
weighted avg       0.91      0.90      0.91       105



10it [01:53, 11.38s/it]

For iteration 10:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8666666666666667
f1 score is 0.8620712620712621


              precision    recall  f1-score   support

     Class 0       0.87      0.95      0.91        58
     Class 1       0.76      0.59      0.67        22
     Class 2       0.67      0.80      0.73         5
     Class 3       1.00      0.95      0.97        20

   micro avg       0.87      0.87      0.87       105
   macro avg       0.83      0.82      0.82       105
weighted avg       0.86      0.87      0.86       105






In [16]:
# Train and evaluate the multiclass SVM on the merged (PathwayForte) features; returns
# the accuracies and F1 scores collected over the iterations.
# NOTE(review): presumably nested cross-validation (10 outer / 10 inner folds) with a
# PCA step keeping 95% of the variance -- confirm against train_multiclass_svm.
merge_accuracies, merge_f1_scores = train_multiclass_svm(
    merged_features_array,
    merged_class_labels,
    outer_cv=10,
    inner_cv=10,
    explained_variance=0.95,
    chain_pca=True,
)

1it [00:13, 13.20s/it]

For iteration 1:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9041377454936776


              precision    recall  f1-score   support

     Class 0       0.92      0.95      0.93        58
     Class 1       0.90      0.84      0.87        31
     Class 2       0.80      0.80      0.80         5
     Class 3       0.91      0.91      0.91        11

   micro avg       0.90      0.90      0.90       105
   macro avg       0.88      0.87      0.88       105
weighted avg       0.90      0.90      0.90       105



2it [00:26, 13.00s/it]

For iteration 2:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9037851037851038


              precision    recall  f1-score   support

     Class 0       0.93      0.93      0.93        56
     Class 1       0.83      0.83      0.83        23
     Class 2       0.83      0.71      0.77         7
     Class 3       0.95      1.00      0.97        19

   micro avg       0.90      0.90      0.90       105
   macro avg       0.88      0.87      0.87       105
weighted avg       0.90      0.90      0.90       105



3it [00:39, 13.02s/it]

For iteration 3:
best parameter is {'estimator__C': 1, 'estimator__kernel': 'linear'}
test accuracy is 0.9142857142857143
f1 score is 0.9119179308086871


              precision    recall  f1-score   support

     Class 0       0.91      0.98      0.94        60
     Class 1       0.93      0.78      0.85        18
     Class 2       0.75      0.67      0.71         9
     Class 3       1.00      0.94      0.97        18

   micro avg       0.91      0.91      0.91       105
   macro avg       0.90      0.84      0.87       105
weighted avg       0.91      0.91      0.91       105



4it [00:52, 13.01s/it]

For iteration 4:
best parameter is {'estimator__C': 1, 'estimator__kernel': 'linear'}
test accuracy is 0.8761904761904762
f1 score is 0.8801949317738792


              precision    recall  f1-score   support

     Class 0       0.96      0.86      0.91        57
     Class 1       0.69      0.88      0.77        25
     Class 2       0.86      0.75      0.80         8
     Class 3       1.00      1.00      1.00        15

   micro avg       0.88      0.88      0.88       105
   macro avg       0.88      0.87      0.87       105
weighted avg       0.89      0.88      0.88       105



5it [01:04, 12.94s/it]

For iteration 5:
best parameter is {'estimator__C': 1, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9031422092646582


              precision    recall  f1-score   support

     Class 0       0.88      0.96      0.92        47
     Class 1       0.83      0.76      0.79        25
     Class 2       1.00      0.78      0.88         9
     Class 3       1.00      1.00      1.00        24

   micro avg       0.90      0.90      0.90       105
   macro avg       0.93      0.87      0.90       105
weighted avg       0.91      0.90      0.90       105



6it [01:17, 12.95s/it]

For iteration 6:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9428571428571428
f1 score is 0.942857142857143


              precision    recall  f1-score   support

     Class 0       0.95      0.95      0.95        60
     Class 1       0.80      0.80      0.80        15
     Class 2       1.00      1.00      1.00         5
     Class 3       1.00      1.00      1.00        25

   micro avg       0.94      0.94      0.94       105
   macro avg       0.94      0.94      0.94       105
weighted avg       0.94      0.94      0.94       105



7it [01:30, 12.93s/it]

For iteration 7:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.9142857142857143
f1 score is 0.9176459270576918


              precision    recall  f1-score   support

     Class 0       1.00      0.89      0.94        54
     Class 1       0.73      1.00      0.84        19
     Class 2       0.87      0.87      0.87        15
     Class 3       1.00      0.94      0.97        17

   micro avg       0.91      0.91      0.91       105
   macro avg       0.90      0.92      0.91       105
weighted avg       0.93      0.91      0.92       105



8it [01:43, 12.93s/it]

For iteration 8:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9238095238095239
f1 score is 0.9231627610393751


              precision    recall  f1-score   support

     Class 0       0.95      0.97      0.96        61
     Class 1       0.71      0.71      0.71        14
     Class 2       0.88      0.78      0.82         9
     Class 3       1.00      1.00      1.00        21

   micro avg       0.92      0.92      0.92       105
   macro avg       0.89      0.86      0.87       105
weighted avg       0.92      0.92      0.92       105



9it [01:56, 12.92s/it]

For iteration 9:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.8952380952380953
f1 score is 0.8963399743887548


              precision    recall  f1-score   support

     Class 0       0.92      0.93      0.92        58
     Class 1       0.89      0.77      0.83        22
     Class 2       0.57      0.80      0.67         5
     Class 3       0.95      0.95      0.95        20

   micro avg       0.90      0.90      0.90       105
   macro avg       0.83      0.86      0.84       105
weighted avg       0.90      0.90      0.90       105



10it [02:09, 12.92s/it]

For iteration 10:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.9142857142857143
f1 score is 0.9142700758464305


              precision    recall  f1-score   support

     Class 0       0.95      0.95      0.95        56
     Class 1       0.79      0.73      0.76        15
     Class 2       0.73      0.80      0.76        10
     Class 3       1.00      1.00      1.00        24

   micro avg       0.91      0.91      0.91       105
   macro avg       0.86      0.87      0.87       105
weighted avg       0.91      0.91      0.91       105






In [17]:
# Train and evaluate the multiclass SVM on the MSigDB KEGG features; returns the
# accuracies and F1 scores collected over the iterations.
# NOTE(review): presumably nested cross-validation (10 outer / 10 inner folds) with a
# PCA step keeping 95% of the variance -- confirm against train_multiclass_svm.
msig_kegg_accuracies, msig_kegg_f1_scores = train_multiclass_svm(
    msig_kegg_features_array,
    msig_kegg_class_labels,
    outer_cv=10,
    inner_cv=10,
    explained_variance=0.95,
    chain_pca=True,
)

1it [00:08,  8.59s/it]

For iteration 1:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.8476190476190476
f1 score is 0.8492754834739568


              precision    recall  f1-score   support

     Class 0       0.91      0.92      0.92        65
     Class 1       0.42      0.50      0.45        10
     Class 2       0.67      0.55      0.60        11
     Class 3       1.00      0.95      0.97        19

   micro avg       0.85      0.85      0.85       105
   macro avg       0.75      0.73      0.74       105
weighted avg       0.85      0.85      0.85       105



2it [00:17,  8.59s/it]

For iteration 2:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8952380952380953
f1 score is 0.8923811065627482


              precision    recall  f1-score   support

     Class 0       0.93      0.96      0.94        70
     Class 1       0.64      0.60      0.62        15
     Class 2       0.75      0.60      0.67         5
     Class 3       1.00      1.00      1.00        15

   micro avg       0.90      0.90      0.90       105
   macro avg       0.83      0.79      0.81       105
weighted avg       0.89      0.90      0.89       105



3it [00:25,  8.61s/it]

For iteration 3:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.9333333333333333
f1 score is 0.9288186998937507


              precision    recall  f1-score   support

     Class 0       0.92      0.98      0.95        58
     Class 1       0.92      0.65      0.76        17
     Class 2       0.89      1.00      0.94         8
     Class 3       1.00      1.00      1.00        22

   micro avg       0.93      0.93      0.93       105
   macro avg       0.93      0.91      0.91       105
weighted avg       0.93      0.93      0.93       105



4it [00:34,  8.58s/it]

For iteration 4:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.8571428571428571
f1 score is 0.8600834852772836


              precision    recall  f1-score   support

     Class 0       0.89      0.92      0.91        53
     Class 1       0.86      0.67      0.75        27
     Class 2       0.29      0.50      0.36         4
     Class 3       0.95      1.00      0.98        21

   micro avg       0.86      0.86      0.86       105
   macro avg       0.75      0.77      0.75       105
weighted avg       0.87      0.86      0.86       105



5it [00:42,  8.55s/it]

For iteration 5:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8571428571428571
f1 score is 0.8515567765567765


              precision    recall  f1-score   support

     Class 0       0.80      0.94      0.87        48
     Class 1       0.81      0.63      0.71        27
     Class 2       1.00      0.60      0.75         5
     Class 3       1.00      1.00      1.00        25

   micro avg       0.86      0.86      0.86       105
   macro avg       0.90      0.79      0.83       105
weighted avg       0.86      0.86      0.85       105



6it [00:51,  8.52s/it]

For iteration 6:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.8571428571428571
f1 score is 0.8523875523875523


              precision    recall  f1-score   support

     Class 0       0.85      0.96      0.90        52
     Class 1       0.90      0.61      0.73        31
     Class 2       0.56      0.83      0.67         6
     Class 3       1.00      1.00      1.00        16

   micro avg       0.86      0.86      0.86       105
   macro avg       0.83      0.85      0.82       105
weighted avg       0.87      0.86      0.85       105



7it [00:59,  8.50s/it]

For iteration 7:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.8380952380952381
f1 score is 0.8446584209441352


              precision    recall  f1-score   support

     Class 0       0.89      0.83      0.86        58
     Class 1       0.57      0.76      0.65        17
     Class 2       0.92      0.85      0.88        13
     Class 3       1.00      0.94      0.97        17

   micro avg       0.84      0.84      0.84       105
   macro avg       0.84      0.84      0.84       105
weighted avg       0.86      0.84      0.84       105



8it [01:07,  8.48s/it]

For iteration 8:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'linear'}
test accuracy is 0.8857142857142857
f1 score is 0.8886475674280551


              precision    recall  f1-score   support

     Class 0       0.94      0.90      0.92        49
     Class 1       0.68      0.79      0.73        19
     Class 2       0.86      0.86      0.86        14
     Class 3       1.00      0.96      0.98        23

   micro avg       0.89      0.89      0.89       105
   macro avg       0.87      0.88      0.87       105
weighted avg       0.89      0.89      0.89       105



9it [01:16,  8.49s/it]

For iteration 9:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.8285714285714286
f1 score is 0.8316893424036281


              precision    recall  f1-score   support

     Class 0       0.87      0.87      0.87        55
     Class 1       0.67      0.67      0.67        21
     Class 2       0.56      0.71      0.63         7
     Class 3       1.00      0.91      0.95        22

   micro avg       0.83      0.83      0.83       105
   macro avg       0.77      0.79      0.78       105
weighted avg       0.84      0.83      0.83       105



10it [01:24,  8.49s/it]

For iteration 10:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8761904761904762
f1 score is 0.8678022988505747


              precision    recall  f1-score   support

     Class 0       0.88      0.98      0.93        59
     Class 1       0.82      0.61      0.70        23
     Class 2       0.86      0.67      0.75         9
     Class 3       0.93      1.00      0.97        14

   micro avg       0.88      0.88      0.88       105
   macro avg       0.87      0.81      0.84       105
weighted avg       0.87      0.88      0.87       105






In [18]:
# Train and evaluate the SVM on the MSigDB Reactome feature matrix. The
# helper's return value is unpacked into per-iteration accuracies and F1 scores.
# The fold counts and PCA options are forwarded unchanged to
# train_multiclass_svm -- see that helper for their exact semantics.
msig_reactome_accuracies, msig_reactome_f1_scores = train_multiclass_svm(
    msig_reactome_features_array,
    msig_reactome_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

1it [00:09,  9.49s/it]

For iteration 1:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8476190476190476
f1 score is 0.8442228879319312


              precision    recall  f1-score   support

     Class 0       0.84      0.92      0.88        52
     Class 1       0.77      0.65      0.71        26
     Class 2       0.90      0.82      0.86        11
     Class 3       0.94      0.94      0.94        16

   micro avg       0.85      0.85      0.85       105
   macro avg       0.86      0.83      0.85       105
weighted avg       0.85      0.85      0.84       105



2it [00:18,  9.48s/it]

For iteration 2:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9047619047619048
f1 score is 0.9047276710828113


              precision    recall  f1-score   support

     Class 0       0.92      0.91      0.92        54
     Class 1       0.76      0.76      0.76        21
     Class 2       0.86      1.00      0.92         6
     Class 3       1.00      1.00      1.00        24

   micro avg       0.90      0.90      0.90       105
   macro avg       0.89      0.92      0.90       105
weighted avg       0.91      0.90      0.90       105



3it [00:28,  9.47s/it]

For iteration 3:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8666666666666667
f1 score is 0.8619395801331287


              precision    recall  f1-score   support

     Class 0       0.86      0.95      0.90        59
     Class 1       0.86      0.62      0.72        29
     Class 2       0.50      1.00      0.67         2
     Class 3       1.00      1.00      1.00        15

   micro avg       0.87      0.87      0.87       105
   macro avg       0.80      0.89      0.82       105
weighted avg       0.87      0.87      0.86       105



4it [00:37,  9.47s/it]

For iteration 4:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8476190476190476
f1 score is 0.8444288316419464


              precision    recall  f1-score   support

     Class 0       0.89      0.92      0.90        60
     Class 1       0.67      0.67      0.67        21
     Class 2       1.00      0.60      0.75        10
     Class 3       0.88      1.00      0.93        14

   micro avg       0.85      0.85      0.85       105
   macro avg       0.86      0.80      0.81       105
weighted avg       0.85      0.85      0.84       105



5it [00:47,  9.46s/it]

For iteration 5:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8952380952380953
f1 score is 0.8887725779967159


              precision    recall  f1-score   support

     Class 0       0.87      0.96      0.91        55
     Class 1       0.85      0.58      0.69        19
     Class 2       0.86      0.86      0.86         7
     Class 3       1.00      1.00      1.00        24

   micro avg       0.90      0.90      0.90       105
   macro avg       0.89      0.85      0.86       105
weighted avg       0.89      0.90      0.89       105



6it [00:56,  9.46s/it]

For iteration 6:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.9333333333333333
f1 score is 0.9328024889000498


              precision    recall  f1-score   support

     Class 0       0.93      0.98      0.95        54
     Class 1       0.85      0.85      0.85        20
     Class 2       1.00      0.80      0.89        10
     Class 3       1.00      0.95      0.98        21

   micro avg       0.93      0.93      0.93       105
   macro avg       0.94      0.90      0.92       105
weighted avg       0.94      0.93      0.93       105



7it [01:06,  9.48s/it]

For iteration 7:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.8761904761904762
f1 score is 0.8779009463501443


              precision    recall  f1-score   support

     Class 0       0.95      0.90      0.93        62
     Class 1       0.67      0.78      0.72        18
     Class 2       0.75      0.67      0.71         9
     Class 3       0.94      1.00      0.97        16

   micro avg       0.88      0.88      0.88       105
   macro avg       0.83      0.84      0.83       105
weighted avg       0.88      0.88      0.88       105



8it [01:15,  9.48s/it]

For iteration 8:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'rbf'}
test accuracy is 0.9142857142857143
f1 score is 0.9212566550037412


              precision    recall  f1-score   support

     Class 0       0.98      0.93      0.96        69
     Class 1       0.56      0.91      0.69        11
     Class 2       1.00      0.70      0.82        10
     Class 3       1.00      1.00      1.00        15

   micro avg       0.91      0.91      0.91       105
   macro avg       0.89      0.88      0.87       105
weighted avg       0.94      0.91      0.92       105



9it [01:25,  9.49s/it]

For iteration 9:
best parameter is {'estimator__C': 10, 'estimator__kernel': 'linear'}
test accuracy is 0.8857142857142857
f1 score is 0.8863069459696432


              precision    recall  f1-score   support

     Class 0       0.90      0.92      0.91        50
     Class 1       0.75      0.78      0.77        23
     Class 2       0.88      0.78      0.82         9
     Class 3       1.00      0.96      0.98        23

   micro avg       0.89      0.89      0.89       105
   macro avg       0.88      0.86      0.87       105
weighted avg       0.89      0.89      0.89       105



10it [01:34,  9.49s/it]

For iteration 10:
best parameter is {'estimator__C': 100, 'estimator__kernel': 'rbf'}
test accuracy is 0.8952380952380953
f1 score is 0.8962735042735043


              precision    recall  f1-score   support

     Class 0       0.92      0.94      0.93        52
     Class 1       0.75      0.79      0.77        19
     Class 2       0.75      0.75      0.75         8
     Class 3       1.00      0.92      0.96        26

   micro avg       0.90      0.90      0.90       105
   macro avg       0.86      0.85      0.85       105
weighted avg       0.90      0.90      0.90       105






In [19]:
# Show the KEGG F1 scores: a dict keyed by iteration number (1..10).
kegg_f1_scores

{1: 0.8630854640706856,
 2: 0.9251575174848831,
 3: 0.8819634910544002,
 4: 0.9428571428571428,
 5: 0.902998866213152,
 6: 0.8222532100413721,
 7: 0.8845972073039743,
 8: 0.856034308370757,
 9: 0.8924542337126367,
 10: 0.8751489322917892}

In [20]:
# Show the Reactome F1 scores: a dict keyed by iteration number (1..10).
reactome_f1_scores

{1: 0.8781855802296439,
 2: 0.8772274867393767,
 3: 0.9233446570117408,
 4: 0.9052769452769452,
 5: 0.8882313572655851,
 6: 0.9162144723510905,
 7: 0.9142305037957212,
 8: 0.9078176725235549,
 9: 0.8942680583683854,
 10: 0.9028708000182488}

In [21]:
# Show the WikiPathways ("wp") F1 scores: a dict keyed by iteration number (1..10).
wp_f1_scores

{1: 0.9295858694354935,
 2: 0.9135207000223874,
 3: 0.9010407374368123,
 4: 0.9117153957831926,
 5: 0.8764256056715221,
 6: 0.9047619047619048,
 7: 0.9149088938562623,
 8: 0.9294886233910624,
 9: 0.9060015741833923,
 10: 0.8620712620712621}

In [22]:
# Show the merged-dataset F1 scores: a dict keyed by iteration number (1..10).
merge_f1_scores

{1: 0.9041377454936776,
 2: 0.9037851037851038,
 3: 0.9119179308086871,
 4: 0.8801949317738792,
 5: 0.9031422092646582,
 6: 0.942857142857143,
 7: 0.9176459270576918,
 8: 0.9231627610393751,
 9: 0.8963399743887548,
 10: 0.9142700758464305}

In [23]:
# Show the MSigDB KEGG F1 scores returned by train_multiclass_svm:
# a dict keyed by iteration number (1..10).
msig_kegg_f1_scores

{1: 0.8492754834739568,
 2: 0.8923811065627482,
 3: 0.9288186998937507,
 4: 0.8600834852772836,
 5: 0.8515567765567765,
 6: 0.8523875523875523,
 7: 0.8446584209441352,
 8: 0.8886475674280551,
 9: 0.8316893424036281,
 10: 0.8678022988505747}

In [24]:
# Show the MSigDB Reactome F1 scores returned by train_multiclass_svm:
# a dict keyed by iteration number (1..10).
msig_reactome_f1_scores

{1: 0.8442228879319312,
 2: 0.9047276710828113,
 3: 0.8619395801331287,
 4: 0.8444288316419464,
 5: 0.8887725779967159,
 6: 0.9328024889000498,
 7: 0.8779009463501443,
 8: 0.9212566550037412,
 9: 0.8863069459696432,
 10: 0.8962735042735043}

In [None]:
# Mean of the values stored in reactome_all_metrics.
# The accumulator is deliberately NOT called `sum`: rebinding that name would
# hide the builtin for every later cell run in the same kernel.
# NOTE(review): reactome_all_metrics is not defined in the visible part of this
# notebook -- presumably a dict of per-fold accuracies; confirm where it comes from.
reactome_accuracy_values = list(reactome_all_metrics.values())

# Divide by the actual number of entries rather than a hard-coded 10, so the
# mean stays correct if the number of folds changes.
reactome_mean_accuracy = sum(reactome_accuracy_values) / len(reactome_accuracy_values)
reactome_mean_accuracy

In [None]:
# Gather the six per-resource metric containers into one list:
# KEGG, Reactome, WikiPathways, merged, MSigDB-KEGG, MSigDB-Reactome.
resource_accuracies = [
    kegg_all_metrics, reactome_all_metrics, wikipathways_all_metrics,
    merged_all_metrics, msig_kegg_all_metrics, msig_reactome_all_metrics,
]

In [None]:
def _accuracy_frame(metrics, resource):
    """Build a long-format frame for one pathway resource.

    Parameters
    ----------
    metrics : mapping
        Its ``.values()`` become the 'Accuracy' column, one row per value.
        (Presumably per-fold accuracies -- confirm against the training helper.)
    resource : str
        Label repeated on every row of the 'Resource' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Accuracy' and 'Resource', in that order.
    """
    return pd.DataFrame({
        # list() materializes the dict view so pandas receives a plain sequence.
        'Accuracy': list(metrics.values()),
        'Resource': resource,
    })


# One frame per resource; the resource labels are the category names used on
# the box plot's x axis.
kegg_df = _accuracy_frame(kegg_all_metrics, 'KEGG')
reactome_df = _accuracy_frame(reactome_all_metrics, 'Reactome')
wp_df = _accuracy_frame(wikipathways_all_metrics, 'WikiPathways')
merged_df = _accuracy_frame(merged_all_metrics, 'PathwayForte')
msigk_df = _accuracy_frame(msig_kegg_all_metrics, 'MSigDB-KEGG')
msigr_df = _accuracy_frame(msig_reactome_all_metrics, 'MSigDB-Reactome')

In [None]:
# Stack the six per-resource frames into one long-format table for plotting.
# ignore_index=True assigns a fresh 0..n-1 index; without it every frame's own
# row labels are repeated, leaving duplicate index labels in the result.
all_resources_df = pd.concat(
    [kegg_df, reactome_df, wp_df, merged_df, msigk_df, msigr_df],
    ignore_index=True,
)

In [None]:
# Box plot of the 'Accuracy' distribution for each 'Resource' in all_resources_df.

# The axis-label font size is read from rcParams when the axes are created,
# so it has to be set before the figure exists to affect this plot.
plt.rcParams["axes.labelsize"] = 14

# Create the figure/axes explicitly so `ax` really is the Axes (chaining
# .set_title onto sns.boxplot would bind the title Text object instead).
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(x="Resource", y="Accuracy",
            data=all_resources_df, showfliers=False,
            palette="Set2", ax=ax)
ax.set_title('Distribution of accuracies over 10-fold CV of shuffled breast cancer dataset', fontsize=17)

# NOTE(review): the file name says 'kegg' although the plot covers every
# resource in all_resources_df -- kept as-is; confirm before renaming.
fig.savefig('kegg_boxplot.png', dpi=500)

In [None]:
# shuffle labels
np.random.shuffle(shuffle_kegg_class_labels)
np.random.shuffle(shuffle_reactome_class_labels)
np.random.shuffle(shuffle_wikipathways_class_labels)
np.random.shuffle(shuffle_merged_class_labels)

np.random.shuffle(shuffle_msig_kegg_class_labels)
np.random.shuffle(shuffle_msig_reactome_class_labels)

In [None]:
# Shuffled-label run for KEGG: the real feature matrix is paired with the
# permuted label array. Fold counts and PCA options are forwarded unchanged to
# train_multiclass_log_reg -- see that helper for their exact semantics.
shuffle_kegg_all_metrics = train_multiclass_log_reg(
    kegg_features_array,
    shuffle_kegg_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Shuffled-label run for Reactome: the real feature matrix is paired with the
# permuted label array. Fold counts and PCA options are forwarded unchanged to
# train_multiclass_log_reg -- see that helper for their exact semantics.
shuffle_reactome_all_metrics = train_multiclass_log_reg(
    reactome_features_array,
    shuffle_reactome_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Shuffled-label run for WikiPathways: the real feature matrix is paired with
# the permuted label array. Fold counts and PCA options are forwarded unchanged
# to train_multiclass_log_reg -- see that helper for their exact semantics.
shuffle_wikipathways_all_metrics = train_multiclass_log_reg(
    wikipathways_features_array,
    shuffle_wikipathways_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Shuffled-label run for the merged feature set.
# The labels passed here must be shuffle_merged_class_labels (the array permuted
# in the "shuffle labels" cell), matching the KEGG / Reactome / WikiPathways
# shuffled runs; passing merged_class_labels would simply repeat the real
# experiment under a "shuffle" name.
shuffle_merged_all_metrics = train_multiclass_log_reg(
    merged_features_array,
    shuffle_merged_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Shuffled-label run for the MSigDB KEGG feature set.
# The labels passed here must be shuffle_msig_kegg_class_labels (the array
# permuted in the "shuffle labels" cell), matching the KEGG / Reactome /
# WikiPathways shuffled runs; passing msig_kegg_class_labels would simply
# repeat the real experiment under a "shuffle" name.
shuffle_msig_kegg_all_metrics = train_multiclass_log_reg(
    msig_kegg_features_array,
    shuffle_msig_kegg_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Shuffled-label run for the MSigDB Reactome feature set.
# The labels passed here must be shuffle_msig_reactome_class_labels (the array
# permuted in the "shuffle labels" cell), matching the KEGG / Reactome /
# WikiPathways shuffled runs; passing msig_reactome_class_labels would simply
# repeat the real experiment under a "shuffle" name.
shuffle_msig_reactome_all_metrics = train_multiclass_log_reg(
    msig_reactome_features_array,
    shuffle_msig_reactome_class_labels,
    inner_cv=10,
    outer_cv=10,
    chain_pca=True,
    explained_variance=0.95,
)

In [None]:
# Ten hard-coded C-index values for the BRCA dataset.
# NOTE(review): where these numbers came from is not recorded in this notebook
# -- presumably one value per fold of a survival-analysis run; TODO add provenance.
brca_cindex = [
    0.5277884986491702,
    0.584945412756177,
    0.4972871842843779,
    0.49943778110944526,
    0.5286965850169855,
    0.5287651154268963,
    0.5248087860364777,
    0.595642540620384,
    0.5391722099039172,
    0.532621359223301,
]

In [None]:
# Ten hard-coded C-index values for the LIHC dataset.
# NOTE(review): where these numbers came from is not recorded in this notebook
# -- presumably one value per fold of a survival-analysis run; TODO add provenance.
lihc_cindex = [
    0.5671296296296297,
    0.5532994923857868,
    0.4863157894736842,
    0.5325,
    0.49743589743589745,
    0.6302816901408451,
    0.4436781609195402,
    0.5081206496519721,
    0.6033254156769596,
    0.4553846153846154,
]

In [None]:
# Long-format frame for BRCA: one 'C-index' row per entry of brca_cindex,
# with every row tagged 'BRCA' in the 'Dataset' column.
brcac_df = pd.DataFrame({'C-index': brca_cindex}).assign(Dataset='BRCA')

In [None]:
# Long-format frame for LIHC: one 'C-index' row per entry of lihc_cindex,
# with every row tagged 'LIHC' in the 'Dataset' column.
lihcc_df = pd.DataFrame({'C-index': lihc_cindex}).assign(Dataset='LIHC')

In [None]:
# Stack the BRCA and LIHC frames into one long-format table for plotting.
# ignore_index=True assigns a fresh 0..n-1 index; without it both frames' own
# row labels are repeated, leaving duplicate index labels in the result.
surv_resources_df = pd.concat([brcac_df, lihcc_df], ignore_index=True)

In [None]:
# Box plot of the 'C-index' distribution for each 'Dataset' in surv_resources_df.

# The axis-label font size is read from rcParams when the axes are created,
# so it has to be set before the figure exists to affect this plot.
plt.rcParams["axes.labelsize"] = 14

# Create the figure/axes explicitly so `ax` really is the Axes (chaining
# .set_title onto sns.boxplot would bind the title Text object instead).
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x="Dataset", y="C-index",
            data=surv_resources_df, showfliers=True,
            palette="Set2", ax=ax)
ax.set_title('Distribution of C-indices from survival analysis in cancer datasets', fontsize=16)

fig.savefig('surv_boxplot.png', dpi=500)