# Evaluate cancer subtype classification model with main classification metrics

In [6]:
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

from compath_revolutions.constants import *
from pathway_forte.multiclass_prediction import *

In [7]:
# ssGSEA enrichment score files for the BRCA dataset, one per pathway resource
# (the directory constants come from the star imports at the top of the notebook).
kegg_ssgsea_path = os.path.join(KEGG_SSGSEA, 'kegg_brca.tsv')
wikipathways_ssgsea_path = os.path.join(WIKIPATHWAYS_SSGSEA, 'wikipathways_brca.tsv')
reactome_ssgsea_path = os.path.join(REACTOME_SSGSEA, 'reactome_brca.tsv')
merge_ssgsea_path = os.path.join(MERGE_SSGSEA, 'merge_brca.tsv')

# Tab-separated BRCA subtype matrix; the path is kept because a later cell
# passes it to the sample-ID helper, and the table is used to build class labels.
CANCER_SUBTYPES = os.path.join(
    DATA,
    'tcga_datasets',
    'brca',
    'brca_subtypes_matrix.txt',
)
brca_subtypes_df = pd.read_csv(CANCER_SUBTYPES, sep='\t')

In [8]:
# Get the sample IDs (with their corresponding cancer subtypes) from the BRCA
# subtype matrix file. The result is used by a later cell to filter the ssGSEA
# score tables down to subtype-annotated patients.
# NOTE(review): the exact return type (list vs. mapping) is defined by
# get_sample_ids_with_cancer_subtypes in pathway_forte -- confirm there.
patient_ids = get_sample_ids_with_cancer_subtypes(CANCER_SUBTYPES)

In [9]:
# Load one ssGSEA scores DataFrame per pathway resource. The files are read in
# the order KEGG, WikiPathways, Reactome, merged; the generator keeps the loop
# variable out of the notebook namespace.
(
    kegg_enrichment_score_df,
    wikipathways_enrichment_score_df,
    reactome_enrichment_score_df,
    merge_enrichment_score_df,
) = (
    stabilize_ssgsea_scores_df(ssgsea_path)
    for ssgsea_path in (
        kegg_ssgsea_path,
        wikipathways_ssgsea_path,
        reactome_ssgsea_path,
        merge_ssgsea_path,
    )
)

Match the sample IDs in each ssGSEA scores DataFrame with those in the cancer subtype list, so that only cancer patients with a specified cancer subtype are retained in the scores DataFrame. This filters out all control samples as well as any cancer cases whose subtype is normal or NA. A total of 1050 samples are retained out of the complete set of 1215 samples. TCGA reports 5 cancer subtypes: normal, basal, Her2, LumA and LumB.

In [10]:
# Restrict every score table to the samples listed in patient_ids, giving one
# feature matrix per pathway resource (KEGG, Reactome, WikiPathways, merged).
(
    kegg_pathway_features,
    reactome_pathway_features,
    wikipathways_pathway_features,
    merged_pathway_features,
) = (
    match_samples(score_df, patient_ids)
    for score_df in (
        kegg_enrichment_score_df,
        reactome_enrichment_score_df,
        wikipathways_enrichment_score_df,
        merge_enrichment_score_df,
    )
)

In [11]:
# Report the (samples, pathways) shape of each feature matrix, one line per resource.
print('The number of samples by features/pathways for each resource are:')
print('\n'.join(
    line_template.format(feature_matrix.shape)
    for line_template, feature_matrix in (
        ('KEGG: {}', kegg_pathway_features),
        ('Reactome: {}', reactome_pathway_features),
        ('WikiPathways: {}', wikipathways_pathway_features),
        ('PathwayForte: {}', merged_pathway_features),
    )
))

The number of samples by features/pathways for each resource are:
KEGG: (1050, 311)
Reactome: (1050, 1170)
WikiPathways: (1050, 362)
PathwayForte: (1050, 1726)


In [12]:
# Build the class-label array for each resource from the subtype table, so that
# the labels follow the same sample order as the matching feature matrix.
(
    kegg_class_labels,
    reactome_class_labels,
    wikipathways_class_labels,
    merged_class_labels,
) = (
    get_class_labels(feature_matrix, brca_subtypes_df)
    for feature_matrix in (
        kegg_pathway_features,
        reactome_pathway_features,
        wikipathways_pathway_features,
        merged_pathway_features,
    )
)

In [9]:
# Train and evaluate the multiclass logistic regression on the KEGG features,
# with 5 outer and 5 inner cross-validation folds.
# NOTE(review): chain_pca is False, so explained_variance is presumably not
# used in this run -- confirm against train_multiclass_log_reg.
kegg_all_metrics = train_multiclass_log_reg(
    kegg_pathway_features,
    kegg_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=False,
    explained_variance=0.95,
)

  'precision', 'predicted', average, warn_for)
1it [01:16, 76.88s/it]

For iteration 1:
test accuracy is 0.8238095238095238
F1 score is 0.8679245283018868


              precision    recall  f1-score   support

     Class 0       0.89      0.94      0.91       114
     Class 1       0.78      0.70      0.74        44
     Class 2       1.00      0.67      0.80        12
     Class 3       0.97      0.95      0.96        40

   micro avg       0.89      0.88      0.88       210
   macro avg       0.91      0.81      0.85       210
weighted avg       0.89      0.88      0.88       210
 samples avg       0.85      0.88      0.86       210



  'precision', 'predicted', average, warn_for)
2it [02:14, 67.16s/it]

For iteration 2:
test accuracy is 0.8095238095238095
F1 score is 0.8634920634920635


              precision    recall  f1-score   support

     Class 0       0.93      0.92      0.92       121
     Class 1       0.68      0.68      0.68        37
     Class 2       0.65      0.73      0.69        15
     Class 3       1.00      0.92      0.96        37

   micro avg       0.87      0.86      0.87       210
   macro avg       0.81      0.81      0.81       210
weighted avg       0.87      0.86      0.87       210
 samples avg       0.84      0.86      0.84       210



  'precision', 'predicted', average, warn_for)
3it [03:09, 63.10s/it]

For iteration 3:
test accuracy is 0.7761904761904762
F1 score is 0.8317460317460317


              precision    recall  f1-score   support

     Class 0       0.90      0.93      0.91       112
     Class 1       0.66      0.59      0.62        46
     Class 2       0.69      0.53      0.60        17
     Class 3       0.97      1.00      0.99        35

   micro avg       0.85      0.83      0.84       210
   macro avg       0.80      0.76      0.78       210
weighted avg       0.84      0.83      0.84       210
 samples avg       0.80      0.83      0.81       210



  'precision', 'predicted', average, warn_for)
4it [04:05, 61.31s/it]

For iteration 4:
test accuracy is 0.8095238095238095
F1 score is 0.8466666666666666


              precision    recall  f1-score   support

     Class 0       0.91      0.94      0.93       114
     Class 1       0.71      0.49      0.58        41
     Class 2       0.88      0.74      0.80        19
     Class 3       1.00      1.00      1.00        36

   micro avg       0.90      0.84      0.87       210
   macro avg       0.88      0.79      0.83       210
weighted avg       0.89      0.84      0.86       210
 samples avg       0.83      0.84      0.83       210



  'precision', 'predicted', average, warn_for)
5it [04:57, 59.54s/it]

For iteration 5:
test accuracy is 0.7904761904761904
F1 score is 0.8421052631578947


              precision    recall  f1-score   support

     Class 0       0.89      0.90      0.89       106
     Class 1       0.76      0.64      0.69        39
     Class 2       0.69      0.58      0.63        19
     Class 3       0.98      0.98      0.98        46

   micro avg       0.87      0.84      0.85       210
   macro avg       0.83      0.77      0.80       210
weighted avg       0.87      0.84      0.85       210
 samples avg       0.81      0.84      0.82       210






In [11]:
# Train and evaluate the multiclass logistic regression on the Reactome
# features, with 5 outer and 5 inner cross-validation folds.
# NOTE(review): chain_pca is False, so explained_variance is presumably not
# used in this run -- confirm against train_multiclass_log_reg.
reactome_all_metrics = train_multiclass_log_reg(
    reactome_pathway_features,
    reactome_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=False,
    explained_variance=0.95,
)

  'precision', 'predicted', average, warn_for)
1it [03:15, 195.81s/it]

For iteration 1:
test accuracy is 0.8428571428571429
F1 score is 0.8933333333333334


              precision    recall  f1-score   support

     Class 0       0.97      0.91      0.94       123
     Class 1       0.76      0.67      0.71        33
     Class 2       0.77      0.77      0.77        13
     Class 3       0.98      0.98      0.98        41

   micro avg       0.93      0.88      0.90       210
   macro avg       0.87      0.83      0.85       210
weighted avg       0.93      0.88      0.90       210
 samples avg       0.86      0.88      0.87       210



  'precision', 'predicted', average, warn_for)
2it [06:24, 192.07s/it]

For iteration 2:
test accuracy is 0.819047619047619
F1 score is 0.8606060606060606


              precision    recall  f1-score   support

     Class 0       0.91      0.92      0.92       118
     Class 1       0.67      0.77      0.72        43
     Class 2       0.86      0.63      0.73        19
     Class 3       1.00      1.00      1.00        30

   micro avg       0.86      0.88      0.87       210
   macro avg       0.86      0.83      0.84       210
weighted avg       0.87      0.88      0.87       210
 samples avg       0.85      0.88      0.86       210



  'precision', 'predicted', average, warn_for)
3it [09:40, 193.35s/it]

For iteration 3:
test accuracy is 0.8714285714285714
F1 score is 0.9114754098360655


              precision    recall  f1-score   support

     Class 0       0.93      0.95      0.94       109
     Class 1       0.88      0.80      0.83        44
     Class 2       0.85      0.69      0.76        16
     Class 3       1.00      1.00      1.00        41

   micro avg       0.93      0.91      0.92       210
   macro avg       0.91      0.86      0.88       210
weighted avg       0.93      0.91      0.92       210
 samples avg       0.89      0.91      0.90       210



  'precision', 'predicted', average, warn_for)
4it [13:02, 195.60s/it]

For iteration 4:
test accuracy is 0.8571428571428571
F1 score is 0.9078947368421052


              precision    recall  f1-score   support

     Class 0       0.94      0.96      0.95       112
     Class 1       0.89      0.72      0.79        43
     Class 2       0.81      0.68      0.74        19
     Class 3       0.95      0.97      0.96        36

   micro avg       0.92      0.89      0.90       210
   macro avg       0.90      0.83      0.86       210
weighted avg       0.92      0.89      0.90       210
 samples avg       0.87      0.89      0.88       210



  'precision', 'predicted', average, warn_for)
5it [16:22, 196.55s/it]

For iteration 5:
test accuracy is 0.8571428571428571
F1 score is 0.8927335640138407


              precision    recall  f1-score   support

     Class 0       0.91      0.93      0.92       105
     Class 1       0.97      0.70      0.82        44
     Class 2       0.93      0.87      0.90        15
     Class 3       1.00      0.96      0.98        46

   micro avg       0.94      0.89      0.91       210
   macro avg       0.95      0.87      0.90       210
weighted avg       0.94      0.89      0.91       210
 samples avg       0.87      0.89      0.88       210






In [12]:
# Train and evaluate the multiclass logistic regression on the WikiPathways
# features, with 5 outer and 5 inner cross-validation folds.
# NOTE(review): chain_pca is False, so explained_variance is presumably not
# used in this run -- confirm against train_multiclass_log_reg.
wikipathways_all_metrics = train_multiclass_log_reg(
    wikipathways_pathway_features,
    wikipathways_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=False,
    explained_variance=0.95,
)

  'precision', 'predicted', average, warn_for)
1it [01:12, 72.80s/it]

For iteration 1:
test accuracy is 0.8428571428571429
F1 score is 0.872852233676976


              precision    recall  f1-score   support

     Class 0       0.90      0.97      0.93        98
     Class 1       0.86      0.64      0.74        50
     Class 2       1.00      0.78      0.88        18
     Class 3       0.98      0.98      0.98        44

   micro avg       0.92      0.88      0.90       210
   macro avg       0.93      0.84      0.88       210
weighted avg       0.91      0.88      0.89       210
 samples avg       0.86      0.88      0.87       210



  'precision', 'predicted', average, warn_for)
2it [02:06, 63.40s/it]

For iteration 2:
test accuracy is 0.8333333333333334
F1 score is 0.8634920634920635


              precision    recall  f1-score   support

     Class 0       0.93      0.94      0.93       119
     Class 1       0.69      0.60      0.64        40
     Class 2       0.87      0.72      0.79        18
     Class 3       1.00      0.97      0.98        33

   micro avg       0.89      0.86      0.88       210
   macro avg       0.87      0.81      0.84       210
weighted avg       0.89      0.86      0.87       210
 samples avg       0.85      0.86      0.85       210



  'precision', 'predicted', average, warn_for)
3it [03:00, 60.05s/it]

For iteration 3:
test accuracy is 0.8714285714285714
F1 score is 0.9065743944636678


              precision    recall  f1-score   support

     Class 0       0.93      0.97      0.95       111
     Class 1       0.74      0.74      0.74        31
     Class 2       0.86      0.67      0.75        18
     Class 3       1.00      1.00      1.00        50

   micro avg       0.91      0.92      0.92       210
   macro avg       0.88      0.85      0.86       210
weighted avg       0.91      0.92      0.91       210
 samples avg       0.90      0.92      0.90       210



  'precision', 'predicted', average, warn_for)
4it [03:50, 57.58s/it]

For iteration 4:
test accuracy is 0.8571428571428571
F1 score is 0.8982035928143712


              precision    recall  f1-score   support

     Class 0       0.96      0.91      0.94       129
     Class 1       0.76      0.80      0.78        40
     Class 2       0.71      0.92      0.80        13
     Class 3       1.00      0.96      0.98        28

   micro avg       0.90      0.90      0.90       210
   macro avg       0.86      0.90      0.87       210
weighted avg       0.91      0.90      0.90       210
 samples avg       0.88      0.90      0.89       210



  'precision', 'predicted', average, warn_for)
5it [04:41, 56.20s/it]

For iteration 5:
test accuracy is 0.819047619047619
F1 score is 0.8799999999999999


              precision    recall  f1-score   support

     Class 0       0.95      0.94      0.94       110
     Class 1       0.81      0.63      0.71        46
     Class 2       0.80      0.53      0.64        15
     Class 3       0.97      0.95      0.96        39

   micro avg       0.92      0.84      0.88       210
   macro avg       0.88      0.76      0.81       210
weighted avg       0.91      0.84      0.87       210
 samples avg       0.83      0.84      0.83       210






In [13]:
# Train and evaluate the multiclass logistic regression on the merged
# (PathwayForte) features, with 5 outer and 5 inner cross-validation folds.
# NOTE(review): chain_pca is False, so explained_variance is presumably not
# used in this run -- confirm against train_multiclass_log_reg.
merged_all_metrics = train_multiclass_log_reg(
    merged_pathway_features,
    merged_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=False,
    explained_variance=0.95,
)

  'precision', 'predicted', average, warn_for)
1it [04:03, 243.85s/it]

For iteration 1:
test accuracy is 0.8571428571428571
F1 score is 0.904109589041096


              precision    recall  f1-score   support

     Class 0       0.93      0.95      0.94       106
     Class 1       0.91      0.72      0.81        43
     Class 2       0.85      0.65      0.73        17
     Class 3       1.00      0.98      0.99        44

   micro avg       0.93      0.89      0.91       210
   macro avg       0.92      0.82      0.87       210
weighted avg       0.93      0.89      0.91       210
 samples avg       0.87      0.89      0.88       210



  'precision', 'predicted', average, warn_for)
2it [07:37, 228.99s/it]

For iteration 2:
test accuracy is 0.8047619047619048
F1 score is 0.8627450980392156


              precision    recall  f1-score   support

     Class 0       0.90      0.94      0.92       113
     Class 1       0.81      0.60      0.69        43
     Class 2       0.60      0.60      0.60        15
     Class 3       1.00      0.95      0.97        39

   micro avg       0.88      0.85      0.86       210
   macro avg       0.83      0.77      0.80       210
weighted avg       0.88      0.85      0.86       210
 samples avg       0.83      0.85      0.83       210



  'precision', 'predicted', average, warn_for)
3it [11:54, 238.17s/it]

For iteration 3:
test accuracy is 0.8571428571428571
F1 score is 0.8761904761904762


              precision    recall  f1-score   support

     Class 0       0.93      0.91      0.92       109
     Class 1       0.75      0.83      0.79        47
     Class 2       1.00      0.88      0.93        16
     Class 3       1.00      0.97      0.99        38

   micro avg       0.90      0.90      0.90       210
   macro avg       0.92      0.90      0.91       210
weighted avg       0.91      0.90      0.90       210
 samples avg       0.88      0.90      0.89       210



  'precision', 'predicted', average, warn_for)
4it [16:02, 240.59s/it]

For iteration 4:
test accuracy is 0.9047619047619048
F1 score is 0.9430379746835443


              precision    recall  f1-score   support

     Class 0       0.98      0.96      0.97       125
     Class 1       0.78      0.91      0.84        32
     Class 2       1.00      0.67      0.80        15
     Class 3       0.95      1.00      0.97        38

   micro avg       0.94      0.94      0.94       210
   macro avg       0.93      0.88      0.90       210
weighted avg       0.95      0.94      0.94       210
 samples avg       0.92      0.94      0.93       210



  'precision', 'predicted', average, warn_for)
5it [20:11, 242.28s/it]

For iteration 5:
test accuracy is 0.8523809523809524
F1 score is 0.8888888888888887


              precision    recall  f1-score   support

     Class 0       0.93      0.96      0.94       114
     Class 1       0.82      0.64      0.72        42
     Class 2       0.83      0.79      0.81        19
     Class 3       1.00      1.00      1.00        35

   micro avg       0.92      0.89      0.90       210
   macro avg       0.90      0.85      0.87       210
weighted avg       0.91      0.89      0.90       210
 samples avg       0.87      0.89      0.87       210






In [14]:
# Show the KEGG metrics: a defaultdict keyed by iteration number (1-5), each
# holding a list with one dict of 'Accuracy', 'F1 score', 'Precision' and 'Recall'.
kegg_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.8238095238095238,
               'F1 score': 0.8679245283018868,
               'Precision': 0.8888888888888888,
               'Recall': 0.8761904761904762}],
             2: [{'Accuracy': 0.8095238095238095,
               'F1 score': 0.8634920634920635,
               'Precision': 0.8701923076923077,
               'Recall': 0.861904761904762}],
             3: [{'Accuracy': 0.7761904761904762,
               'F1 score': 0.8317460317460317,
               'Precision': 0.8495145631067961,
               'Recall': 0.8333333333333334}],
             4: [{'Accuracy': 0.8095238095238095,
               'F1 score': 0.8466666666666666,
               'Precision': 0.8984771573604061,
               'Recall': 0.8428571428571429}],
             5: [{'Accuracy': 0.7904761904761904,
               'F1 score': 0.8421052631578947,
               'Precision': 0.8712871287128713,
               'Recall': 0.8380952380952381}]})

In [15]:
# Show the Reactome metrics: a defaultdict keyed by iteration number (1-5), each
# holding a list with one dict of 'Accuracy', 'F1 score', 'Precision' and 'Recall'.
reactome_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.8428571428571429,
               'F1 score': 0.8933333333333334,
               'Precision': 0.9292929292929293,
               'Recall': 0.8761904761904762}],
             2: [{'Accuracy': 0.819047619047619,
               'F1 score': 0.8606060606060606,
               'Precision': 0.863849765258216,
               'Recall': 0.8761904761904762}],
             3: [{'Accuracy': 0.8714285714285714,
               'F1 score': 0.9114754098360655,
               'Precision': 0.9271844660194175,
               'Recall': 0.9095238095238095}],
             4: [{'Accuracy': 0.8571428571428571,
               'F1 score': 0.9078947368421052,
               'Precision': 0.9207920792079208,
               'Recall': 0.8857142857142857}],
             5: [{'Accuracy': 0.8571428571428571,
               'F1 score': 0.8927335640138407,
               'Precision': 0.9393939393939394,
               'Recall': 0.8857142857142857}]})

In [16]:
# Show the WikiPathways metrics: a defaultdict keyed by iteration number (1-5), each
# holding a list with one dict of 'Accuracy', 'F1 score', 'Precision' and 'Recall'.
wikipathways_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.8428571428571429,
               'F1 score': 0.872852233676976,
               'Precision': 0.9154228855721394,
               'Recall': 0.8761904761904762}],
             2: [{'Accuracy': 0.8333333333333334,
               'F1 score': 0.8634920634920635,
               'Precision': 0.8916256157635468,
               'Recall': 0.861904761904762}],
             3: [{'Accuracy': 0.8714285714285714,
               'F1 score': 0.9065743944636678,
               'Precision': 0.9146919431279621,
               'Recall': 0.919047619047619}],
             4: [{'Accuracy': 0.8571428571428571,
               'F1 score': 0.8982035928143712,
               'Precision': 0.9043062200956937,
               'Recall': 0.9}],
             5: [{'Accuracy': 0.819047619047619,
               'F1 score': 0.8799999999999999,
               'Precision': 0.921875,
               'Recall': 0.8428571428571429}]})

In [17]:
# Show the merged (PathwayForte) metrics: a defaultdict keyed by iteration number (1-5),
# each holding a list with one dict of 'Accuracy', 'F1 score', 'Precision' and 'Recall'.
merged_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.8571428571428571,
               'F1 score': 0.904109589041096,
               'Precision': 0.9346733668341709,
               'Recall': 0.8857142857142857}],
             2: [{'Accuracy': 0.8047619047619048,
               'F1 score': 0.8627450980392156,
               'Precision': 0.8811881188118812,
               'Recall': 0.8476190476190476}],
             3: [{'Accuracy': 0.8571428571428571,
               'F1 score': 0.8761904761904762,
               'Precision': 0.9,
               'Recall': 0.9}],
             4: [{'Accuracy': 0.9047619047619048,
               'F1 score': 0.9430379746835443,
               'Precision': 0.9425837320574163,
               'Recall': 0.9380952380952381}],
             5: [{'Accuracy': 0.8523809523809524,
               'F1 score': 0.8888888888888887,
               'Precision': 0.916256157635468,
               'Recall': 0.8857142857142857}]})