# Evaluate cancer subtype classification SVM model with main classification metrics 

In [50]:
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn.linear_model import LogisticRegressionCV
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn import metrics
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import StratifiedKFold, KFold, train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import label_binarize

from tqdm import tqdm

from pathway_forte.constants import *
from pathway_forte.multiclass_prediction import *

In [3]:
# Tab-separated BRCA subtype matrix stored under DATA/tcga_datasets/brca
CANCER_SUBTYPES = os.path.join(
    DATA,
    'tcga_datasets',
    'brca',
    'brca_subtypes_matrix.txt',
)
brca_subtypes_df = pd.read_csv(CANCER_SUBTYPES, sep='\t')

# ssGSEA score tables for BRCA, one file per pathway resource
kegg_ssgsea_path = os.path.join(KEGG_SSGSEA, 'kegg_brca.tsv')
wikipathways_ssgsea_path = os.path.join(WIKIPATHWAYS_SSGSEA, 'wikipathways_brca.tsv')
reactome_ssgsea_path = os.path.join(REACTOME_SSGSEA, 'reactome_brca.tsv')
merge_ssgsea_path = os.path.join(MERGE_SSGSEA, 'merge_brca.tsv')

In [4]:
# Collect the sample IDs (with their cancer subtypes) from the subtype matrix file.
# NOTE(review): get_sample_ids_with_cancer_subtypes comes from a pathway_forte star
# import; its exact return type is not visible in this notebook — confirm before relying on it.
patient_ids = get_sample_ids_with_cancer_subtypes(CANCER_SUBTYPES)

In [5]:
# Load one ssGSEA scores dataFrame per pathway resource
# (same call order as the targets: KEGG, WikiPathways, Reactome, merged)
(
    kegg_enrichment_score_df,
    wikipathways_enrichment_score_df,
    reactome_enrichment_score_df,
    merge_enrichment_score_df,
) = (
    stabilize_ssgsea_scores_df(ssgsea_path)
    for ssgsea_path in (
        kegg_ssgsea_path,
        wikipathways_ssgsea_path,
        reactome_ssgsea_path,
        merge_ssgsea_path,
    )
)

Match the sample IDs in each ssGSEA scores dataFrame with those in the cancer subtype list, so that only cancer patients with a specified cancer subtype are retained in the scores dataFrame. This filters out all control samples and any cancer cases whose subtype is normal or NA. A total of 1050 samples are retained from the complete set of 1215 samples. TCGA reports 5 cancer subtypes: normal, basal, Her2, LumA and LumB; because the normal subtype is filtered out, four classes remain for classification.

In [6]:
# Keep only the samples whose IDs appear in patient_ids, one frame per resource
# (same call order as the targets: KEGG, Reactome, WikiPathways, merged)
(
    kegg_pathway_features,
    reactome_pathway_features,
    wikipathways_pathway_features,
    merged_pathway_features,
) = (
    match_samples(enrichment_score_df, patient_ids)
    for enrichment_score_df in (
        kegg_enrichment_score_df,
        reactome_enrichment_score_df,
        wikipathways_enrichment_score_df,
        merge_enrichment_score_df,
    )
)

In [7]:
# Report the (samples, pathways) shape of every feature matrix
print('The number of samples by features/pathways for each resource are:')
for resource_name, features_df in (
    ('KEGG', kegg_pathway_features),
    ('Reactome', reactome_pathway_features),
    ('WikiPathways', wikipathways_pathway_features),
    ('PathwayForte', merged_pathway_features),
):
    print('{}: {}'.format(resource_name, features_df.shape))

The number of samples by features/pathways for each resource are:
KEGG: (1050, 311)
Reactome: (1050, 1170)
WikiPathways: (1050, 362)
PathwayForte: (1050, 1726)


In [8]:
# Build the class labels for each feature frame from the subtype matrix,
# so that labels follow the same sample order as the corresponding features
(
    kegg_class_labels,
    reactome_class_labels,
    wikipathways_class_labels,
    merged_class_labels,
) = (
    get_class_labels(features_df, brca_subtypes_df)
    for features_df in (
        kegg_pathway_features,
        reactome_pathway_features,
        wikipathways_pathway_features,
        merged_pathway_features,
    )
)

In [38]:
def convert_df_to_features_array(df):
    """Return the values of a features dataFrame as a numpy array.

    Every column of ``df`` is treated as a feature (pathway) and every row as
    a sample; the index is dropped.

    :param df: pandas dataFrame of samples (rows) by features (columns)
    :return: numpy array with the same shape and ordering as ``df``
    """
    # Use the frame's values directly: re-selecting the frame by its own list of
    # column names is redundant and duplicates columns when names are repeated.
    return np.asarray(df.values)

In [61]:
# Convert each matched feature frame into a plain numpy array for scikit-learn
(
    kegg_features_array,
    reactome_features_array,
    wikipathways_features_array,
    merged_features_array,
) = (
    convert_df_to_features_array(features_df)
    for features_df in (
        kegg_pathway_features,
        reactome_pathway_features,
        wikipathways_pathway_features,
        merged_pathway_features,
    )
)

In [53]:
def train_multiclass_log_reg(X, y, inner_cv, outer_cv, chain_pca=False, explained_variance=0.95, roc_auc=False,
                             random_state=None):
    """Cross-validate a one-vs-one linear SVM and collect per-fold metrics.

    Despite its name, this function does not fit a logistic regression: each
    fold trains ``OneVsOneClassifier(LinearSVC())``, i.e. one binary linear SVM
    per pair of classes.

    :param X: array of samples (rows) by features (columns), indexable by an array of row positions
    :param y: class labels, ordered the same way as the rows of ``X``
    :param inner_cv: currently unused; kept for interface compatibility
    :param outer_cv: number of shuffled K-fold splits to evaluate
    :param chain_pca: if True, pass each train/test split through ``pca_chaining`` before fitting
    :param explained_variance: value forwarded to ``pca_chaining`` when ``chain_pca`` is True
    :param roc_auc: currently unused; kept for interface compatibility
    :param random_state: seed for the fold shuffling and for LinearSVC; None keeps the
        previous non-deterministic behaviour
    :return: defaultdict mapping the 1-based fold number to a one-element list holding a
        dict with the keys 'Accuracy', 'F1 score', 'Precision' and 'Recall'
    """
    # Positional labels, so fold indices can slice them directly
    y = np.asarray(y)

    all_metrics = defaultdict(list)

    # Derive the report rows from the data instead of hard-coding four classes.
    # Names stay positional ('Class 0', 'Class 1', ...) to match the previous output.
    class_labels = np.unique(y)
    target_names = ['Class {}'.format(position) for position in range(len(class_labels))]

    kf = KFold(n_splits=outer_cv, shuffle=True, random_state=random_state)

    # total lets tqdm render a real progress bar instead of a bare counter
    iterator = tqdm(kf.split(X, y), total=outer_cv)

    for fold, (train_index, test_index) in enumerate(iterator, start=1):

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        if chain_pca:
            # Apply PCA
            X_train, X_test = pca_chaining(X_train, X_test, explained_variance)

        # Fit one binary linear SVM per pair of classes (one-vs-one)
        classifier = OneVsOneClassifier(
            LinearSVC(random_state=random_state)
        )

        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        # Fraction of test samples whose predicted label matches the true label
        accuracy = metrics.accuracy_score(y_test, y_pred)
        # Micro-average over all classes, like precision and recall below; restricting
        # the labels to the predicted ones would ignore classes that are never predicted.
        f1 = metrics.f1_score(y_test, y_pred, average='micro')

        metrics_dict = {
            'Accuracy': accuracy,
            'F1 score': f1,
            'Precision': metrics.precision_score(y_test, y_pred, average='micro'),
            'Recall': metrics.recall_score(y_test, y_pred, average='micro')
        }

        all_metrics[fold].append(metrics_dict)

        print('For iteration {}:'.format(fold))
        print('test accuracy is {}'.format(accuracy))
        print('F1 score is {}'.format(f1))
        print("\n")
        # Passing labels keeps the report aligned with target_names even when a fold
        # happens to contain no sample of some class
        print(metrics.classification_report(y_test, y_pred, labels=class_labels, target_names=target_names))

    return all_metrics

In [54]:
# 5-fold evaluation on the KEGG features; chain_pca=True sends every split
# through pca_chaining with explained_variance=0.95 before fitting
kegg_all_metrics = train_multiclass_log_reg(
    kegg_features_array,
    kegg_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=True,
    explained_variance=0.95,
)












0it [00:00, ?it/s][A[A[A[A[A[A[A[A[A[A[A










1it [00:00,  2.75it/s][A[A[A[A[A[A[A[A[A[A[A

For iteration 1:
test accuracy is 0.8666666666666667
F1 score is 0.8666666666666667


              precision    recall  f1-score   support

     Class 0       0.87      0.96      0.91       117
     Class 1       0.77      0.61      0.68        38
     Class 2       0.77      0.59      0.67        17
     Class 3       0.97      0.97      0.97        38

   micro avg       0.87      0.87      0.87       210
   macro avg       0.84      0.78      0.81       210
weighted avg       0.86      0.87      0.86       210














2it [00:00,  2.65it/s][A[A[A[A[A[A[A[A[A[A[A

For iteration 2:
test accuracy is 0.9047619047619048
F1 score is 0.9047619047619048


              precision    recall  f1-score   support

     Class 0       0.88      0.97      0.93       109
     Class 1       0.90      0.70      0.79        40
     Class 2       0.83      0.71      0.77        14
     Class 3       0.98      0.98      0.98        47

   micro avg       0.90      0.90      0.90       210
   macro avg       0.90      0.84      0.87       210
weighted avg       0.91      0.90      0.90       210














3it [00:01,  2.54it/s][A[A[A[A[A[A[A[A[A[A[A

For iteration 3:
test accuracy is 0.8809523809523809
F1 score is 0.8809523809523809


              precision    recall  f1-score   support

     Class 0       0.87      0.95      0.91       105
     Class 1       0.80      0.70      0.75        47
     Class 2       0.92      0.73      0.81        15
     Class 3       0.98      0.95      0.96        43

   micro avg       0.88      0.88      0.88       210
   macro avg       0.89      0.84      0.86       210
weighted avg       0.88      0.88      0.88       210














4it [00:01,  2.55it/s][A[A[A[A[A[A[A[A[A[A[A

For iteration 4:
test accuracy is 0.8952380952380953
F1 score is 0.8952380952380953


              precision    recall  f1-score   support

     Class 0       0.91      0.94      0.93       118
     Class 1       0.73      0.77      0.75        39
     Class 2       1.00      0.67      0.80        18
     Class 3       1.00      1.00      1.00        35

   micro avg       0.90      0.90      0.90       210
   macro avg       0.91      0.84      0.87       210
weighted avg       0.90      0.90      0.89       210














5it [00:01,  2.55it/s][A[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A

For iteration 5:
test accuracy is 0.8095238095238095
F1 score is 0.8095238095238095


              precision    recall  f1-score   support

     Class 0       0.86      0.90      0.88       118
     Class 1       0.62      0.58      0.60        43
     Class 2       0.64      0.50      0.56        18
     Class 3       0.91      0.97      0.94        31

   micro avg       0.81      0.81      0.81       210
   macro avg       0.76      0.74      0.75       210
weighted avg       0.80      0.81      0.80       210



In [59]:
# 5-fold evaluation on the Reactome features; chain_pca=True sends every split
# through pca_chaining with explained_variance=0.95 before fitting.
# Uses reactome_features_array (built together with the other feature arrays):
# the previous name, r_features_array, is never defined in this notebook and
# only worked through leftover kernel state.
reactome_all_metrics = train_multiclass_log_reg(
    reactome_features_array,
    reactome_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=True,
    explained_variance=0.95,
)



























1it [00:02,  2.99s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 1:
test accuracy is 0.9
F1 score is 0.9


              precision    recall  f1-score   support

     Class 0       0.92      0.93      0.92       109
     Class 1       0.77      0.77      0.77        39
     Class 2       0.83      0.71      0.77        14
     Class 3       0.98      1.00      0.99        48

   micro avg       0.90      0.90      0.90       210
   macro avg       0.88      0.85      0.86       210
weighted avg       0.90      0.90      0.90       210
















2it [00:06,  3.15s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 2:
test accuracy is 0.919047619047619
F1 score is 0.919047619047619


              precision    recall  f1-score   support

     Class 0       0.95      0.94      0.94       112
     Class 1       0.80      0.89      0.84        44
     Class 2       0.92      0.75      0.83        16
     Class 3       1.00      0.97      0.99        38

   micro avg       0.92      0.92      0.92       210
   macro avg       0.92      0.89      0.90       210
weighted avg       0.92      0.92      0.92       210
















3it [00:10,  3.39s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 3:
test accuracy is 0.9142857142857143
F1 score is 0.9142857142857143


              precision    recall  f1-score   support

     Class 0       0.93      0.93      0.93       120
     Class 1       0.82      0.82      0.82        44
     Class 2       0.87      1.00      0.93        13
     Class 3       1.00      0.97      0.98        33

   micro avg       0.91      0.91      0.91       210
   macro avg       0.90      0.93      0.92       210
weighted avg       0.92      0.91      0.91       210
















4it [00:13,  3.36s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 4:
test accuracy is 0.9238095238095239
F1 score is 0.9238095238095239


              precision    recall  f1-score   support

     Class 0       0.93      0.98      0.96       110
     Class 1       0.88      0.77      0.82        39
     Class 2       0.90      0.86      0.88        21
     Class 3       0.95      0.95      0.95        40

   micro avg       0.92      0.92      0.92       210
   macro avg       0.92      0.89      0.90       210
weighted avg       0.92      0.92      0.92       210
















5it [00:16,  3.28s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 5:
test accuracy is 0.9095238095238095
F1 score is 0.9095238095238095


              precision    recall  f1-score   support

     Class 0       0.95      0.93      0.94       116
     Class 1       0.79      0.83      0.81        41
     Class 2       0.78      0.78      0.78        18
     Class 3       1.00      1.00      1.00        35

   micro avg       0.91      0.91      0.91       210
   macro avg       0.88      0.88      0.88       210
weighted avg       0.91      0.91      0.91       210



In [62]:
# 5-fold evaluation on the WikiPathways features; chain_pca=True sends every
# split through pca_chaining with explained_variance=0.95 before fitting
wikipathways_all_metrics = train_multiclass_log_reg(
    wikipathways_features_array,
    wikipathways_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=True,
    explained_variance=0.95,
)














0it [00:00, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A












1it [00:00,  2.23it/s][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 1:
test accuracy is 0.9
F1 score is 0.9


              precision    recall  f1-score   support

     Class 0       0.94      0.95      0.94       106
     Class 1       0.76      0.85      0.80        46
     Class 2       1.00      0.47      0.64        17
     Class 3       0.95      1.00      0.98        41

   micro avg       0.90      0.90      0.90       210
   macro avg       0.91      0.82      0.84       210
weighted avg       0.91      0.90      0.89       210
















2it [00:00,  2.15it/s][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 2:
test accuracy is 0.9238095238095239
F1 score is 0.9238095238095239


              precision    recall  f1-score   support

     Class 0       0.95      0.96      0.96       111
     Class 1       0.86      0.82      0.84        44
     Class 2       0.71      0.77      0.74        13
     Class 3       1.00      0.98      0.99        42

   micro avg       0.92      0.92      0.92       210
   macro avg       0.88      0.88      0.88       210
weighted avg       0.92      0.92      0.92       210
















3it [00:01,  2.00it/s][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 3:
test accuracy is 0.9238095238095239
F1 score is 0.9238095238095239


              precision    recall  f1-score   support

     Class 0       0.94      0.96      0.95       122
     Class 1       0.76      0.79      0.77        28
     Class 2       0.85      0.73      0.79        15
     Class 3       1.00      0.98      0.99        45

   micro avg       0.92      0.92      0.92       210
   macro avg       0.89      0.86      0.87       210
weighted avg       0.92      0.92      0.92       210
















4it [00:01,  2.08it/s][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 4:
test accuracy is 0.9047619047619048
F1 score is 0.9047619047619048


              precision    recall  f1-score   support

     Class 0       0.93      0.95      0.94       118
     Class 1       0.71      0.77      0.74        35
     Class 2       1.00      0.75      0.86        20
     Class 3       1.00      0.97      0.99        37

   micro avg       0.90      0.90      0.90       210
   macro avg       0.91      0.86      0.88       210
weighted avg       0.91      0.90      0.91       210
















5it [00:02,  2.12it/s][A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 5:
test accuracy is 0.8904761904761904
F1 score is 0.8904761904761904


              precision    recall  f1-score   support

     Class 0       0.87      0.97      0.92       110
     Class 1       0.90      0.69      0.78        54
     Class 2       0.88      0.82      0.85        17
     Class 3       0.97      1.00      0.98        29

   micro avg       0.89      0.89      0.89       210
   macro avg       0.90      0.87      0.88       210
weighted avg       0.89      0.89      0.89       210



In [63]:
# 5-fold evaluation on the merged (PathwayForte) features; chain_pca=True sends
# every split through pca_chaining with explained_variance=0.95 before fitting
merged_all_metrics = train_multiclass_log_reg(
    merged_features_array,
    merged_class_labels,
    inner_cv=5,
    outer_cv=5,
    chain_pca=True,
    explained_variance=0.95,
)



























1it [00:04,  4.31s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 1:
test accuracy is 0.919047619047619
F1 score is 0.919047619047619


              precision    recall  f1-score   support

     Class 0       0.92      0.98      0.95       100
     Class 1       0.89      0.82      0.85        50
     Class 2       0.76      0.72      0.74        18
     Class 3       1.00      0.98      0.99        42

   micro avg       0.92      0.92      0.92       210
   macro avg       0.90      0.87      0.88       210
weighted avg       0.92      0.92      0.92       210
















2it [00:09,  4.81s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 2:
test accuracy is 0.9095238095238095
F1 score is 0.9095238095238095


              precision    recall  f1-score   support

     Class 0       0.95      0.92      0.93       125
     Class 1       0.71      0.81      0.76        31
     Class 2       0.89      0.84      0.86        19
     Class 3       0.97      1.00      0.99        35

   micro avg       0.91      0.91      0.91       210
   macro avg       0.88      0.89      0.89       210
weighted avg       0.91      0.91      0.91       210
















3it [00:14,  4.93s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 3:
test accuracy is 0.9
F1 score is 0.9


              precision    recall  f1-score   support

     Class 0       0.91      0.94      0.93       108
     Class 1       0.85      0.74      0.80        47
     Class 2       0.76      0.87      0.81        15
     Class 3       0.97      0.97      0.97        40

   micro avg       0.90      0.90      0.90       210
   macro avg       0.88      0.88      0.88       210
weighted avg       0.90      0.90      0.90       210
















4it [00:20,  5.07s/it][A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 4:
test accuracy is 0.9333333333333333
F1 score is 0.9333333333333333


              precision    recall  f1-score   support

     Class 0       0.94      0.95      0.94       117
     Class 1       0.83      0.83      0.83        35
     Class 2       1.00      0.88      0.94        17
     Class 3       0.98      1.00      0.99        41

   micro avg       0.93      0.93      0.93       210
   macro avg       0.94      0.91      0.92       210
weighted avg       0.93      0.93      0.93       210
















5it [00:25,  5.09s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A

For iteration 5:
test accuracy is 0.9238095238095239
F1 score is 0.9238095238095239


              precision    recall  f1-score   support

     Class 0       0.95      0.94      0.94       117
     Class 1       0.81      0.89      0.85        44
     Class 2       1.00      0.77      0.87        13
     Class 3       0.97      0.97      0.97        36

   micro avg       0.92      0.92      0.92       210
   macro avg       0.93      0.89      0.91       210
weighted avg       0.93      0.92      0.92       210



In [64]:
# Show the per-fold metrics (keyed by 1-based fold number) from the KEGG run
kegg_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.8666666666666667,
               'F1 score': 0.8666666666666667,
               'Precision': 0.8666666666666667,
               'Recall': 0.8666666666666667}],
             2: [{'Accuracy': 0.9047619047619048,
               'F1 score': 0.9047619047619048,
               'Precision': 0.9047619047619048,
               'Recall': 0.9047619047619048}],
             3: [{'Accuracy': 0.8809523809523809,
               'F1 score': 0.8809523809523809,
               'Precision': 0.8809523809523809,
               'Recall': 0.8809523809523809}],
             4: [{'Accuracy': 0.8952380952380953,
               'F1 score': 0.8952380952380953,
               'Precision': 0.8952380952380953,
               'Recall': 0.8952380952380953}],
             5: [{'Accuracy': 0.8095238095238095,
               'F1 score': 0.8095238095238095,
               'Precision': 0.8095238095238095,
               'Recall': 0.8095238095238095}]})

In [65]:
# Show the per-fold metrics (keyed by 1-based fold number) from the Reactome run
reactome_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.9,
               'F1 score': 0.9,
               'Precision': 0.9,
               'Recall': 0.9}],
             2: [{'Accuracy': 0.919047619047619,
               'F1 score': 0.919047619047619,
               'Precision': 0.919047619047619,
               'Recall': 0.919047619047619}],
             3: [{'Accuracy': 0.9142857142857143,
               'F1 score': 0.9142857142857143,
               'Precision': 0.9142857142857143,
               'Recall': 0.9142857142857143}],
             4: [{'Accuracy': 0.9238095238095239,
               'F1 score': 0.9238095238095239,
               'Precision': 0.9238095238095239,
               'Recall': 0.9238095238095239}],
             5: [{'Accuracy': 0.9095238095238095,
               'F1 score': 0.9095238095238095,
               'Precision': 0.9095238095238095,
               'Recall': 0.9095238095238095}]})

In [66]:
# Show the per-fold metrics (keyed by 1-based fold number) from the WikiPathways run
wikipathways_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.9,
               'F1 score': 0.9,
               'Precision': 0.9,
               'Recall': 0.9}],
             2: [{'Accuracy': 0.9238095238095239,
               'F1 score': 0.9238095238095239,
               'Precision': 0.9238095238095239,
               'Recall': 0.9238095238095239}],
             3: [{'Accuracy': 0.9238095238095239,
               'F1 score': 0.9238095238095239,
               'Precision': 0.9238095238095239,
               'Recall': 0.9238095238095239}],
             4: [{'Accuracy': 0.9047619047619048,
               'F1 score': 0.9047619047619048,
               'Precision': 0.9047619047619048,
               'Recall': 0.9047619047619048}],
             5: [{'Accuracy': 0.8904761904761904,
               'F1 score': 0.8904761904761904,
               'Precision': 0.8904761904761904,
               'Recall': 0.8904761904761904}]})

In [67]:
# Show the per-fold metrics (keyed by 1-based fold number) from the merged run
merged_all_metrics

defaultdict(list,
            {1: [{'Accuracy': 0.919047619047619,
               'F1 score': 0.919047619047619,
               'Precision': 0.919047619047619,
               'Recall': 0.919047619047619}],
             2: [{'Accuracy': 0.9095238095238095,
               'F1 score': 0.9095238095238095,
               'Precision': 0.9095238095238095,
               'Recall': 0.9095238095238095}],
             3: [{'Accuracy': 0.9,
               'F1 score': 0.9,
               'Precision': 0.9,
               'Recall': 0.9}],
             4: [{'Accuracy': 0.9333333333333333,
               'F1 score': 0.9333333333333333,
               'Precision': 0.9333333333333333,
               'Recall': 0.9333333333333333}],
             5: [{'Accuracy': 0.9238095238095239,
               'F1 score': 0.9238095238095239,
               'Precision': 0.9238095238095239,
               'Recall': 0.9238095238095239}]})