# Cancer Data Experimental ML Analysis v4

In [1]:
#import useful libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
from matplotlib.pyplot import figure
import sys
import umap.umap_ as umap
import random
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.decomposition import KernelPCA
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import KFold

In [None]:
# import sys
# !{sys.executable} -m pip install umap-learn
# import umap.umap_ as umap

In [2]:
#import data (beware, it takes around 10 minutes)
# Both inputs are tab-separated files with a header row; going by the file names,
# `df` is the clinical table and `df2` is the log2(TPM) expression table.
tsv_options = {'sep': '\t', 'header': 0}
df = pd.read_csv('../clinical_TumorCompendium_v11_PolyA_2020-04-09.tsv', **tsv_options)
df2 = pd.read_csv('../TumorCompendium_v11_PolyA_hugo_log2tpm_58581genes_2020-04-09.tsv', **tsv_options)

### Methods to Conduct Experiments

In [4]:
def experiments_BACs(X_train, X_test, y_train, y_test, k):
    """Score every classifier on top of four dimensionality-reduction methods.

    Each reducer is fit on the training split only and then applied to both
    splits; the reduced data is handed to ``experiment_BACs_helper``, which
    returns one balanced-accuracy score per classifier.

    Parameters
    ----------
    X_train, X_test : array-like, one row per sample
        Training and test features.
    y_train, y_test : array-like
        Class labels for the two splits.
    k : int
        Number of components (PCA, kernel PCA) / selected features (mutual
        information). UMAP uses ``4 * k`` neighbours.

    Returns
    -------
    tuple
        ``(pca_bacs, mi_bacs, umap_bacs, kpca_bacs)``, each being whatever
        ``experiment_BACs_helper`` returns for that reduction.
    """

    #CST goes here

    # --- PCA ---
    pca = PCA(n_components=k)
    pca.fit(X_train)
    pca_bacs = experiment_BACs_helper(
        pca.transform(X_train), pca.transform(X_test), y_train, y_test, k
    )

    # --- Mutual-information feature selection ---
    # Keep k features so this branch follows the caller's k like PCA / kPCA do
    # (it used to be hard-coded to 4 regardless of the experiment).
    selectKBest = SelectKBest(score_func=mutual_info_classif, k=k)
    selectKBest.fit(X_train, y_train)
    mi_bacs = experiment_BACs_helper(
        selectKBest.transform(X_train), selectKBest.transform(X_test), y_train, y_test, k
    )

    # --- UMAP (labels are passed to fit, i.e. supervised embedding) ---
    # NOTE(review): n_components is left at the UMAP default, so the embedding
    # size does not follow k; only n_neighbors does. Confirm this is intended.
    mapper = umap.UMAP(n_neighbors=(k*4)).fit(X_train, y_train)
    umap_bacs = experiment_BACs_helper(
        mapper.transform(X_train), mapper.transform(X_test), y_train, y_test, k
    )

    # --- Kernel PCA with a polynomial kernel ---
    kpca = KernelPCA(n_components=k, kernel='poly')
    kpca.fit(X_train)
    kpca_bacs = experiment_BACs_helper(
        kpca.transform(X_train), kpca.transform(X_test), y_train, y_test, k
    )

    return  pca_bacs, mi_bacs, umap_bacs, kpca_bacs
    

In [5]:
def experiment_BACs_helper(X_train_trans, X_test_trans, y_train, y_test, k):
    """Fit six classifiers on reduced data and return their balanced accuracies.

    Parameters
    ----------
    X_train_trans, X_test_trans : array-like
        Already-reduced training and test features.
    y_train, y_test : array-like
        Class labels for the two splits.
    k : int
        Not used here; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Length-6 array of balanced-accuracy scores on the test split, in the
        order kNN, linear SVM, Gaussian NB, random forest, SGD, LDA.
    """
    classifiers = [
        KNeighborsClassifier(n_neighbors=6),
        SVC(kernel='linear', C=1),
        GaussianNB(),
        RandomForestClassifier(random_state=137),
        # Seeded like the random forest so repeated runs give the same scores.
        make_pipeline(
            StandardScaler(),
            SGDClassifier(max_iter=1000, tol=1e-3, random_state=137),
        ),
        LinearDiscriminantAnalysis(),
    ]

    temp_BACs = np.zeros(len(classifiers))
    for slot, classifier in enumerate(classifiers):
        classifier.fit(X_train_trans, y_train)
        predictions = classifier.predict(X_test_trans)
        temp_BACs[slot] = balanced_accuracy_score(y_test, predictions)

    return temp_BACs
    

In [6]:
#5 train/test splits; each split runs 4 DR methods x 6 classifiers = 24 ML experiments
def runexperiments_full_6(g1_samples, g2_samples, g3_samples, g4_samples, g5_samples, g6_samples):
    """Average balanced accuracies for a 6-class problem over five test-set sizes.

    Each argument is a frame with one column per sample (it is transposed
    before stacking); the i-th argument receives class label ``i`` (0.0-5.0).

    For every test fraction in (0.10, 0.15, 0.20, 0.25, 0.30) the data is split
    with ``random_state=42``, z-score standardised, and passed to
    ``experiments_BACs`` with ``k=6``.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(pca, mi, umap, kpca)`` scores, each of length 6 (one entry per
        classifier), averaged over the five splits.
    """
    groups = [g1_samples, g2_samples, g3_samples, g4_samples, g5_samples, g6_samples]

    # Rows = samples, columns = features.
    features = pd.concat([group.T for group in groups]).to_numpy(dtype=float)
    # One float label per sample, in the same order the groups were stacked.
    labels = np.repeat(
        np.arange(len(groups), dtype=float),
        [group.shape[1] for group in groups],
    )

    splits_rats = np.array([.1, .15, .2, .25, .3])
    # Row order matches the return order of experiments_BACs: PCA, MI, UMAP, kPCA.
    BAC_sums = np.zeros((4, 6))

    for test_fraction in splits_rats:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_fraction, random_state=42
        )

        # Fit the scaler on the training rows only: standardising before the
        # split would leak test-set mean/variance into training. copy=False
        # scales the freshly split arrays in place to avoid another large copy.
        scaler = StandardScaler(copy=False).fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        BAC_sums += np.vstack(experiments_BACs(X_train, X_test, y_train, y_test, 6))

    BAC_sums /= splits_rats.shape[0]
    BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca = BAC_sums

    return BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca
    

In [7]:
#5 train/test splits; each split runs 4 DR methods x 6 classifiers = 24 ML experiments
def runexperiments_full_4(g1_samples, g2_samples, g3_samples, g4_samples):
    """Average balanced accuracies for a 4-class problem over five test-set sizes.

    Each argument is a frame with one column per sample (it is transposed
    before stacking); the i-th argument receives class label ``i`` (0.0-3.0).

    For every test fraction in (0.10, 0.15, 0.20, 0.25, 0.30) the data is split
    with ``random_state=42``, z-score standardised, and passed to
    ``experiments_BACs`` with ``k=4``.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(pca, mi, umap, kpca)`` scores, each of length 6 (one entry per
        classifier), averaged over the five splits.
    """
    groups = [g1_samples, g2_samples, g3_samples, g4_samples]

    # Rows = samples, columns = features.
    features = pd.concat([group.T for group in groups]).to_numpy(dtype=float)
    # One float label per sample, in the same order the groups were stacked.
    labels = np.repeat(
        np.arange(len(groups), dtype=float),
        [group.shape[1] for group in groups],
    )

    splits_rats = np.array([.1, .15, .2, .25, .3])
    # Row order matches the return order of experiments_BACs: PCA, MI, UMAP, kPCA.
    BAC_sums = np.zeros((4, 6))

    for test_fraction in splits_rats:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_fraction, random_state=42
        )

        # Fit the scaler on the training rows only: standardising before the
        # split would leak test-set mean/variance into training. copy=False
        # scales the freshly split arrays in place to avoid another large copy.
        scaler = StandardScaler(copy=False).fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        BAC_sums += np.vstack(experiments_BACs(X_train, X_test, y_train, y_test, 4))

    BAC_sums /= splits_rats.shape[0]
    BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca = BAC_sums

    return BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca
    

In [1]:
#5 train/test splits; each split runs 4 DR methods x 6 classifiers = 24 ML experiments
def runexperiments_full_2(g1_samples, g2_samples):
    """Average balanced accuracies for a 2-class problem over five test-set sizes.

    Unlike the 4- and 6-class variants, the inputs are NOT transposed here:
    each argument must already have one row per sample. ``g1_samples`` rows
    receive label 0.0 and ``g2_samples`` rows label 1.0.

    For every test fraction in (0.10, 0.15, 0.20, 0.25, 0.30) the data is split
    with ``random_state=42``, z-score standardised, and passed to
    ``experiments_BACs`` with ``k=2``.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(pca, mi, umap, kpca)`` scores, each of length 6 (one entry per
        classifier), averaged over the five splits.
    """
    groups = [g1_samples, g2_samples]

    # Rows = samples, columns = features.
    features = pd.concat(groups).to_numpy(dtype=float)
    # One float label per sample, in the same order the groups were stacked.
    labels = np.repeat(
        np.arange(len(groups), dtype=float),
        [len(group) for group in groups],
    )

    splits_rats = np.array([.1, .15, .2, .25, .3])
    # Row order matches the return order of experiments_BACs: PCA, MI, UMAP, kPCA.
    BAC_sums = np.zeros((4, 6))

    for test_fraction in splits_rats:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_fraction, random_state=42
        )

        # Fit the scaler on the training rows only: standardising before the
        # split would leak test-set mean/variance into training. copy=False
        # scales the freshly split arrays in place to avoid another large copy.
        scaler = StandardScaler(copy=False).fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        BAC_sums += np.vstack(experiments_BACs(X_train, X_test, y_train, y_test, 2))

    BAC_sums /= splits_rats.shape[0]
    BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca = BAC_sums

    return BAC_sums_pca, BAC_sums_mi, BAC_sums_umap, BAC_sums_kpca
    

### Get all data organized for each experimental permutation (1-7):

In [None]:
#1). Classifying between 6 types of carcinomas (6-way classification)

In [72]:
# Sample ids for each of the six carcinoma types, in class-label order.
samples1, samples2, samples3, samples4, samples5, samples6 = [
    df.loc[df['disease'] == disease_name, 'th_sampleid']
    for disease_name in (
        'lung squamous cell carcinoma',
        'kidney clear cell carcinoma',
        'thyroid carcinoma',
        'head & neck squamous cell carcinoma',
        'kidney papillary cell carcinoma',
        'bladder urothelial carcinoma',
    )
]

# Matching columns of df2: one column per selected sample.
g1_samples, g2_samples, g3_samples, g4_samples, g5_samples, g6_samples = [
    df2.loc[:, sample_ids.tolist()]
    for sample_ids in (samples1, samples2, samples3, samples4, samples5, samples6)
]

In [None]:
# runexperiments_full_6 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex1_cst.
ex1_pca, ex1_mi, ex1_umap, ex1_kpca = runexperiments_full_6(g1_samples, g2_samples, g3_samples, g4_samples, g5_samples, g6_samples)
ex1_cst = np.full_like(ex1_pca, np.nan)

In [None]:
#2). Classifying between 4 types of adenocarcinomas (4-way classification)

In [73]:
# Sample ids for each of the four adenocarcinoma types, in class-label order.
samples7, samples8, samples9, samples10 = [
    df.loc[df['disease'] == disease_name, 'th_sampleid']
    for disease_name in (
        'lung adenocarcinoma',
        'stomach adenocarcinoma',
        'prostate adenocarcinoma',
        'colon adenocarcinoma',
    )
]

# Matching columns of df2: one column per selected sample.
g7_samples, g8_samples, g9_samples, g10_samples = [
    df2.loc[:, sample_ids.tolist()]
    for sample_ids in (samples7, samples8, samples9, samples10)
]

In [None]:
# runexperiments_full_4 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex2_cst.
ex2_pca, ex2_mi, ex2_umap, ex2_kpca = runexperiments_full_4(g7_samples, g8_samples, g9_samples, g10_samples)
ex2_cst = np.full_like(ex2_pca, np.nan)

In [None]:
#3). Classifying between male/female within 6 types of carcinomas (2-way classification)

In [74]:
# For every carcinoma type, pull the df2 columns of the male ("pos") and
# female ("neg") samples. Frames are collected in the order
# male, female, male, female, ... and unpacked into the numbered names below.
gender_frames = []
for disease_name in (
    'lung squamous cell carcinoma',
    'kidney clear cell carcinoma',
    'thyroid carcinoma',
    'head & neck squamous cell carcinoma',
    'kidney papillary cell carcinoma',
    'bladder urothelial carcinoma',
):
    df_samples = df[df['disease'] == disease_name]
    for gender in ('male', 'female'):
        samples = df_samples.loc[df_samples['gender'] == gender, 'th_sampleid']
        gender_frames.append(df2.loc[:, samples.tolist()])

(pos_samples11, neg_samples11,
 pos_samples12, neg_samples12,
 pos_samples13, neg_samples13,
 pos_samples14, neg_samples14,
 pos_samples15, neg_samples15,
 pos_samples16, neg_samples16) = gender_frames

In [75]:
# Stack the per-disease frames with samples as rows (hence the transpose).
male_frames = (pos_samples11, pos_samples12, pos_samples13, pos_samples14, pos_samples15, pos_samples16)
female_frames = (neg_samples11, neg_samples12, neg_samples13, neg_samples14, neg_samples15, neg_samples16)
pos_samples3 = pd.concat([frame.T for frame in male_frames])
neg_samples3 = pd.concat([frame.T for frame in female_frames])

In [None]:
# runexperiments_full_2 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex3_cst.
ex3_pca, ex3_mi, ex3_umap, ex3_kpca = runexperiments_full_2(pos_samples3, neg_samples3)
ex3_cst = np.full_like(ex3_pca, np.nan)

In [None]:
#4). Classifying between male/female within 4 types of adenocarcinomas (2-way classification)

In [76]:
# For every adenocarcinoma type, pull the df2 columns of the male ("pos") and
# female ("neg") samples. Frames are collected in the order
# male, female, male, female, ... and unpacked into the numbered names below.
gender_frames = []
for disease_name in (
    'lung adenocarcinoma',
    'stomach adenocarcinoma',
    'prostate adenocarcinoma',
    'colon adenocarcinoma',
):
    df_samples = df[df['disease'] == disease_name]
    for gender in ('male', 'female'):
        samples = df_samples.loc[df_samples['gender'] == gender, 'th_sampleid']
        gender_frames.append(df2.loc[:, samples.tolist()])

(pos_samples17, neg_samples17,
 pos_samples18, neg_samples18,
 pos_samples19, neg_samples19,
 pos_samples20, neg_samples20) = gender_frames

In [77]:
# Stack the per-disease frames with samples as rows (hence the transpose).
male_frames = (pos_samples17, pos_samples18, pos_samples19, pos_samples20)
female_frames = (neg_samples17, neg_samples18, neg_samples19, neg_samples20)
pos_samples4 = pd.concat([frame.T for frame in male_frames])
neg_samples4 = pd.concat([frame.T for frame in female_frames])

In [None]:
# runexperiments_full_2 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex4_cst.
ex4_pca, ex4_mi, ex4_umap, ex4_kpca = runexperiments_full_2(pos_samples4, neg_samples4)
ex4_cst = np.full_like(ex4_pca, np.nan)

In [None]:
#5). Classifying between ped/non-ped within 6 types of carcinomas (2-way classification)

In [78]:
# For every carcinoma type, pull the df2 columns of the samples whose 'pedaya'
# value is 'Yes, age < 30 years' ("pos") and of those where it is 'No' ("neg").
# Frames are collected in the order pos, neg, pos, neg, ... and unpacked below.
pedaya_frames = []
for disease_name in (
    'lung squamous cell carcinoma',
    'kidney clear cell carcinoma',
    'thyroid carcinoma',
    'head & neck squamous cell carcinoma',
    'kidney papillary cell carcinoma',
    'bladder urothelial carcinoma',
):
    df_samples = df[df['disease'] == disease_name]
    for pedaya_value in ('Yes, age < 30 years', 'No'):
        samples = df_samples.loc[df_samples['pedaya'] == pedaya_value, 'th_sampleid']
        pedaya_frames.append(df2.loc[:, samples.tolist()])

(pos_samples21, neg_samples21,
 pos_samples22, neg_samples22,
 pos_samples23, neg_samples23,
 pos_samples24, neg_samples24,
 pos_samples25, neg_samples25,
 pos_samples26, neg_samples26) = pedaya_frames

In [79]:
# Stack the per-disease frames with samples as rows (hence the transpose).
ped_frames = (pos_samples21, pos_samples22, pos_samples23, pos_samples24, pos_samples25, pos_samples26)
non_ped_frames = (neg_samples21, neg_samples22, neg_samples23, neg_samples24, neg_samples25, neg_samples26)
pos_samples5 = pd.concat([frame.T for frame in ped_frames])
neg_samples5 = pd.concat([frame.T for frame in non_ped_frames])

In [None]:
# runexperiments_full_2 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex5_cst.
ex5_pca, ex5_mi, ex5_umap, ex5_kpca = runexperiments_full_2(pos_samples5, neg_samples5)
ex5_cst = np.full_like(ex5_pca, np.nan)

In [None]:
#6). Classifying between ped/non-ped within 4 types of adenocarcinomas (2-way classification)

In [80]:
# For every adenocarcinoma type, pull the df2 columns of the samples whose
# 'pedaya' value is 'Yes, age < 30 years' ("pos") and of those where it is 'No'
# ("neg"). Frames are collected in the order pos, neg, pos, neg, ... and unpacked below.
pedaya_frames = []
for disease_name in (
    'lung adenocarcinoma',
    'stomach adenocarcinoma',
    'prostate adenocarcinoma',
    'colon adenocarcinoma',
):
    df_samples = df[df['disease'] == disease_name]
    for pedaya_value in ('Yes, age < 30 years', 'No'):
        samples = df_samples.loc[df_samples['pedaya'] == pedaya_value, 'th_sampleid']
        pedaya_frames.append(df2.loc[:, samples.tolist()])

(pos_samples27, neg_samples27,
 pos_samples28, neg_samples28,
 pos_samples29, neg_samples29,
 pos_samples30, neg_samples30) = pedaya_frames

In [81]:
# Stack the per-disease frames with samples as rows (hence the transpose).
ped_frames = (pos_samples27, pos_samples28, pos_samples29, pos_samples30)
non_ped_frames = (neg_samples27, neg_samples28, neg_samples29, neg_samples30)
pos_samples6 = pd.concat([frame.T for frame in ped_frames])
neg_samples6 = pd.concat([frame.T for frame in non_ped_frames])

In [None]:
# runexperiments_full_2 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex6_cst.
ex6_pca, ex6_mi, ex6_umap, ex6_kpca = runexperiments_full_2(pos_samples6, neg_samples6)
ex6_cst = np.full_like(ex6_pca, np.nan)

In [None]:
#7). Classifying between carcinomas (6 types) and adenocarcinomas (4 types) (2-way classification)

In [83]:
# Carcinoma groups form the positive class, adenocarcinoma groups the negative
# class; both are stacked with samples as rows (hence the transpose).
# NOTE(review): the saved run of this cell ended in a MemoryError while
# allocating a (58581, 2762) float64 array - consider freeing earlier
# intermediates or downcasting before re-running.
carcinoma_frames = (g1_samples, g2_samples, g3_samples, g4_samples, g5_samples, g6_samples)
adenocarcinoma_frames = (g7_samples, g8_samples, g9_samples, g10_samples)
pos_samples7 = pd.concat([frame.T for frame in carcinoma_frames])
neg_samples7 = pd.concat([frame.T for frame in adenocarcinoma_frames])

MemoryError: Unable to allocate 1.21 GiB for an array with shape (58581, 2762) and data type float64

In [None]:
# runexperiments_full_2 returns four score arrays (PCA, MI, UMAP, kPCA); unpacking
# five names raised ValueError. CST is not implemented yet, so keep a NaN
# placeholder for the plotting cell that still expects ex7_cst.
ex7_pca, ex7_mi, ex7_umap, ex7_kpca = runexperiments_full_2(pos_samples7, neg_samples7)
ex7_cst = np.full_like(ex7_pca, np.nan)

### Graphs of All Results


In [70]:
#method that takes in the arrays of BAC values to display

def graph_BAC_results(cstBACs, pcaBACs, miBACs, umapBACs, kpcaBACs, experiment):
    """Draw a grouped bar chart of balanced-accuracy scores.

    One group of bars per classifier (kNN, SVM, GNB, RF, SGD, LDA) and, inside
    each group, one bar per dimensionality-reduction / feature-selection method
    (CST, PCA, MI, UMAP, kPCA).

    Parameters
    ----------
    cstBACs, pcaBACs, miBACs, umapBACs, kpcaBACs : array-like of 6 floats, or None
        Scores per classifier for each method. A series that is ``None`` or
        entirely NaN is left out of the chart and the legend; its slot in each
        group stays empty so the other bars keep their positions.
    experiment : str
        Used as the figure title.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    series = [
        ('CST', cstBACs, '#000000'),
        ('PCA', pcaBACs, '#404040'),
        ('MI', miBACs, '#7f7f7f'),
        ('UMAP', umapBACs, '#bfbfbf'),
        ('kPCA', kpcaBACs, '#ffffff'),
    ]
    classifiers = ["kNN", "SVM", "GNB", "RF", "SGD", "LDA"]

    x = np.arange(len(classifiers))
    # Bar width comes from the number of series; the previous code read it from
    # a global `Groups` that this notebook never defines.
    width = 0.86 / len(series)

    fig, ax = plt.subplots(figsize=(15, 10))
    for position, (label, scores, color) in enumerate(series):
        if scores is None:
            continue
        scores = np.asarray(scores, dtype=float)
        if np.isnan(scores).all():
            continue
        # Centre the bars of one group around its tick: -2w, -w, 0, +w, +2w.
        offset = (position - (len(series) - 1) / 2) * width
        ax.bar(x + offset, scores, width, color=color, edgecolor='black', label=label)

    ax.set_xticks(x)
    ax.set_xticklabels(classifiers)
    # The ticks are classifiers; the DR / feature-selection methods are the legend entries.
    ax.set_xlabel("Classifier")
    ax.set_ylabel("Balanced Accuracy scores")
    ax.legend(title="DR / Feature Selection", fontsize=12)
    ax.set_title(experiment)
    plt.show()
    return


In [None]:
# Experiment 1 figure: scores per classifier, grouped by reduction method.
experiment1 = "Classifying Between\n6 Types of Carcinomas"
graph_BAC_results(
    cstBACs=ex1_cst,
    pcaBACs=ex1_pca,
    miBACs=ex1_mi,
    umapBACs=ex1_umap,
    kpcaBACs=ex1_kpca,
    experiment=experiment1,
)

In [None]:
# Experiment 2 figure: scores per classifier, grouped by reduction method.
experiment2 = "Classifying Between\n4 Types of Adenocarcinomas"
graph_BAC_results(
    cstBACs=ex2_cst,
    pcaBACs=ex2_pca,
    miBACs=ex2_mi,
    umapBACs=ex2_umap,
    kpcaBACs=ex2_kpca,
    experiment=experiment2,
)

In [None]:
# Experiment 3 figure: scores per classifier, grouped by reduction method.
experiment3 = "Classifying Between Male/Female\nwithin 6 Types of Carcinomas"
graph_BAC_results(
    cstBACs=ex3_cst,
    pcaBACs=ex3_pca,
    miBACs=ex3_mi,
    umapBACs=ex3_umap,
    kpcaBACs=ex3_kpca,
    experiment=experiment3,
)

In [None]:
# Experiment 4 figure: scores per classifier, grouped by reduction method.
experiment4 = "Classifying Between Male/Female\nwithin 4 Types of Adenocarcinomas"
graph_BAC_results(
    cstBACs=ex4_cst,
    pcaBACs=ex4_pca,
    miBACs=ex4_mi,
    umapBACs=ex4_umap,
    kpcaBACs=ex4_kpca,
    experiment=experiment4,
)

In [None]:
# Experiment 5 figure: scores per classifier, grouped by reduction method.
experiment5 = "Classifying Between Ped/Non-ped\nwithin 6 Types of Carcinomas"
graph_BAC_results(
    cstBACs=ex5_cst,
    pcaBACs=ex5_pca,
    miBACs=ex5_mi,
    umapBACs=ex5_umap,
    kpcaBACs=ex5_kpca,
    experiment=experiment5,
)

In [None]:
# Experiment 6 figure: scores per classifier, grouped by reduction method.
experiment6 = "Classifying Between Ped/Non-ped\nwithin 4 Types of Adenocarcinomas"
graph_BAC_results(
    cstBACs=ex6_cst,
    pcaBACs=ex6_pca,
    miBACs=ex6_mi,
    umapBACs=ex6_umap,
    kpcaBACs=ex6_kpca,
    experiment=experiment6,
)

In [None]:
# Experiment 7 figure: scores per classifier, grouped by reduction method.
experiment7 = "Classifying Between Carcinomas (6 Types)\nand Adenocarcinomas (4 Types)"
graph_BAC_results(
    cstBACs=ex7_cst,
    pcaBACs=ex7_pca,
    miBACs=ex7_mi,
    umapBACs=ex7_umap,
    kpcaBACs=ex7_kpca,
    experiment=experiment7,
)