In [None]:
import warnings
import pandas as pd
import urllib.request
import numpy as np
from IPython.display import display
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from textwrap import wrap
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
import glob

In [None]:
def getDataAndLabels(features, label_encoder=None):
    """Split a feature table into model inputs and integer-encoded labels.

    Parameters
    ----------
    features : pandas.DataFrame
        Table with a ``cancer_type`` column holding the class label of each
        row. The first three columns are treated as non-feature columns and
        are dropped from the returned data.
        NOTE(review): the original comment only names the cancer type and
        patient_id columns, which accounts for two of the three dropped
        columns; the third is presumably a saved index column - confirm
        against the CSV layout.
    label_encoder : LabelEncoder-like, optional
        An already fitted encoder (anything exposing ``transform``). When
        given, it is reused instead of fitting a new one, so that several
        splits (e.g. train and test) share exactly the same label -> integer
        mapping even if one split is missing some classes. When omitted, a
        new ``preprocessing.LabelEncoder`` is fitted on ``features`` itself,
        which is the original behaviour.

    Returns
    -------
    dict
        ``'data'``: the feature columns (everything after the first three),
        ``'labels'``: the integer-encoded ``cancer_type`` values,
        ``'label_encoder'``: the encoder that produced ``'labels'``.
    """
    labels_string = features.cancer_type

    if label_encoder is None:
        # No shared encoder supplied: learn the mapping from this table alone.
        le = preprocessing.LabelEncoder()
        labels = le.fit_transform(labels_string)
    else:
        # Reuse the caller's mapping so codes are comparable across splits.
        le = label_encoder
        labels = le.transform(labels_string)

    # Keep only the feature columns (positional: everything from column 3 on).
    data = features[features.columns[3:]]
    return {'data': data, 'labels': labels, 'label_encoder': le}

In [None]:
print('Loading training data ...')

# Training files are named "./data/features_<name>.train.csv"; the dataset
# name is whatever sits between this fixed prefix and suffix. Deriving the
# slice bounds from the strings keeps them in sync with the glob pattern.
TRAIN_PREFIX = "./data/features_"
TRAIN_SUFFIX = ".train.csv"

# glob makes no promise about ordering (it depends on the file system), so
# sort to make the load order and the dict's insertion order reproducible.
train_files = sorted(glob.glob(TRAIN_PREFIX + "*" + TRAIN_SUFFIX))

# Maps dataset name -> {'data', 'labels', 'label_encoder'} for each file.
all_train_data = {}
for filename in train_files:
    name = filename[len(TRAIN_PREFIX):-len(TRAIN_SUFFIX)]
    print(" ", name)
    train_features = pd.read_csv(filename)
    all_train_data[name] = getDataAndLabels(train_features)

print("done.")

In [None]:
print('Loading test data ...')

# Test files are named "./data/features_<name>.test.csv"; the dataset name is
# whatever sits between this fixed prefix and suffix. Deriving the slice
# bounds from the strings keeps them in sync with the glob pattern.
TEST_PREFIX = "./data/features_"
TEST_SUFFIX = ".test.csv"

# glob makes no promise about ordering (it depends on the file system), so
# sort to make the load order and the dict's insertion order reproducible.
test_files = sorted(glob.glob(TEST_PREFIX + "*" + TEST_SUFFIX))

# Maps dataset name -> {'data', 'labels', 'label_encoder'} for each file.
all_test_data = {}
for filename in test_files:
    name = filename[len(TEST_PREFIX):-len(TEST_SUFFIX)]
    print(" ", name)
    test_features = pd.read_csv(filename)
    all_test_data[name] = getDataAndLabels(test_features)

print("done.")

In [None]:
# Walk through every loaded dataset: print its name framed by two banner
# lines, then bind its train and test entries. Once the loop finishes,
# `name`, `train` and `test` refer to the last dataset visited.
for name, train in all_train_data.items():
    print("************************", name, "************************", sep="\n")
    test = all_test_data[name]

In [None]:
import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras.layers import Dense as Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout

# ---------------------------------------------------------------------------
# Feed-forward neural network on the 'bestfit_with_topgenes' feature set:
# build the targets, define and train the model, then report test metrics.
# ---------------------------------------------------------------------------
train = all_train_data['bestfit_with_topgenes']
test = all_test_data['bestfit_with_topgenes']
train_data = train['data']
train_labels = train['labels']
test_data = test['data']

# Each split carries its own label encoder, so the integer codes only line up
# when both splits contain exactly the same classes. Map the test labels back
# to class names and re-encode them with the training encoder so that class k
# means the same thing in train, test and the model's output. This is a no-op
# when the two encoders already agree, and raises ValueError if the test split
# contains a class the training split never saw.
test_labels = train['label_encoder'].transform(
    test['label_encoder'].inverse_transform(test['labels'])
)

# One-hot targets. The width is pinned to the number of training classes so
# train targets, test targets and the output layer always have the same shape
# (letting to_categorical infer it per split can give mismatched widths).
num_classes = len(train['label_encoder'].classes_)
tr_lab = to_categorical(train_labels, num_classes=num_classes)
test_lab = to_categorical(test_labels, num_classes=num_classes)

# Model: a funnel of ReLU layers with 20% dropout after each; only the first
# layer carries an L1+L2 weight penalty.
model = K.Sequential()
model.add(Dense(2000, input_dim=train_data.shape[1], activation='relu', kernel_regularizer=regularizers.l1_l2(l2=0.01, l1=0.01)))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(400, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# Single-label multiclass output: softmax yields a proper probability
# distribution over the classes, which is what categorical_crossentropy
# expects (independent sigmoids do not sum to 1).
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])

# Train, then score on the held-out split. `evaluate` is [loss, accuracy],
# following the loss + metrics order given to compile().
model.fit(train_data, tr_lab, epochs=100, batch_size=100)
evaluate = model.evaluate(x=test_data, y=test_lab)
test_pred = model.predict(test_data)

# Turn the per-class probabilities into a single predicted class per sample
# once, and reuse it for every metric below.
test_pred_labels = np.argmax(test_pred, axis=1)

nn_prf_scores = precision_recall_fscore_support(test_labels, test_pred_labels, average='weighted')
nn_scores_by_label = precision_recall_fscore_support(test_labels, test_pred_labels, average=None)
nn_macro = precision_recall_fscore_support(test_labels, test_pred_labels, average='macro')
print("Precision = (Weighted) ", np.round(nn_prf_scores[0], 4))
print("Recall = (Weighted) ", np.round(nn_prf_scores[1], 4))
print("F1 = (Weighted) ", np.round(nn_prf_scores[2], 4))
print("Precision (Macro) = ", np.round(nn_macro[0], 4))
print("Recall (Macro) = ", np.round(nn_macro[1], 4))
print("F1 (Macro) = ", np.round(nn_macro[2], 4))
print("Loss = ", str(evaluate[0]))
print("Test Accuracy = ", str(evaluate[1]))