# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.utils import shuffle
# Reset TensorFlow's default graph before building the model (useful when this
# cell is re-run inside the same interpreter session).
tf.compat.v1.reset_default_graph()

# Seed both TensorFlow and NumPy. Seeding TensorFlow alone is not enough:
# sklearn.utils.shuffle is called without a random_state further down and then
# falls back to NumPy's global RNG, so the row order fed to the network would
# otherwise change from run to run.
tf.random.set_seed(1234)
np.random.seed(1234)
def _select_and_label(expression, genes, label):
    """Return the rows of `expression` named in `genes`, transposed and labelled.

    The rows are picked by label with ``.loc``, the table is flipped so that
    the original columns become rows (presumably one row per sample -- confirm
    against the input files), and a constant ``'Exited'`` column holding
    `label` is appended as the class target.
    """
    picked = expression.loc[genes]
    flipped = pd.DataFrame(picked.values.T, index=picked.columns, columns=picked.index)
    flipped['Exited'] = label
    return flipped


# First column of the header-less gene list file holds the row labels to keep.
gene_last = pd.read_table('gene_last.txt', sep = "\t", header = None).iloc[:, 0]
gene_last

# Expression tables for the two classes.
df_normal = pd.read_table('TCGA_PAAD_mRNA_normal.txt', sep = "\t")
df_tumor = pd.read_table('TCGA_PAAD_mRNA_tumor.txt', sep = "\t")
df_normal

# Rows from the "normal" file get label 0, rows from the "tumor" file get label 1.
df1 = _select_and_label(df_normal, gene_last, 0)
df2 = _select_and_label(df_tumor, gene_last, 1)
# Apply the 80/20 split to each class separately, then merge the pieces, so the
# split ratio holds within both the label-0 and the label-1 rows.
train_normal, test_normal = train_test_split(df1, test_size=0.2, random_state=2)
train_tumor, test_tumor = train_test_split(df2, test_size=0.2, random_state=2)
train = pd.concat([train_normal, train_tumor])
test = pd.concat([test_normal, test_tumor])

# Mix the two classes; after concat all label-0 rows precede all label-1 rows.
train = shuffle(train)
test = shuffle(test)

# Keep the targets as single-column DataFrames.
label_train = train[['Exited']]
label_test = test[['Exited']]

# Remove the target from the feature matrices. drop() keeps the remaining
# columns in their original order; the previous set-based selection produced
# an arbitrary column order and is rejected by pandas 2.x, which no longer
# accepts a set as an indexer.
train = train.drop(columns=['Exited'])
test = test.drop(columns=['Exited'])
# Fully connected binary classifier: three ReLU hidden layers (400 -> 100 -> 40)
# followed by a single sigmoid output unit.
n_features = train.shape[1]
hidden_units = (400, 100, 40)

model = tf.keras.Sequential()
# The first hidden layer also declares the input width (one input per feature column).
model.add(layers.Dense(hidden_units[0], input_dim=n_features, activation='relu'))
for width in hidden_units[1:]:
    model.add(layers.Dense(width, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

adam = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

# The held-out `test` frame is passed as validation data, so the validation
# metrics recorded in `history` are computed on it after every epoch.
history = model.fit(
    x=train,
    y=label_train,
    validation_data=(test, label_test),
    epochs=1000,
    batch_size=12,
)
# Per-epoch metrics recorded by model.fit, also shown as a table.
metrics = history.history
result = pd.DataFrame(metrics)
result

# Start from matplotlib's defaults, then apply the same font settings to each figure.
plt.rcParams.update(plt.rcParamsDefault)
font_settings = {'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False}

# Figure 1: training vs. validation accuracy.
plt.figure(dpi=160)
plt.rcParams.update(font_settings)
for key in ('binary_accuracy', 'val_binary_accuracy'):
    plt.plot(metrics[key])
# Validation accuracy averaged over all epochs.
print(np.mean(metrics['val_binary_accuracy']))
plt.legend(['train accuracy', 'valid accuracy'], loc='lower right')
plt.show()

# Figure 2: training vs. validation loss.
plt.figure(dpi=160)
plt.rcParams.update(font_settings)
for key in ('loss', 'val_loss'):
    plt.plot(metrics[key])
plt.legend(['train loss', 'valid loss'], loc='upper right')
plt.show()
import shap
shap.initjs()

# Kernel SHAP with the training features as the background dataset; the
# attributions themselves are computed for the rows of `test`.
explainer = shap.KernelExplainer(model, train)
shap_values = explainer.shap_values(test)

# The feature matrix given to summary_plot must be the one the SHAP values were
# computed for. They come from `test`, so pass `test` (not `train`): otherwise
# the rows of the two matrices do not correspond to each other.
shap.summary_plot(shap_values, test, max_display = 20)
# Helper returning the positions of the truthy entries of a sequence.
which = lambda lst:list(np.where(lst)[0])

# Pick the (rows, features) attribution matrix of the model's single output.
# Depending on the shap version, shap_values is either a list with one array
# per model output or a single ndarray; a 3-D ndarray is assumed to carry the
# outputs on its last axis -- TODO confirm against the installed shap version.
if isinstance(shap_values, list):
    output_shap = shap_values[0]
else:
    output_shap = np.asarray(shap_values)
    if output_shap.ndim == 3:
        output_shap = output_shap[:, :, 0]

# Feature weight = mean absolute SHAP value over all explained rows, kept as a
# one-column frame ('weight') indexed by feature name.
weight_df = (
    pd.DataFrame(output_shap, columns=train.columns)
    .abs()
    .mean()
    .to_frame('weight')
)

# Rank features from most to least influential.
weight_values = weight_df['weight'].sort_values(ascending = False)

weight_genes  = weight_values.index
weight_values
weight_values.to_csv('weight_values_TCGA_mRNA.txt',sep='\t')