In [None]:
%matplotlib inline
import tensorflow as tf
import numpy as np
from pandas_plink import read_plink
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from joblib import Parallel, delayed
from sklearn import metrics
from math import sqrt
import lime
import lime.lime_tabular
from lime import submodular_pick
import random
from sklearn.metrics import roc_curve,roc_auc_score

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.preprocessing import OneHotEncoder
from deepexplain.tensorflow import DeepExplain

In [None]:
# Experiment parameters.
# path_to_files : root folder holding the train/validation/test PLINK files.
# path_logs     : folder the model checkpoint is restored from.
# threshold     : suffix appended to the PLINK file prefixes when loading.
path_to_files = "/work/breastcancer/clean_test/"
path_logs = "/work/breastcancer/clean_test/logs/"
threshold = "0.001"

In [None]:
# Read the (bim, fam, bed) triple of each split; the file prefix of every
# split ends with the configured threshold string.
train_prefix = path_to_files + "train/sig" + threshold
validation_prefix = path_to_files + "validation/val" + threshold
test_prefix = path_to_files + "test/test" + threshold

bim, fam, bed = read_plink(train_prefix)
bim2, fam2, bed2 = read_plink(validation_prefix)
bim3, fam3, bed3 = read_plink(test_prefix)

In [None]:
# Materialise the genotype matrices, fill missing calls with 2 (described in
# the original notes as "homozygous major") and build the per-split arrays
# with shape (individuals, SNPs).


def _dense_genotypes(lazy_bed):
    """Return the genotypes of `lazy_bed` as a dense uint8 NumPy array.

    Missing calls (NaN) are replaced by 2 *before* the cast to uint8. An
    integer array cannot represent NaN, so casting first would leave the
    np.isnan() mask empty and the missing calls would never be filled.

    Parameters
    ----------
    lazy_bed : array-like exposing ``.compute()``
        Genotype matrix as returned by ``read_plink``.

    Returns
    -------
    numpy.ndarray
        uint8 array with the same shape as ``lazy_bed``.
    """
    print("Compute")
    # Explicit float copy: guarantees a writable buffer that can hold NaN.
    dense = np.array(lazy_bed.compute(), dtype=float)
    print("Filling Null Data")
    dense[np.isnan(dense)] = 2
    print("Convertion")
    return dense.astype('uint8')


def _shifted_trait(fam_table):
    """Return the "trait" column of `fam_table` as integers minus one."""
    return fam_table["trait"].astype("int") - 1


def _frame_with_labels(genotypes, labels):
    """Return a DataFrame of `genotypes` transposed, plus a "Y" label column."""
    frame = pd.DataFrame(genotypes.T)
    frame["Y"] = labels
    return frame


# Dense genotype matrices, one per split.
X = _dense_genotypes(bed)
X_val = _dense_genotypes(bed2)
X_test = _dense_genotypes(bed3)

# Labels: trait value shifted down by one.
Y = _shifted_trait(fam)
Y_val = _shifted_trait(fam2)
Y_test = _shifted_trait(fam3)

# Preparing data.shape=(individuals,SNP)
Xdf = _frame_with_labels(X, Y)
Xdf_val = _frame_with_labels(X_val, Y_val)
Xdf_test = _frame_with_labels(X_test, Y_test)

# Getting np arrays: features without the label column, labels as a column vector.
x_train = Xdf.drop(['Y'], axis=1).values
y_train = Xdf[['Y']].values

x_val = Xdf_val.drop(['Y'], axis=1).values
y_val = Xdf_val[['Y']].values

x_test = Xdf_test.drop(['Y'], axis=1).values
y_test = Xdf_test[['Y']].values

In [None]:
# Section marker: the cells that follow reset and rebuild the TensorFlow graph.
'''Creating a Graph'''

In [None]:
# Discard the current default graph so the cells below rebuild the network from scratch.
# NOTE(review): a session from a previous run may still be open at this point
# (it is only closed in the following cell) — confirm the intended order.
tf.reset_default_graph()

In [None]:
# Close the session left over from a previous run of this notebook, if any.
# On a fresh kernel `sess` does not exist yet (it is created in the next
# cell), so an unguarded sess.close() would stop "Run All" with a NameError.
if "sess" in globals():
    sess.close()

In [None]:
# Open the interactive session used by every later cell, with the CPU
# device count set to 44.
session_config = tf.ConfigProto(device_count={"CPU": 44})
sess = tf.InteractiveSession(config=session_config)


In [None]:
# Graph inputs.
# NOTE: from here on `X` and `Y` are TensorFlow placeholders; they rebind the
# names that held the NumPy genotype matrix and label series in the
# data-preparation cell.
# The feature width is taken from the loaded data instead of a hard-coded
# constant, so it follows the SNP set selected by `threshold`.
n_snps = x_train.shape[1]
X = tf.placeholder(tf.float32, shape=(None, n_snps), name="X")
# Own op name: previously this placeholder was also named "X", duplicating
# the name of the genotype input.
X_random = tf.placeholder(tf.float32, shape=(None, 1), name="X_random")
Y = tf.placeholder(tf.float32, shape=(None, 1), name="Y")

In [None]:
# Instantiate the graph inside a DeepExplain context, restore the trained
# weights and compute DeepLIFT attributions for the confidently predicted
# samples of the test and validation sets.
#
# Results left in the namespace:
#   pred                              sigmoid outputs for the validation set (last run)
#   important1 / important2           row indices (test / validation) with prediction > 0.67
#   explanation_test / explanation_val  DeepLIFT attributions for those rows
with DeepExplain(session=sess) as de:  # < enter DeepExplain context
    # `training` defaults to False, so dropout and batch-norm run in inference mode here.
    training = tf.placeholder_with_default(False, shape=(), name='training')
    initializer = tf.contrib.layers.xavier_initializer()

    # Layer order and names are kept exactly as before so that the variables
    # match the ones stored in the checkpoint.
    hidden00_drop = tf.layers.dropout(X, 0.5, training=training)
    hidden01 = tf.layers.dense(hidden00_drop, 1000, name="hidden01", activation=None, kernel_initializer=initializer)
    hidden01_norm = tf.layers.batch_normalization(hidden01, training=training, momentum=0.9)
    act_hidden01 = tf.nn.leaky_relu(hidden01_norm)
    hidden01_drop = tf.layers.dropout(act_hidden01, 0.5, training=training)
    hidden0 = tf.layers.dense(hidden01_drop, 250, name="hidden0", activation=None, kernel_initializer=initializer)
    hidden0_norm = tf.layers.batch_normalization(hidden0, training=training, momentum=0.9)
    act_hidden0 = tf.nn.leaky_relu(hidden0_norm)
    hidden0_drop = tf.layers.dropout(act_hidden0, 0.5, training=training)
    hidden1 = tf.layers.dense(hidden0_drop, 50, name="hidden1", activation=None, kernel_initializer=initializer)
    hidden1_norm = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
    act_hidden1 = tf.nn.leaky_relu(hidden1_norm)
    hidden1_drop = tf.layers.dropout(act_hidden1, 0.5, training=training)
    output = tf.layers.dense(hidden1_drop, 1, name="output_final", activation=None)

    saver = tf.train.Saver()
    saver.restore(sess, path_logs+"preprocessed001.ckpt")

    # Build the sigmoid op once instead of adding a new op to the graph on
    # every sess.run call.
    probability = tf.nn.sigmoid(output)

    # 0.67 == threshold where precision = 90% (per the original note).
    # The rows are selected with a direct mask on the single output column.
    # The previous `pred == pred[pred > 0.67]` comparison broadcast to an
    # (n, k) matrix and returned a row index several times whenever two
    # predictions were equal.
    pred = sess.run(probability, feed_dict={X: x_test})
    important1 = np.flatnonzero(pred[:, 0] > 0.67)
    pred = sess.run(probability, feed_dict={X: x_val})
    important2 = np.flatnonzero(pred[:, 0] > 0.67)

    # The target tensor is the output multiplied by the labels of the selected rows.
    explanation_test = de.explain('deeplift', output*y_test[important1], X, x_test[important1])
    explanation_val = de.explain('deeplift', output*y_val[important2], X, x_val[important2])

In [None]:
# Average the attributions over the first axis, giving one value per
# remaining position for each split.
average_test_explanation = np.asarray(explanation_test).mean(axis=0)
average_val_explanation = np.asarray(explanation_val).mean(axis=0)

In [None]:
# Shape of the index array of predictions above the 0.67 threshold; the
# first entry is the number of selected rows. `pred` holds the validation
# predictions at this point. A direct mask is used because comparing `pred`
# against its own filtered values broadcasts to an (n, k) matrix and counts
# tied predictions more than once.
np.argwhere(pred > 0.67).shape

In [None]:
# Bar chart of the absolute mean DeepLIFT attribution per SNP (test set).
fig, ax = plt.subplots(figsize=(20, 10))
# One bar per entry of the averaged explanation. The previous code took the
# length from `aa`, a name that is never defined in this notebook.
snp_positions = np.arange(average_test_explanation.shape[0])
ax.bar(snp_positions, np.absolute(average_test_explanation))
ax.set_xlabel("SNPs")
ax.set_ylabel("Importance absolute value")
ax.set_title("SNPs Importance using DeepLift test")

In [None]:
# Bar chart of the absolute mean DeepLIFT attribution per SNP (validation set).
fig, ax = plt.subplots(figsize=(20, 10))
bar_heights = np.absolute(average_val_explanation)
bar_positions = list(range(average_val_explanation.shape[0]))
ax.bar(bar_positions, bar_heights)
ax.set_xlabel("SNPs")
ax.set_ylabel("Importance absolute value")
ax.set_title("SNPs Importance using DeepLift val")