# In this notebook, we accumulate, over the different iterations (seeds), the gene lists obtained from model interpretation with three interpretability methods -- GradientSHAP, DeepLIFT, and IntegratedGradients.

In [1]:
# https://stackoverflow.com/questions/21971449/how-do-i-increase-the-cell-width-of-the-jupyter-ipython-notebook-in-my-browser

# `display` and `HTML` belong to the public IPython.display module; importing
# them from IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML
# Stretch the notebook container to 98% of the browser width.
display(HTML("<style>.container { width:98% !important; }</style>"))

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_curve, auc, plot_confusion_matrix, plot_precision_recall_curve, classification_report

from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from tqdm.notebook import tqdm

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and later
# dropped the old aliases, so pick whichever name this installation provides.
plt.style.use('seaborn-v0_8-whitegrid' if 'seaborn-v0_8-whitegrid' in plt.style.available else 'seaborn-whitegrid')

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
import os
import gc
import time
import copy
import torch
import model_utils as u
import model_evaluation as me

In [4]:
# Workspace setting that the PyTorch reproducibility notes ask for so that
# cuBLAS can run deterministically; exported before any CUDA work is done.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
# Make PyTorch raise an error instead of silently using a nondeterministic kernel.
torch.use_deterministic_algorithms(True)

In [5]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries imported above.
warnings.simplefilter('ignore')

In [6]:
# https://pytorch.org/docs/stable/notes/randomness.html
# Fixed seed for this notebook, handed to the project helper below.
seed = 322
# presumably seeds the Python / NumPy / PyTorch RNGs -- confirm in model_utils.set_all_seeds
u.set_all_seeds(seed)

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

---
---
---

In [8]:
# Root folder that holds the Xena TCGA downloads and the SECOND_ITERATION gene lists.
# The original Windows location remains the default; set the TCGA_XENA_DIR
# environment variable to run the notebook on another machine.
PATH = os.environ.get("TCGA_XENA_DIR") or "D:/CANCER BIOLOGY/DATASET/TCGA/FROM Xena/"
if not PATH.endswith(("/", "\\")):
    # Other cells build file names by plain string concatenation, so the
    # root must end with a path separator.
    PATH += "/"

In [9]:
# Background on reading compressed, delimiter-separated files with pandas:
# https://stackoverflow.com/questions/18885175/read-a-zipped-file-as-a-pandas-dataframe
# https://www.analyticsvidhya.com/blog/2021/04/delimiters-in-pandas-read_csv-function/

# Both downloads are gzip-compressed, tab-separated tables.
df_luad = pd.read_csv(
    PATH + "TCGA.LUAD.sampleMap_HiSeqV2_PANCAN.gz",
    sep="\t",
    compression="gzip",
)
df_lusu = pd.read_csv(
    PATH + "TCGA.LUSC.sampleMap_HiSeqV2_PANCAN.gz",
    sep="\t",
    compression="gzip",
)

In [10]:
# Build the modelling table from the two cohort frames with the project helper.
# NOTE(review): the rendered output shows a `label` column followed by per-gene
# columns; how the helper derives the labels is not visible here -- confirm in
# model_utils.dataset_preprocess.
df, labels, columns = u.dataset_preprocess(df_luad, df_lusu)

Unnamed: 0,label,ARHGEF10L,HIF3A,RNF17,RNF10,RNF11,RNF13,GTF2IP1,REM1,MTVR2,...,TULP2,NPY5R,GNGT2,GNGT1,TULP3,PTRF,BCL6B,GSTK1,SELP,SELS
TCGA-69-7978-01,1,0.125808,-1.29493,-0.112935,-1.41187,0.203922,0.0993901,-0.222094,0.504354,-0.423399,...,1.13472,-0.845117,1.76017,-1.28139,0.224623,0.863614,0.802173,0.108205,0.595367,-0.222712
TCGA-62-8399-01,1,0.561708,6.06917,-0.531035,-0.228672,0.0521219,-1.20601,-0.338894,1.44985,0.0394006,...,-0.286078,-0.055517,-0.0282335,0.0480102,0.295223,0.317114,1.07907,-0.782695,3.11427,-0.388912
TCGA-78-7539-01,1,-0.237592,3.58147,0.592065,-0.108372,-0.499978,-0.0254099,0.163006,0.131654,-0.0508994,...,2.20992,-1.58712,1.56537,2.63871,0.0491232,-1.24309,-1.28323,0.0341054,0.571467,0.233588
TCGA-50-5931-11,1,-1.18049,3.92767,0.291065,-0.043472,0.710822,1.05089,-0.564394,1.68395,0.398701,...,-0.748878,0.900483,1.90357,-1.28139,-0.549277,2.80271,2.25047,-0.540795,3.98597,0.370988
TCGA-73-4658-01,1,-0.656192,-0.525926,-0.531035,-0.156672,0.373522,0.44729,-0.438994,1.83155,-0.423399,...,-0.748878,-0.931417,1.91517,-0.17659,0.0423232,1.11071,1.51397,-0.454095,2.89317,0.193788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-85-A512-01,0,-1.28699,-1.86343,-0.531035,0.020328,-0.560978,0.51729,0.519806,-0.748146,-0.423399,...,-0.748878,-1.58712,-0.236033,1.41071,1.65372,0.434414,-0.491427,0.318305,-1.01143,0.392288
TCGA-85-8354-01,0,-1.84779,-1.29733,-0.531035,-0.280072,-0.560278,0.85009,-0.166294,-1.51925,0.602101,...,0.276622,-1.58712,-0.784133,1.46201,0.765023,1.24691,-0.0700266,-0.620695,-1.39033,0.918888
TCGA-O2-A5IB-01,0,0.568508,0.0259737,0.084865,-0.00937199,-0.136278,-1.41811,0.173506,-2.19405,0.622701,...,-0.408378,-1.24662,-1.94053,-1.28139,1.28172,-2.42559,-0.734427,-1.92679,-2.12163,-0.637412
TCGA-77-7335-01,0,-0.849892,-2.41413,0.192865,0.197228,0.443922,0.54789,0.0844055,1.01505,-0.423399,...,-0.748878,-1.58712,2.16037,1.19391,0.197023,1.55011,0.294773,0.369005,1.90927,0.117688


---
---
---

# case 1: gene lists of the individual XAI methods (IG, GS, DL) for the top 50, 100, 150 genes

In [11]:
# Folder that receives the cumulative ".kd" gene-list files written by the cells below.
output_path = PATH+"SECOND_ITERATION/cumulative_lists/"

In [12]:
gene_list_path = PATH+"SECOND_ITERATION/"

# Building blocks of the per-seed file names: <method>_top_<size>_genes_<cohort>
xai_methods = ("ig", "gs", "dl")   # IntegratedGradients, GradientSHAP, DeepLIFT
top_sizes = (50, 100, 150)
cohorts = ("luad", "lusu")

# One cumulative list per (method, size). Genes are appended seed after seed,
# so a gene selected under several seeds appears several times.
individual_lists = {(method, size): [] for method in xai_methods for size in top_sizes}

for seeds in range(316, 326):
    seed_dir = gene_list_path+'seed='+str(seeds)+'/'
    for (method, size), collected in individual_lists.items():
        # Union of the two cohort lists for this seed (one gene name per line).
        seed_genes = set()
        for cohort in cohorts:
            with open(seed_dir+method+'_top_'+str(size)+'_genes_'+cohort, "r") as file:
                seed_genes.update(line.strip() for line in file)
        # Sorted because the iteration order of a set of strings can change
        # between kernel sessions (hash randomisation); sorting keeps the
        # written files identical from run to run.
        collected.extend(sorted(seed_genes))

# Keep the per-list names available to the rest of the notebook.
ig_50, ig_100, ig_150 = (individual_lists[("ig", size)] for size in top_sizes)
gs_50, gs_100, gs_150 = (individual_lists[("gs", size)] for size in top_sizes)
dl_50, dl_100, dl_150 = (individual_lists[("dl", size)] for size in top_sizes)


'''
Write all the list in a file
'''

# Make sure the destination exists so the results are not lost after the reads.
os.makedirs(output_path, exist_ok=True)
for (method, size), collected in individual_lists.items():
    with open(output_path+method+"_"+str(size)+".kd", "w") as file:
        file.writelines("%s\n" % gene for gene in collected)


print('Individual cases done !!')

Individual cases done !!


# case 2 & 3: pairwise unions (IG+GS, IG+DL, DL+GS) and pairwise intersections (IG.GS, IG.DL, DL.GS) for the top 50, 100, 150 genes

In [13]:
## union cases and intersection cases for every pair of XAI methods

# Pair label (used in the variable and file names) -> the two methods it combines.
# ig = IntegratedGradients, gs = GradientSHAP, dl = DeepLIFT
method_pairs = {"gs_ig": ("ig", "gs"), "ig_dl": ("ig", "dl"), "gs_dl": ("gs", "dl")}
top_sizes = (50, 100, 150)
cohorts = ("luad", "lusu")

# Cumulative lists keyed by (pair label, size). Genes are appended seed after
# seed, so a gene selected under several seeds appears several times.
pair_union = {(label, size): [] for label in method_pairs for size in top_sizes}
pair_intersection = {(label, size): [] for label in method_pairs for size in top_sizes}


gene_list_path = PATH+"SECOND_ITERATION/"
for seeds in range(316, 326):
    seed_dir = gene_list_path+'seed='+str(seeds)+'/'

    for (label, size) in pair_union:
        union_genes, shared_genes = set(), set()
        for cohort in cohorts:
            # Load the two methods' lists for this cohort (one gene name per line).
            per_method = []
            for method in method_pairs[label]:
                with open(seed_dir+method+'_top_'+str(size)+'_genes_'+cohort, "r") as file:
                    per_method.append({line.strip() for line in file})
            # union: genes picked by either method in either cohort
            union_genes |= per_method[0] | per_method[1]
            # intersection: genes picked by both methods within the same cohort,
            # then pooled over the two cohorts
            shared_genes |= per_method[0] & per_method[1]

        # Sorted because the iteration order of a set of strings can change
        # between kernel sessions (hash randomisation); sorting keeps the
        # written files identical from run to run.
        pair_union[(label, size)].extend(sorted(union_genes))
        pair_intersection[(label, size)].extend(sorted(shared_genes))


# Keep the per-list names available to the rest of the notebook.
gs_ig_50, gs_ig_100, gs_ig_150 = (pair_union[("gs_ig", size)] for size in top_sizes)
ig_dl_50, ig_dl_100, ig_dl_150 = (pair_union[("ig_dl", size)] for size in top_sizes)
gs_dl_50, gs_dl_100, gs_dl_150 = (pair_union[("gs_dl", size)] for size in top_sizes)

prod_gs_ig_50, prod_gs_ig_100, prod_gs_ig_150 = (pair_intersection[("gs_ig", size)] for size in top_sizes)
prod_ig_dl_50, prod_ig_dl_100, prod_ig_dl_150 = (pair_intersection[("ig_dl", size)] for size in top_sizes)
prod_gs_dl_50, prod_gs_dl_100, prod_gs_dl_150 = (pair_intersection[("gs_dl", size)] for size in top_sizes)


# Make sure the destination exists so the results are not lost after the reads.
os.makedirs(output_path, exist_ok=True)

# Union lists: <pair>_<size>.kd
for (label, size), collected in pair_union.items():
    with open(output_path+label+"_"+str(size)+".kd", "w") as file:
        file.writelines("%s\n" % gene for gene in collected)

# Intersection lists: prod_<pair>_<size>.kd
for (label, size), collected in pair_intersection.items():
    with open(output_path+"prod_"+label+"_"+str(size)+".kd", "w") as file:
        file.writelines("%s\n" % gene for gene in collected)

print("Case 2 and 3 done !!")

Case 2 and 3 done !!


# case 4: union (IG+DL+GS) and intersection (IG.DL.GS) of all three methods for the top 50, 100, 150 genes

In [14]:
## union cases and intersection cases over all three XAI methods

xai_methods = ("ig", "gs", "dl")   # IntegratedGradients, GradientSHAP, DeepLIFT
top_sizes = (50, 100, 150)
cohorts = ("luad", "lusu")

# Cumulative lists keyed by list size. Genes are appended seed after seed,
# so a gene selected under several seeds appears several times.
triple_union = {size: [] for size in top_sizes}
triple_intersection = {size: [] for size in top_sizes}


gene_list_path = PATH+"SECOND_ITERATION/"
for seeds in range(316, 326):
    seed_dir = gene_list_path+'seed='+str(seeds)+'/'

    for size in top_sizes:
        union_genes, shared_genes = set(), set()
        for cohort in cohorts:
            # Load the three methods' lists for this cohort (one gene name per line).
            per_method = []
            for method in xai_methods:
                with open(seed_dir+method+'_top_'+str(size)+'_genes_'+cohort, "r") as file:
                    per_method.append({line.strip() for line in file})
            # union: genes picked by any method in either cohort
            union_genes |= set.union(*per_method)
            # intersection: genes picked by all three methods within the same
            # cohort, then pooled over the two cohorts
            shared_genes |= set.intersection(*per_method)

        # Sorted because the iteration order of a set of strings can change
        # between kernel sessions (hash randomisation); sorting keeps the
        # written files identical from run to run.
        triple_union[size].extend(sorted(union_genes))
        triple_intersection[size].extend(sorted(shared_genes))


# Keep the per-list names available to the rest of the notebook.
ig_dl_gs_50, ig_dl_gs_100, ig_dl_gs_150 = (triple_union[size] for size in top_sizes)
prod_ig_dl_gs_50, prod_ig_dl_gs_100, prod_ig_dl_gs_150 = (triple_intersection[size] for size in top_sizes)


# Make sure the destination exists so the results are not lost after the reads.
os.makedirs(output_path, exist_ok=True)

# Union lists: ig_dl_gs_<size>.kd
for size, collected in triple_union.items():
    with open(output_path+"ig_dl_gs_"+str(size)+".kd", "w") as file:
        file.writelines("%s\n" % gene for gene in collected)

# Intersection lists: prod_ig_dl_gs_<size>.kd
for size, collected in triple_intersection.items():
    with open(output_path+"prod_ig_dl_gs_"+str(size)+".kd", "w") as file:
        file.writelines("%s\n" % gene for gene in collected)


print('Case 4 done.')

Case 4 done.


---
---
---