In [None]:
import numpy as np
import numpy.matlib
import pandas as pd
import matplotlib.pyplot as plt

import PHD.cnv_model_utils as u

import torch
import torchvision
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler

from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score,StratifiedKFold


%matplotlib inline

In [None]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings from
# pandas / torch / sklearn; consider narrowing the filter while debugging.
warnings.simplefilter('ignore')

In [None]:
# https://pytorch.org/docs/stable/notes/randomness.html
# The seed comes from the project helper module (PHD.cnv_model_utils, imported as `u`).
# It is reused below for shuffling the dataset and for naming the output CSV files.
seed = u.get_seed()
print('Seed = ', seed)
# presumably seeds the Python / NumPy / torch RNGs — confirm in PHD.cnv_model_utils
u.set_all_seeds(seed)

In [None]:
# Root directory for the dataset, saved models and exported gene lists.
# Must end with '/' because file names are appended by plain string concatenation.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
PATH = 'D:/CANCER BIOLOGY/DATASET/CNV/TCGA/FROM Xena/'

In [None]:
# Read the preprocessed table (tab-separated, gzip-compressed), then shuffle
# the rows reproducibly with the notebook seed and renumber the index.
dataset_file = PATH + 'TCGA_XENA_LUAD_LUSC_CNV_dataset_preprocessed.gz'
df_final = (
    pd.read_csv(dataset_file, sep='\t', compression='gzip')
    .sample(frac=1, random_state=seed)
    .reset_index(drop=True)
)

# Keep the targets aside, then remove them from the feature table.
# Only the 'label' column is dropped here; every remaining column is a feature.
labels = list(df_final['label'])
df_final = df_final.drop(columns=['label'])

# Feature names, in the same order as the columns of the feature matrix.
columns = list(df_final.columns)

In [None]:
# Feature matrix as a NumPy array: one row per row of df_final, one column per entry of `columns`.
xtrain = df_final.to_numpy()
# Targets as a plain list, in the same (shuffled) row order as xtrain.
ytrain = labels

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if  torch.cuda.is_available() else 'cpu')
# Bare expression so the notebook displays the selected device.
device

In [None]:
# Number of input features = number of columns in the feature matrix.
input_dim = xtrain.shape[1]
# Width of the network's last hidden layer (layer5 output), not the number of classes;
# the final layer of Network always has 2 outputs.
output_dim = 256

---
---
---

In [None]:
class Network(nn.Module):
    """Fully connected network: input_dim -> 4096 -> 2048 -> 1024 -> 512 -> output_dim -> 2.

    Each of the first four stages is Linear -> Dropout -> ReLU, with dropout
    probabilities 0.4, 0.3, 0.2 and 0.1. The fifth stage is Linear -> ReLU and
    the last stage is Linear -> Sigmoid, so each of the two outputs is squashed
    independently into (0, 1).

    Sub-module names (layer1..layer6, norm1..norm4, relu1..relu5, sigmoid) are
    part of the checkpoint format: the notebook restores weights with
    ``load_state_dict``, which matches parameters by these names. Note that
    ``normN`` holds an ``nn.Dropout`` module, not a normalisation layer.

    Args:
        input_dim: number of input features.
        output_dim: width of the last hidden layer (output of ``layer5``).
    """

    # Dropout probability for hidden stages 1..4, in order.
    _DROPOUT_RATES = (0.4, 0.3, 0.2, 0.1)

    def __init__(self, input_dim, output_dim):
        super().__init__()

        # Stages 1-4 are registered in the order layerN, normN, reluN so the
        # module order and the order of weight initialisation are unchanged.
        widths = (input_dim, 4096, 2048, 1024, 512)
        for stage, rate in enumerate(self._DROPOUT_RATES, start=1):
            setattr(self, f'layer{stage}', nn.Linear(widths[stage - 1], widths[stage]))
            setattr(self, f'norm{stage}', nn.Dropout(rate))
            setattr(self, f'relu{stage}', nn.ReLU())

        # Stage 5 has no dropout.
        self.layer5 = nn.Linear(widths[-1], output_dim)
        self.relu5 = nn.ReLU()

        # Two-unit head followed by an element-wise sigmoid.
        self.layer6 = nn.Linear(output_dim, 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the stack on ``x`` and return the sigmoid of the two-unit head."""
        hidden = x
        for stage in range(1, len(self._DROPOUT_RATES) + 1):
            hidden = getattr(self, f'layer{stage}')(hidden)
            # Dropout is applied to the linear output, before the ReLU.
            hidden = getattr(self, f'norm{stage}')(hidden)
            hidden = getattr(self, f'relu{stage}')(hidden)

        hidden = self.relu5(self.layer5(hidden))
        return self.sigmoid(self.layer6(hidden))
        

---
---
---

In [None]:
# Load both the LASSO and the XGB model.
#
# Two identically shaped networks are created on `device`, then their weights
# are restored from the checkpoints under PATH + "models/".
#
# map_location=device remaps the stored tensors onto the device selected above,
# so checkpoints saved from a CUDA run can still be loaded on a CPU-only machine
# (without it torch.load tries to restore them onto their original device).
#
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model_lasso = Network(input_dim, output_dim).to(device)
model_xgb = Network(input_dim, output_dim).to(device)
model_lasso.load_state_dict(
    torch.load(PATH+"models/CUSTOM_CNV_NETWORK_lasso.kd", map_location=device)
)
# Last expression of the cell: displays the key-matching report for the XGB model.
model_xgb.load_state_dict(
    torch.load(PATH+"models/CUSTOM_CNV_NETWORK_xgb.kd", map_location=device)
)

---
---
---

In [None]:
from captum.attr import IntegratedGradients, GradientShap, DeepLift

In [None]:
def get_top_genes_from_attributions(attr1, attr0, columns, seed, algo, out_dir=None):
    """Score each feature by its mean absolute attribution over both targets.

    The score of a feature is ``mean over dim 0 of (|attr0| + |attr1|)``. The
    result is written to ``<out_dir>/<seed><algo>.csv`` and also returned. The
    rows are in the order of ``columns``; sorting is left to the caller.

    Args:
        attr1: attribution tensor for one target.
        attr0: attribution tensor for the other target, same shape as ``attr1``.
            (assumes dim 0 indexes samples and the last dim indexes features —
            TODO confirm against the attribution calls)
        columns: feature names, one per attributed feature.
        seed: run seed; used only as the prefix of the CSV file name.
        algo: short tag for the attribution method/model; used as the suffix
            of the CSV file name.
        out_dir: directory for the CSV. Defaults to ``PATH + 'models/top genes/'``
            (the location that was previously hard-coded).

    Returns:
        pandas.DataFrame with columns ``genes`` and ``mean_score``.
    """
    import os  # local import so this cell does not depend on the import cell

    # Bug fix: the original did `attr0 = abs(attr1); attr1 = abs(attr0)`, which
    # threw away the attr0 argument and produced 2 * |attr1|. Both inputs are
    # now used.
    total_attr = torch.abs(attr0) + torch.abs(attr1)
    mean_attr = torch.mean(total_attr, dim=0)

    temp_df = pd.DataFrame(mean_attr.detach().cpu().numpy(), columns=['mean_score'])
    temp_df.insert(0, 'genes', columns)

    if out_dir is None:
        out_dir = PATH + 'models/top genes/'
    # Create the target directory on first use instead of failing in to_csv.
    os.makedirs(out_dir, exist_ok=True)
    temp_df.to_csv(os.path.join(out_dir, str(seed) + algo + '.csv'), index=False)
    return temp_df

In [None]:
# Whole feature matrix as one float32 tensor on the selected device.
inputs = torch.tensor(xtrain, dtype=torch.float32, device=device)

In [None]:
# Track gradients with respect to the inputs; the attribution methods used below are gradient-based.
inputs.requires_grad = True

In [None]:
# All-zero reference input with the same shape as the feature matrix, created
# directly on the target device (no intermediate NumPy array).
baselines = torch.zeros(xtrain.shape, device=device)

# LASSO

In [None]:
## IntegratedGradients (baselines)

# Switch the LASSO network to eval mode before attributing (disables dropout).
model_lasso.eval()
algo = IntegratedGradients(model_lasso)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
ig_options = dict(n_steps=6, baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **ig_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **ig_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='ig_lasso'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory before the next attribution run.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

In [None]:
## GradientSHAP

# Switch the LASSO network to eval mode before attributing (disables dropout).
model_lasso.eval()
algo = GradientShap(model_lasso)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
gs_options = dict(n_samples=4, baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **gs_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **gs_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='gs_lasso'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory before the next attribution run.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

In [None]:
## DeepLIFT

# Switch the LASSO network to eval mode before attributing (disables dropout).
model_lasso.eval()
algo = DeepLift(model_lasso)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
dl_options = dict(baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **dl_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **dl_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='dl_lasso'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory before the next attribution run.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

In [None]:
# Drop the notebook's reference to the LASSO model; the XGB section below only uses model_xgb.
# Re-running any LASSO cell after this requires re-running the model-loading cell first.
del model_lasso

# XGB

In [None]:
## IntegratedGradients (baselines)

# Switch the XGB network to eval mode before attributing (disables dropout).
model_xgb.eval()
algo = IntegratedGradients(model_xgb)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
ig_options = dict(n_steps=6, baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **ig_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **ig_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='ig_xgb'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory before the next attribution run.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

In [None]:
## GradientShap (baselines)

# Switch the XGB network to eval mode before attributing (disables dropout).
model_xgb.eval()
algo = GradientShap(model_xgb)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
gs_options = dict(n_samples=4, baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **gs_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **gs_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='gs_xgb'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory before the next attribution run.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

In [None]:
## DeepLift (baselines)

# Switch the XGB network to eval mode before attributing (disables dropout).
model_xgb.eval()
algo = DeepLift(model_xgb)

# Identical settings for both output units; only `target` changes.
# Naming: algo_attr1 / d1 come from target=0, algo_attr0 / d0 from target=1.
dl_options = dict(baselines=baselines, return_convergence_delta=True)
algo_attr1, d1 = algo.attribute(inputs, target=0, **dl_options)
algo_attr0, d0 = algo.attribute(inputs, target=1, **dl_options)

# Mean convergence delta per target.
print(f"Delta_1, Delta_0 = {d1.mean()}, {d0.mean()}")

In [None]:
# Score the genes from both targets' attributions (the helper also writes the
# unsorted table to CSV), then order them from highest to lowest score.
top_genes = get_top_genes_from_attributions(
    algo_attr1, algo_attr0, columns, seed, algo='dl_xgb'
).sort_values(by='mean_score', ascending=False)

In [None]:
# Release PyTorch's cached, currently unused CUDA memory.
torch.cuda.empty_cache()
# First 50 rows of the table sorted in the previous cell (highest mean_score first).
top_genes.head(50)

---
---
---

In [None]:
# Audible notification that the notebook has finished running.
from scipy.io.wavfile import read
import sounddevice as sd

# fs is the sampling rate stored in the WAV file; data holds the samples.
fs, data = read('PHD/alert.wav', mmap=True)

# A mono file is returned as a 1-D array: turn it into a single-channel column.
# Multi-channel data is already laid out as (samples, channels); flattening it
# with reshape(-1, 1) would interleave the channels into one stream.
if data.ndim == 1:
    data = data.reshape(-1, 1)

# Play at the file's own sampling rate rather than a hard-coded 44100 Hz, so
# files recorded at another rate are not sped up or slowed down.
sd.play(data, fs)

---
---
---