In [1]:
import os
import numpy as np
import torch
from utils.train_util import ClassicTrainer, ADGTrainer
from utils.data_util import dataset, dataset_CADG

## NN and scDGN

In [42]:
#################### Settings ##############################
# Hyper-parameters below must match the ones the checkpoints were trained
# with, otherwise load_model() will fail on mismatched layer shapes.
num_epochs = 100
batch_size = 256
dim1 = 1136          # width of the first hidden layer
dim2 = 100           # width of the second hidden layer
d_dim = 20499        # input feature dimension (one entry per gene)
dim_label = 39       # number of output classes (cell types)
dataset_name = 39    # dataset identifier forwarded to the loaders
dim_domain = 64
data_path = 'data/'
model_path = 'ckpts/'
# Restrict the process to a single physical GPU. These variables are only
# honoured if they are set before CUDA is initialised for the first time.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
#################### Settings ##############################

# Data loaders for the plain classifier (NN) and the adversarial model (scDGN).
d_NN = dataset(data_path, batch_size, label_size=dim_label,dataset_name=dataset_name, validation=False)
d_scDGN = dataset_CADG(data_path, batch_size, label_size=dim_label,dataset_name=dataset_name, validation=False)

# Sub-directories of model_path that hold the trained checkpoints.
NN_name = 'pancreas1_test_NN'
scDGN_name = 'pancreas1_test_scDGN'

log = dict()
# NOTE(review): the two positional `1` arguments are interpreted by ADGTrainer;
# their meaning is not visible from this notebook - confirm against utils.train_util.
t_scDGN = ADGTrainer(d_dim, 1, 1, dim1, dim2, dim_label, dim_domain, num_epochs, model_path, batch_size, use_gpu=True)
t_NN = ClassicTrainer(d_dim, dim1, dim2, dim_label, num_epochs, model_path, batch_size, use_gpu=True)

# Attach the datasets and restore the trained weights.
t_NN.dataset = d_NN
t_NN.load_model(os.path.join(model_path, NN_name, 'final_model.ckpt'))
t_scDGN.dataset = d_scDGN
t_scDGN.load_model(os.path.join(model_path, scDGN_name, 'final_model.ckpt'))

# Gene symbols, indexed by input feature position.
gene_names = np.load(os.path.join(data_path, 'gene_symbols_0413.npy'))
# Cell-type names, indexed by class id: keep the text before the first '-'
# and drop its first whitespace-separated token.
with open(os.path.join(data_path, 'scquery_ct.txt')) as ct_file:
    cell_names = [' '.join(line.split('-')[0].split()[1:]) for line in ct_file]
# Alternative name sources for the pancreas dataset:
# gene_names = [line.split(',')[1].strip('"\n') for line in open('data/genename_pancreas.csv')][1:]
# cell_names = [line.split('-')[0] for line in open('data/pancreas.txt').readlines()]

DANN_Siamese(
  (feature_extractor): Sequential(
    (0): Linear(in_features=20499, out_features=1136, bias=True)
    (1): Tanh()
    (2): Linear(in_features=1136, out_features=100, bias=True)
    (3): Tanh()
  )
  (domain_classifier): Sequential(
    (0): Linear(in_features=100, out_features=40, bias=True)
  )
  (label_classifier): Sequential(
    (0): Linear(in_features=100, out_features=39, bias=True)
  )
)
ClassicNN(
  (h1): Sequential(
    (0): Linear(in_features=20499, out_features=1136, bias=True)
    (1): Tanh()
  )
  (h2): Sequential(
    (0): Linear(in_features=1136, out_features=100, bias=True)
    (1): Tanh()
  )
  (o): Sequential(
    (0): Linear(in_features=100, out_features=39, bias=True)
  )
)


In [49]:
# Rank genes per cell type by activation maximisation.
# For every class, start from an all-zero expression vector and repeatedly add
# the gradient of that class's output w.r.t. the input (step size 1). The genes
# whose input value moved the most (by absolute value) after the last step are
# written out as that model's top genes for the cell type.
n_genes = 100    # number of top-ranked genes written per cell type
n_steps = 100    # gradient-ascent iterations per model and cell type
out_path = 'eval/'
# open(..., 'w') does not create missing directories.
os.makedirs(out_path, exist_ok=True)


def _ascend_input(model, forward_fn, start_value, cate_id, n_steps):
    """Run gradient ascent on the input for one output unit.

    Args:
        model: network whose parameter gradients are cleared before each step.
        forward_fn: callable mapping a (1, n_features) CUDA tensor to the
            (1, n_classes) label output of ``model``.
        start_value: numpy array holding the starting input vector.
        cate_id: index of the output unit to maximise.
        n_steps: number of ascent steps.

    Returns:
        List with one (1, n_features) float32 numpy array per step: the input
        after that step's update.
    """
    current = np.copy(start_value)
    trajectory = []
    for _ in range(n_steps):
        model.zero_grad()
        # Fresh leaf tensor each step so .grad only holds this step's gradient.
        x = torch.tensor(current, dtype=torch.float32).view(1, -1).cuda().requires_grad_(True)
        # Slice (not index) keeps a (1, 1) tensor; backward() needs one element.
        target_out = forward_fn(x)[:, cate_id:cate_id + 1]
        target_out.backward()
        current = (x.detach() + x.grad).cpu().numpy()
        trajectory.append(current)
    return trajectory


def _write_gene_list(path, gene_ids):
    """Write one gene name per line for the given feature indices."""
    with open(path, 'w') as fw:
        for index in gene_ids:
            fw.write('%s\n'%(gene_names[index]))


# Common all-zero starting point for every cell type and both models.
mean_value_ori = np.zeros(d_dim)
# mean_value_ori = np.zeros(3000)

for cate_id in range(dim_label):
    # NN: the forward pass returns the label output directly.
    resulted_values_NN = _ascend_input(
        t_NN.D, t_NN.D.forward, mean_value_ori, cate_id, n_steps)

    # scDGN: the forward pass takes a pair of inputs; the same tensor is fed to
    # both and only the first returned value is used.
    resulted_values_scDGN = _ascend_input(
        t_scDGN.D, lambda x: t_scDGN.D.forward(x, x)[0], mean_value_ori, cate_id, n_steps)

    # Top-n_genes features by absolute change, largest first.
    diff_NN = (resulted_values_NN[-1]-mean_value_ori)[0]
    diff_NN_ids = np.abs(diff_NN).argsort()[-n_genes:][::-1]
    diff_scDGN = (resulted_values_scDGN[-1]-mean_value_ori)[0]
    diff_scDGN_ids = np.abs(diff_scDGN).argsort()[-n_genes:][::-1]
    # Genes selected by scDGN but not by NN. np.setdiff1d returns sorted unique
    # indices, so this list is ordered by feature index, not by rank.
    diff_ids = np.setdiff1d(diff_scDGN_ids, diff_NN_ids)

    # save DE gene names
    cell_type = cell_names[cate_id]
    _write_gene_list(os.path.join(out_path, 'NN_%s_%d.txt'%(cell_type, n_genes)), diff_NN_ids)
    _write_gene_list(os.path.join(out_path, 'scDGN_%s_%d.txt'%(cell_type, n_genes)), diff_scDGN_ids)
    _write_gene_list(os.path.join(out_path, 'Diff_%s_%d.txt'%(cell_type, n_genes)), diff_ids)