In [1]:
import numpy as np
from numpy import random
from sklearn.model_selection import train_test_split
import torch.utils.data
import matplotlib
import os
import sys
matplotlib.use('Agg')

import aitac_v2
import plot_utils


In [2]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [3]:
# Hyper parameters
num_epochs = 10
num_classes = 141
batch_size = 10
learning_rate = 0.001


In [4]:
#pre-trained model to use
model_name = 'mini_sample'


In [5]:
#create output figure directory
output_file_path = "../../outputs/" + model_name + "/layer2_motifs/"
directory = os.path.dirname(output_file_path)
if not os.path.exists(directory):
    print("Creating directory %s" % output_file_path)
    os.makedirs(directory)
else:
     print("Directory %s exists" % output_file_path)


Directory ../../outputs/mini_sample/layer2_motifs/ exists


In [6]:
# Load all data
x = np.load('../../BRCA_data/mini_sample_one_hot_seqs.npy')
x = x.astype(np.float32)
y = np.load('../../BRCA_data/mini_sample_cell_type_array.npy')
y = y.astype(np.float32)
peak_names = np.load('../../BRCA_data/mini_sample_peak_names.npy')


In [7]:
#load names of test set from original model
test_peaks = np.load("../../outputs/" + model_name + "/training/test_OCR_names.npy")
idx = np.in1d(peak_names, test_peaks)

In [8]:
# split the data into training and test sets
eval_data, eval_labels, eval_names = x[idx, :, :], y[idx, :], peak_names[idx]
train_data, train_labels, train_names = x[~idx, :, :], y[~idx, :], peak_names[~idx]


In [9]:
# Data loader
eval_dataset = torch.utils.data.TensorDataset(torch.from_numpy(eval_data), torch.from_numpy(eval_labels))
eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset, batch_size=batch_size, shuffle=False)


In [10]:
#load trained model weights
checkpoint = torch.load("../../models/" + model_name + ".ckpt")


In [11]:
# initialize model 
model = aitac_v2.ConvNet(num_classes).to(device)
checkpoint2 = model.state_dict()


In [12]:
#copy original model weights into new model
for i, (layer_name, layer_weights) in enumerate(checkpoint.items()):
        new_name = list(checkpoint2.keys())[i]
        checkpoint2[new_name] = layer_weights


In [13]:
#load weights into new model
model.load_state_dict(checkpoint2)


<All keys matched successfully>

In [14]:
#get layer 2 motifs
predictions, max_act_layer2, activations_layer2, act_index_layer2 = aitac_v2.test_model(eval_loader, model, device)

correlations = plot_utils.plot_cors(eval_labels, predictions, output_file_path)


torch.Size([10, 200, 166])
weighted_cor is 0.5052579089850296
number of NaN values: 0


In [15]:
#get PWMs of second layer motifs
plot_utils.get_memes2(activations_layer2, eval_data, eval_labels, output_file_path)


In [16]:
#save files
np.save(output_file_path + "second_layer_maximum_activations.npy", max_act_layer2)
np.save(output_file_path + "second_layer_maxact_index.npy", act_index_layer2)


In [19]:
sec_ly_mx_acty = np.load('../../outputs/first_approach/layer2_motifs/second_layer_maximum_activations.npy')
print(sec_ly_mx_acty.shape)
print(sec_ly_mx_acty)

(10, 300)
[[0.7326865  0.7258493  0.40777296 ... 0.63635755 0.728843   0.9178561 ]
 [0.8193272  0.922959   0.585779   ... 0.78894603 0.92611015 0.7782495 ]
 [0.7876787  0.8905411  0.73525065 ... 0.78446746 0.89635366 0.9944668 ]
 ...
 [0.8324806  0.7036093  0.5567735  ... 0.78909206 0.7539064  0.77290446]
 [0.8127548  0.7196656  0.58814245 ... 0.85957146 0.876065   0.90636307]
 [0.7485167  0.798457   0.49164894 ... 0.78751886 0.8667381  0.88092035]]


In [21]:
sec_ly_mx_acty_idx = np.load('../../outputs/first_approach/layer2_motifs/second_layer_maxact_index.npy')
print(sec_ly_mx_acty_idx.shape)
print(sec_ly_mx_acty_idx)

(10, 300)
[[ 19. 193. 232. ...   1.  73.  23.]
 [146.  41. 198. ...  95. 170. 108.]
 [167.  52. 131. ...  76. 223. 223.]
 ...
 [106. 156. 188. ...   3. 141. 189.]
 [204. 236.  24. ... 152.  13. 228.]
 [224. 114. 203. ...  94.  16.  75.]]
