In [85]:
import os
import sys
# Put the parent of the current working directory on sys.path (once) so that
# the `src.*` imports below resolve.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
#imports 
from src.preprocessing import *
from src.models import *
from src.train_eval_helpers import *
from src.plots import *
%load_ext autoreload
%autoreload 2
#checking gpu status

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
class deepcat_cnn(torch.nn.Module):
    """
    PyTorch version of the DeepCAT CNN for one fixed CDR3 length.

    Layer stack:
        Conv2d(1 -> 8, kernel (15, 2)) -> ReLU -> MaxPool2d((1, 2), stride 1)
        Conv2d(8 -> 16, kernel (1, 2)) -> ReLU -> MaxPool2d((1, 2), stride 1)
        flatten -> Linear(16 * (seq_len - 4), 10) -> ReLU -> Dropout(0.4)
        Linear(10, 2)  (raw logits, no activation)

    Each of the four conv/pool stages shortens the last axis by one, and the
    first convolution collapses a height of 15 down to 1. ``fc1`` is therefore
    only size-compatible with inputs of shape ``(batch, 1, 15, seq_len)``.

    Args:
        seq_len: length (number of amino acids) of the CDR3 region
            (L = 12, 13, ..., 16). It fixes the input width of ``fc1`` and is
            stored as ``self.length``; ``self.name`` becomes
            ``'deepcat_cnn_<seq_len>'``.
    """

    def __init__(self, seq_len):
        super().__init__()
        # Convolutions
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(15, 2))
        self.pool1 = nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1))
        self.conv2 = nn.Conv2d(8, 16, kernel_size=(1, 2))
        self.pool2 = nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1))
        # Empty parameter; kept so the module's state_dict layout is unchanged.
        self.dummy_param = nn.Parameter(torch.empty(0))
        self.length = seq_len
        self.name = 'deepcat_cnn_' + str(seq_len)
        # Linear/Dense layers: 16 channels x 1 row x (seq_len - 4) columns
        self.fc1 = nn.Linear(16 * (self.length - 4), 10)
        self.fc2 = nn.Linear(10, 2)
        self.dropout = nn.Dropout(0.4)

    def reset_parameters(self):
        """Re-initialise every child layer that has parameters and clear its gradients."""
        for layer in self.children():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()
                layer.zero_grad()

    def forward(self, x):
        """
        Run the network on a batch.

        Args:
            x: tensor of shape ``(batch, 1, 15, seq_len)`` (see class docstring).

        Returns:
            Tuple ``(logits, predictions, probabilities)``:
            ``logits`` is the raw ``fc2`` output of shape ``(batch, 2)``,
            ``predictions`` is ``logits.argmax(1)`` and
            ``probabilities`` is ``logits.softmax(1)``.
        """
        # Conv -> ReLU -> MaxPool, twice
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Flatten (channels, height, width) into one feature axis
        x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
        # Linear -> ReLU -> Dropout
        x = self.dropout(F.relu(self.fc1(x)))
        # Final layer yields raw logits. No ReLU/Dropout here: ReLU would clamp
        # both class scores to >= 0 and dropout would randomly zero them, which
        # produces tied all-zero rows and makes argmax/softmax meaningless.
        x = self.fc2(x)

        predictions = x.argmax(1)
        probabilities = x.softmax(1)

        return x, predictions, probabilities

In [53]:
# Choose the relative location of the AAindex pickle from PATH (defined
# outside this cell), then load it once.
if 'notebook' in PATH:
    aaidx_pickle = '../src/AAidx_dict.pkl'
elif 'src' in PATH:
    aaidx_pickle = './AAidx_dict.pkl'
else:
    aaidx_pickle = './src/AAidx_dict.pkl'

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(aaidx_pickle, 'rb') as f:
    AAidx_Dict = pickle.load(f)
    
# Atchley factors: five numeric descriptors for each of the 20 amino acids,
# keyed by one-letter code.
ATCHLEY = {
    'A': [-0.591, -1.302, -0.733,  1.570, -0.146],
    'C': [-1.343,  0.465, -0.862, -1.020, -0.255],
    'D': [ 1.050,  0.302, -3.656, -0.259, -3.242],
    'E': [ 1.357, -1.453,  1.477,  0.113, -0.837],
    'F': [-1.006, -0.590,  1.891, -0.397,  0.412],
    'G': [-0.384,  1.652,  1.330,  1.045,  2.064],
    'H': [ 0.336, -0.417, -1.673, -1.474, -0.078],
    'I': [-1.239, -0.547,  2.131,  0.393,  0.816],
    'K': [ 1.831, -0.561,  0.533, -0.277,  1.648],
    'L': [-1.019, -0.987, -1.505,  1.266, -0.912],
    'M': [-0.663, -1.524,  2.219, -1.005,  1.212],
    'N': [ 0.945,  0.828,  1.299, -0.169,  0.933],
    'P': [ 0.189,  2.081, -1.628,  0.421, -1.392],
    'Q': [ 0.931, -0.179, -3.005, -0.503, -1.853],
    'R': [ 1.538, -0.055,  1.502,  0.440,  2.897],
    'S': [-0.228,  1.399, -4.760,  0.670, -2.647],
    'T': [-0.032,  0.326,  2.213,  0.908,  1.313],
    'V': [-1.337, -0.279, -0.544,  1.242, -1.262],
    'W': [-0.595,  0.009,  0.672, -2.128, -0.184],
    'Y': [ 0.260,  0.830,  3.097, -0.838,  1.512],
}

# Merging AAindex and Atchley factors: for every amino acid the Atchley
# vector comes first, followed by the AAindex vector.
ds = [ATCHLEY, AAidx_Dict]
merged_dict = {
    aa: list(np.concatenate([source[aa] for source in ds]))
    for aa in ATCHLEY
}

# Persist the merged lookup table one directory above the working directory.
with open('../merged_dict.pkl', 'wb') as f:
    pickle.dump(merged_dict, f)

merged_dict

{'A': [-0.591,
  -1.302,
  -0.733,
  1.57,
  -0.146,
  -0.970906026408001,
  -0.323681347371203,
  15.7206518221322,
  -0.508840655632075,
  3.7402185826504,
  -0.778796812231878,
  3.38667656384665,
  -0.913062747756598,
  3.00614750780962,
  -2.32919906002667,
  0.787021897929849,
  -1.8743796601396,
  1.53335388267698,
  1.3444609979428,
  3.35815928624532],
 'C': [-1.343,
  0.465,
  -0.862,
  -1.02,
  -0.255,
  -8.36918251369427,
  8.3031934498954,
  -6.6196694564762,
  13.8734139891373,
  8.59531323791867,
  9.79914818190207,
  1.26911186662177,
  -4.63752901352054,
  -0.983921565990354,
  4.3634243590938,
  2.34365256090458,
  0.343445840449375,
  1.18290876288827,
  0.0132558803925318,
  -0.696621502763574],
 'D': [1.05,
  0.302,
  -3.656,
  -0.259,
  -3.242,
  18.126265894029,
  -2.147381547322,
  -0.252169840858429,
  2.31366106621094,
  6.52406371824222,
  -4.88386469358817,
  -9.13828093975311,
  -0.710243755381537,
  -2.10295526053507,
  -1.27865374462701,
  1.4821739799286

In [55]:
def aaidx_atchley_encoding(seq, device):
    """Encode an amino-acid sequence with the merged Atchley/AAindex table.

    Every residue of ``seq`` is replaced by its feature vector from the
    module-level ``merged_dict``; the vectors become the columns of the result.

    Args:
        seq: sequence of one-letter amino-acid codes (e.g. a CDR3 string).
        device: target device for the returned tensor. Anything accepted by
            ``Tensor.to`` works (``'cpu'``, ``'cuda'``, ``'cuda:0'``, a
            ``torch.device``). ``None`` leaves the tensor on the CPU.

    Returns:
        float32 tensor of shape ``(1, n_features, len(seq))``, where
        ``n_features`` is the length of the vectors stored in ``merged_dict``.

    Raises:
        KeyError: if ``seq`` contains a symbol that is not in ``merged_dict``.
    """
    n_aa = len(seq)
    # Read the feature count from the lookup table instead of hard-coding it,
    # so the encoder stays in sync with whatever table was merged.
    n_features = len(next(iter(merged_dict.values())))
    encoded = np.zeros((n_aa, n_features), dtype=np.float32)
    for idx, aa in enumerate(seq):
        encoded[idx] = merged_dict[aa]
    # (n_aa, n_features) -> (n_features, n_aa), then add a leading axis
    aa_encoding = torch.from_numpy(np.transpose(encoded)).unsqueeze(0)
    # Always defer to Tensor.to: comparing against torch.device('cuda') is
    # False for the string 'cuda' and for indexed devices such as 'cuda:0',
    # which silently left the tensor on the CPU.
    if device is not None:
        aa_encoding = aa_encoding.to(device)
    return aa_encoding

In [62]:
# Encode the third tumor CDR3 sequence on the CPU and prepend one more axis
# before inspecting the resulting shape.
seqs = read_seq('../TrainingData/TumorCDR3.txt')
seq = seqs[2]
feats = aaidx_atchley_encoding(seq, 'cpu').unsqueeze(0)
feats.shape

torch.Size([1, 1, 20, 12])

In [148]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Shape experiment: push a dummy batch with 20 feature rows and width L
# through the conv/pool stack and check the flattened feature size.
L = 16
X = torch.empty((100, 1, 20, L))
conv_layers = [
    nn.Conv2d(1, 8, kernel_size=(20, 2)),
    nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1)),
    nn.Conv2d(8, 16, kernel_size=(1, 2)),
    nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1)),
    #nn.Conv2d(16,32, kernel_size = (10))
]
model = nn.Sequential(*conv_layers)
Y = model(X)
print("After convo", Y.shape)
n_channels, height, width = Y.shape[1], Y.shape[2], Y.shape[3]
reshaped = Y.view(-1, n_channels * height * width)
print("reshaped", reshaped.shape)
# Candidate dense head, currently disabled:
#lin = nn.Sequential(nn.Linear(16*2*(L-6), 50),
#                    nn.BatchNorm1d(50),
#                    nn.Linear(50,10),
#                    #nn.BatchNorm1d(10),
#                    nn.Linear(10,2))
#lin(reshaped).shape

After convo torch.Size([100, 16, 1, 12])
reshaped torch.Size([100, 192])


In [146]:
from src.models import richie_net
# Smoke test: run richie_net(12) on a random batch of shape (100, 1, 20, 12).
richie = richie_net(12)
dummy_batch = torch.randn(100, 1, 20, 12)
richie(dummy_batch)

(tensor([[0.0000e+00, 0.0000e+00],
         [0.0000e+00, 3.9089e-01],
         [0.0000e+00, 0.0000e+00],
         [1.4059e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 7.5824e-01],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [3.6955e-01, 6.5458e-01],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [1.5804e+00, 1.1993e+00],
         [0.0000e+00, 0.0000e+00],
         [5.7369e-01, 1.0454e+00],
         [0.0000e+00, 7.8302e-01],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 7.5121e-01],
         [0.0000e+00, 1.5973e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 7.9313e-01],
         [0.0000e+00, 0.0000e+00],
         [0.0000e+00, 0.0000e+00],
         [7.5070e-02, 2.4723e-01],
         [0.0000e+00