In [1]:
import numpy as np
import pandas as pd
import fastai
from tqdm import tqdm_notebook as tqdm
from fastai.tabular import *
import fastai.text

import torch.nn as nn
import torch.nn.functional as F

def _min_max(x):
    """Return the ``(min, max)`` pair of an array-like exposing ``.min()`` and ``.max()``."""
    return x.min(), x.max()


# Convenience shortcut attached to the numpy namespace so it can be called as `np.range(arr)`.
np.range = _min_max

In [2]:
# Sanity check: display the properties of CUDA device 0 (shown as the cell output).
# NOTE(review): this needs a visible CUDA device; the later cells move everything to 'cuda' too.
torch.cuda.get_device_properties(0)

_CudaDeviceProperties(name='GeForce GTX 1060', major=6, minor=1, total_memory=6078MB, multi_processor_count=10)

In [3]:
class Dense(nn.Module):
    """Fully connected block with optional ReLU, batch norm, dropout and residual link.

    Layer order:
      * ``res=False``: Linear -> ReLU -> BatchNorm1d -> Dropout
      * ``res=True`` : Linear -> ReLU -> Dropout -> (+ input) -> BatchNorm1d

    Args:
        ni:    number of input features.
        no:    number of output features (must equal ``ni`` when ``res=True``).
        usebn: add a ``BatchNorm1d`` (position depends on ``res``, see above).
        act:   add a ``ReLU`` after the linear layer.
        p:     dropout probability; dropout is only added when ``p > 1e-3``.
        res:   add the block input to the block output (skip connection).
        bias:  whether the linear layer has a bias term.

    Raises:
        AssertionError: if ``res`` is requested but ``ni != no``.
    """
    def __init__(self, ni, no, usebn=True, act=True, p=0., res=False, bias=True):
        super(Dense, self).__init__()

        # A skip connection adds input and output element-wise, so sizes must match.
        assert (not res) or ni == no, "input output sizes are different, res connection not possible"

        self.res    = res
        self.usebn  = usebn

        layers = [nn.Linear(ni, no, bias=bias)]
        if act: layers.append(nn.ReLU())
        if usebn and not res: layers.append(nn.BatchNorm1d(no))
        if p > 1e-3: layers.append(nn.Dropout(p))
        self.layers = nn.ModuleList(layers)

        # With a residual link, normalisation is applied after the addition.
        if usebn and res:
            self.bn = nn.BatchNorm1d(no)

    def forward(self, x):
        """Apply the block to ``x`` and return the transformed tensor."""
        out = x
        for layer in self.layers:
            out = layer(out)
        if self.res:
            # Out-of-place add: an in-place `+=` would overwrite the ReLU output that
            # autograd keeps for the backward pass (when no dropout follows the ReLU,
            # or in eval mode where dropout returns its input unchanged) and make
            # `.backward()` raise a RuntimeError.
            out = out + x
            if self.usebn:
                out = self.bn(out)
        return out
    

In [4]:
class encoder(nn.Module):
    """One encoder layer: fastai multi-head attention followed by a feed-forward block.

    Args:
        d_model: feature width of the input (and output) sequence elements.
        p:       dropout probability shared by the attention and feed-forward parts.
        n_heads: number of attention heads (each head uses a fixed width of 16).
    """
    def __init__(self, d_model=64, p=0.0, n_heads=8):
        super().__init__()
        tf_models = fastai.text.models
        self.att = tf_models.MultiHeadAttention(n_heads, d_model, d_head=16, resid_p=p, attn_p=p)
        # The hidden layer of the feed-forward part is twice as wide as the model.
        self.ff = tf_models.feed_forward(d_model, 2 * d_model, ff_p=p)

    def forward(self, x):
        """Run attention, then the feed-forward block, and return the result."""
        return self.ff(self.att(x))

In [7]:
class trfmr(nn.Module):
    """Embed the categorical first feature of every sequence element and encode the sequence.

    ``layers[0]`` is the embedding (6 possible ids), ``layers[1]`` the embedding
    dropout, and ``layers[2:]`` a stack of ``n_enc`` ``encoder`` layers.

    Args:
        embed_size: width of the embedding vector.
        inp_len:    number of features per sequence element (1 id + ``inp_len - 1`` continuous).
        n_enc:      number of stacked ``encoder`` layers.
        embed_p:    dropout probability applied to the embeddings.
        n_heads:    attention heads per encoder layer.
        p:          dropout probability inside the encoder layers.
    """
    def __init__(self, embed_size=64, inp_len=5, n_enc=5, embed_p=0.3, n_heads=8, p=0.1):
        super(trfmr, self).__init__()
        # After concatenation each element holds the embedding plus the continuous features.
        d_model = embed_size + inp_len - 1
        front = [
            nn.Embedding(6, embed_size, scale_grad_by_freq=True),
            nn.Dropout(embed_p),
        ]
        self.layers = nn.ModuleList(front + [encoder(d_model, p, n_heads) for _ in range(n_enc)])

    def forward(self, x):
        """Encode ``x``, a rank-3 tensor whose last axis is ``[id, continuous features...]``.

        The id column must hold integer values in ``[0, 6)`` (stored in whatever
        dtype ``x`` has); presumably these are atom-type ids -- verify against the caller.
        Returns a tensor whose last axis has ``embed_size + inp_len - 1`` entries.
        """
        # `.long()` keeps the tensor on the device of `x`; the previous
        # `.type(torch.LongTensor).cuda()` copied to the CPU and back and
        # crashed on machines without CUDA.
        atoms = x[:, :, 0].long()
        conts = x[:, :, 1:]

        embeds = self.layers[1](self.layers[0](atoms))

        x = torch.cat((embeds, conts), dim=-1)

        for enc in self.layers[2:]:
            x = enc(x)
        return x

In [12]:
class model(nn.Module):
    """Regression network: sequence encoder + pooled features + categorical meta features.

    The encoded sequence is summarised by five vectors (first element, the two
    indexed elements, max-pool, mean-pool); these are concatenated, merged with
    an embedding of two categorical meta columns, and reduced to one output value.

    Args:
        embed_size, inp_len, n_enc, embed_p, n_heads, p: forwarded to ``trfmr``;
            ``p`` is also the dropout of the residual ``Dense`` blocks.
        out_p:    dropout of the last hidden ``Dense`` block.
        inp_meta: width of the concatenated meta embeddings (two embeddings of size 8).
        nmeta:    width of the meta feature branch.
    """
    def __init__(self, embed_size=65, inp_len=16, n_enc=5, embed_p=0.3, n_heads=8, p=0.1, out_p=0.3, inp_meta=16, nmeta=64):
        super().__init__()
        # Five pooled vectors, each as wide as one encoded sequence element.
        self.tf_nfeats = 5 * (embed_size + inp_len - 1)
        joint_width = self.tf_nfeats + nmeta

        # Sequence branch.
        self.tf = trfmr(embed_size, inp_len, n_enc, embed_p, n_heads, p)
        self.d1 = Dense(self.tf_nfeats, self.tf_nfeats, res=True, p=p)

        # Meta branch: two categorical columns with 3 categories each.
        self.embed_type_i = nn.Embedding(3, 8, scale_grad_by_freq=True)
        self.embed_type_a = nn.Embedding(3, 8, scale_grad_by_freq=True)
        self.meta1 = Dense(inp_meta, nmeta, bias=False)
        self.meta2 = Dense(nmeta, nmeta, res=True, p=p)

        # Head operating on the concatenation of both branches.
        self.d2 = Dense(joint_width, joint_width, res=True, p=p)
        self.d3 = Dense(joint_width, joint_width, res=True, p=p)

        self.d4 = Dense(joint_width, 128, p=out_p, bias=False)
        self.out = nn.Linear(128, 1, bias=True)

    def forward(self, x):
        """Compute the prediction for ``x = (indices, meta, mol)``.

        ``indices[:, 0]`` and ``indices[:, 1]`` select two sequence positions per
        sample (original note: "indices of atom to find scc"), ``meta`` holds two
        integer category columns and ``mol`` is the sequence fed to the encoder.
        """
        pair_idx, meta_ids, mol = x[0], x[1], x[2]
        first_idx = pair_idx[:, 0].long()
        second_idx = pair_idx[:, 1].long()

        encoded = self.tf(mol)
        seq_len = encoded.shape[1]

        head = encoded[:, 0, :]
        # A one-hot boolean mask has exactly one True per row, so masking picks
        # one encoded element per sample.
        at_first = encoded[F.one_hot(first_idx, seq_len).type(torch.bool)]
        at_second = encoded[F.one_hot(second_idx, seq_len).type(torch.bool)]
        pooled_max = encoded.max(dim=1)[0]
        pooled_mean = encoded.mean(dim=1)

        seq_feats = self.d1(torch.cat([head, at_first, at_second, pooled_max, pooled_mean], dim=-1))

        type_embeds = torch.cat(
            (self.embed_type_i(meta_ids[:, 0]), self.embed_type_a(meta_ids[:, 1])),
            dim=-1,
        )
        meta_feats = self.meta2(self.meta1(type_embeds))

        hidden = torch.cat([seq_feats, meta_feats], dim=-1)
        for stage in (self.d2, self.d3, self.d4, self.out):
            hidden = stage(hidden)

        return hidden

In [16]:
# Random fixtures for a quick forward-pass smoke test of `model`.
btt, bs, inp_len = 30, 128, 16

# Column 0 of every sequence element is the id fed to the embedding; the rest is continuous.
# np.random.randint replaces the deprecated np.random.random_integers; its upper
# bound is exclusive, so (0, 4) still draws from {0, 1, 2, 3}.
inp = np.random.rand(bs, btt, inp_len)
inp[:, :, 0] = np.random.randint(0, 4, (bs, btt))
indices = np.random.randint(0, 4, (bs, 2))

# `indices` is passed as float32 here; model.forward casts it back to int64.
inp, indices = tensor(inp).type(torch.float32), tensor(indices).type(torch.float32)
# Two category columns with values in {0, 1, 2}, matching the 3-entry meta embeddings.
meta = tensor(np.random.randint(0, 3, (bs, 2))).type(torch.LongTensor)
m = model()
m

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  import sys


model(
  (tf): trfmr(
    (layers): ModuleList(
      (0): Embedding(6, 65, scale_grad_by_freq=True)
      (1): Dropout(p=0.3)
      (2): encoder(
        (att): MultiHeadAttention(
          (attention): Linear(in_features=80, out_features=384, bias=True)
          (out): Linear(in_features=128, out_features=80, bias=True)
          (drop_att): Dropout(p=0.1)
          (drop_res): Dropout(p=0.1)
          (ln): LayerNorm(torch.Size([80]), eps=1e-05, elementwise_affine=True)
        )
        (ff): SequentialEx(
          (layers): ModuleList(
            (0): Linear(in_features=80, out_features=160, bias=True)
            (1): ReLU(inplace)
            (2): Dropout(p=0.1)
            (3): Linear(in_features=160, out_features=80, bias=True)
            (4): Dropout(p=0.1)
            (5): MergeLayer()
            (6): LayerNorm(torch.Size([80]), eps=1e-05, elementwise_affine=True)
          )
        )
      )
      (3): encoder(
        (att): MultiHeadAttention(
          (attention)

In [20]:
# Put the three input tensors and the model on the GPU before the forward pass.
indices, inp, meta = [t.cuda() for t in (indices, inp, meta)]
m = m.to(device='cuda')

In [21]:
# Smoke test: run one forward pass and display the spread of the predictions.
torch.std(m((indices, meta, inp)))

tensor(0.3813, device='cuda:0', grad_fn=<StdBackward0>)