In [1]:
import numpy as np
import pandas as pd
import fastai
from tqdm import tqdm_notebook as tqdm
from fastai.tabular import *
import fastai.text

import pickle

from multiprocessing import Pool
from sklearn.preprocessing import LabelEncoder, LabelBinarizer, StandardScaler

import torch.nn as nn
import torch.nn.functional as F

np.range = (lambda x:(x.min(), x.max()))

np.range = (lambda x:(x.min(), x.max()))

In [2]:
with open('train_test_pre.pkl', 'rb') as f:
    train, test = pickle.load(f)
    
with open('molecules.pkl', 'rb') as f:
    molecules_structure, structure_cols, atom_encoder = pickle.load(f)

In [3]:
train = train[:100]

## getting Dataloader ready

In [4]:
train.head()

Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,scalar_coupling_constant,type_i,type_a
0,0,dsgdb9nsd_000001,2,1,84.8076,0,0
1,1,dsgdb9nsd_000001,2,3,-11.257,1,1
2,2,dsgdb9nsd_000001,2,4,-11.2548,1,1
3,3,dsgdb9nsd_000001,2,5,-11.2543,1,1
4,4,dsgdb9nsd_000001,3,1,84.8074,0,0


In [75]:
class Dataset:
    def __init__(self, x, y=None):
        assert (y is None) or (len(x) == len(y))
        self.x, self.y = x, y
        
    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.y is not None: return Dataset(self.x[idx], self.y[idx])
            else: return Dataset(self.x[idx])
        else:
            if self.y is not None: return self.x[idx], self.y[idx]
            else: return self.x[idx]
            
    def __len__(self):
        return len(self.x)
    
    
class Sampler:
    def __init__(self, ds, bs=64, shuffle=True, drop_last=True):
        print('sampler init, n: ', len(ds))
        self.n, self.bs, self.shuffle = len(ds), bs, shuffle
        self.idxs = torch.randperm(self.n) if self.shuffle else torch.arange(self.n)
        if drop_last: self.n = (self.n//self.bs)*self.bs

    def __iter__(self):
        for i in range(0, self.n, self.bs):
            yield self.idxs[i : min(self.n, i+self.bs)]

    def __len__(self):
        return (self.n-1)//self.bs + 1

    

def collate(batch):
    #print(batch)
    x, y = zip(*batch)
    x1, x2, x3 = zip(*x)
    x1, x2, x3, y = torch.stack(x1), torch.stack(x2), torch.stack(x3), torch.stack(y)
    return [np.array([x1.cuda(), x2.cuda(), x3.cuda()]), y.cuda()]



def tfrm_structure(mol, p, q, natoms):
    perm = np.arange(30)
    perm[1: natoms+1] = np.random.permutation(np.arange(1, natoms+1))
    p, = np.where(perm==p)[0]
    q, = np.where(perm==q)[0]
    mol = mol[perm]
    return mol, p, q



class Dataloader:
    def __init__(self, dataset, bs=256, shuffle=True, collate_fn=collate, tfrm=None, drop_last=True):
        self.dataset, self.bs, self.shuffle, self.collate_fn, self.tfrm = dataset, bs, shuffle, collate_fn, tfrm
        self.batch_size = self.bs
        self.sampler = Sampler(self.dataset, self.bs, self.shuffle, drop_last)
        
    def _get(self, i):
        x, y = self.dataset[i]
        #print(x, y)
        mol = molecules_structure[x[0]][1]
        natoms = molecules_structure[x[0]][0]
        if self.tfrm: mol = self.tfrm(mol)
            
        indices = tensor(x[1:3].astype(np.int64))
        meta = tensor(x[3:5].astype(np.int64))
        
        return (indices, meta, mol), tensor(y)
        
    def __iter__(self):
        
        for idxs in self.sampler:
            yield self.collate_fn([self._get(i) for i in idxs])
            
    def __len__(self):
        return self.sampler.__len__()

In [76]:
class Databunch():
    def __init__(self, train_dl, valid_dl, c=1, path = Path('.'), device='cuda'):
        self.train_dl,self.valid_dl,self.c = train_dl,valid_dl,c
        self.path = path
        self.device = 'cuda'
    @property
    def train_ds(self): return self.train_dl.ds
        
    @property
    def valid_ds(self): return self.valid_dl.ds
    
def get_databunch(train, bs=256, split_pct=0.2):
    molecule_names = np.random.permutation(list(set(train.molecule_name)))
    sp = int(split_pct*len(molecule_names))
    valid_names   = molecule_names[:sp]
    valid_indexes = train.molecule_name.apply(lambda x: x in valid_names)
    valid = train[ valid_indexes]
    train = train[~valid_indexes]

    x_train, y_train = train[['molecule_name', 'atom_index_0', 'atom_index_1', 'type_i', 'type_a']].values, train.scalar_coupling_constant.values
    x_valid, y_valid = valid[['molecule_name', 'atom_index_0', 'atom_index_1', 'type_i', 'type_a']].values, valid.scalar_coupling_constant.values

    train_ds = Dataset(x_train, y_train)
    valid_ds = Dataset(x_valid, y_valid)

    return Databunch(Dataloader(train_ds, bs=bs, shuffle=True, drop_last=True), Dataloader(valid_ds, bs=bs*2, drop_last=False))

In [77]:
db = get_databunch(train, bs=3)

it = iter(db.train_dl)

x, y = next(it)
x, y

sampler init, n:  78
sampler init, n:  22


ValueError: only one element tensors can be converted to Python scalars

In [78]:
x1, x2, x3 = x

In [79]:
x2

tensor([[1, 0],
        [1, 1],
        [0, 0]], device='cuda:0')

In [65]:
torch.stack((x1.float(), x2.float(), x3.float()), dim=1)

RuntimeError: invalid argument 0: Tensors must have same number of dimensions: got 4 and 3 at /pytorch/aten/src/THC/generic/THCTensorMath.cu:62

In [41]:
torch.cuda.get_device_properties(0)

_CudaDeviceProperties(name='GeForce GTX 1060', major=6, minor=1, total_memory=6078MB, multi_processor_count=10)

## getting model ready

In [71]:
class Dense(nn.Module):
    def __init__(self, ni, no, usebn=True, act=True, p=0., res=False, bias=True):
        """
        layers in order linear, relu, bn, dropout if residual is false
        else linear, relu, drop, add_residual, bn
        """
        super(Dense, self).__init__()
        
        assert ((not res) or (res and ni==no)), "input output sizes are different, res connection not possible"
        
        self.res    = res
        self.usebn  = usebn
        
        self.layers = [nn.Linear(ni, no, bias= bias)]
        if act: self.layers.append(nn.ReLU())
        if usebn and not res: self.layers.append(nn.BatchNorm1d(no))
        if p > 1e-3: self.layers.append(nn.Dropout(p))
            
        self.layers = nn.ModuleList(self.layers)
        
        if usebn and res:
            self.bn = nn.BatchNorm1d(no)
        
    def forward(self, x):
        if self.res: residual = x
        for l in self.layers: x = l(x)
        if self.res: x += residual
        if self.res and self.usebn: x = self.bn(x)
        return x
    
    
    
    
class encoder(nn.Module):
    def __init__(self, d_model=64, p=0.0, n_heads=8):
        super(encoder, self).__init__()
        self.att = fastai.text.models.MultiHeadAttention(n_heads, d_model, d_head=16, resid_p=p, attn_p=p)
        self.ff  = fastai.text.models.feed_forward(d_model, d_model*2, ff_p=p)
        
    def forward(self, x):
        x = self.att(x)
        x = self.ff(x)
        return x
    
    
    
class trfmr(nn.Module):
    def __init__(self, embed_size=64, inp_len=5, n_enc=5, embed_p=0.3, n_heads=8, p=0.1):
        super(trfmr, self).__init__()
        self.layers = []
        self.layers.append(nn.Embedding(6, embed_size, scale_grad_by_freq=True))
        self.layers.append(nn.Dropout(embed_p))
        self.layers = nn.ModuleList(self.layers + [encoder(embed_size+inp_len-1, p, n_heads) for _ in range(n_enc)])
    
    def forward(self, x):
        atoms = x[:,:, 0].type(torch.LongTensor).cuda() #### note
        conts = x[:,:, 1:]
        
        embeds = self.layers[1](self.layers[0](atoms))
        
        x = torch.cat((embeds, conts), dim=-1)
        
        for l in self.layers[2:]:
            x = l(x)
        return x
    
    
    
class model(nn.Module):
    def __init__(self, embed_size=65, inp_len=16, n_enc=5, embed_p=0.3, n_heads=8, p=0.1, out_p=0.3, inp_meta=16, nmeta=64):
        super(model, self).__init__()
        self.tf_nfeats= (embed_size+inp_len-1)*5
        
        self.tf = trfmr(embed_size, inp_len, n_enc, embed_p, n_heads, p)
        self.d1 = Dense(self.tf_nfeats, self.tf_nfeats, res=True, p=p)
        
        self.embed_type_i = nn.Embedding(3, 8, scale_grad_by_freq=True)
        self.embed_type_a = nn.Embedding(3, 8, scale_grad_by_freq=True)
        self.meta1 = Dense(inp_meta, nmeta, bias=False)
        self.meta2 = Dense(nmeta, nmeta, res=True, p=p)
        
        self.d2 = Dense(self.tf_nfeats+nmeta, self.tf_nfeats+nmeta, res=True, p=p)
        self.d3 = Dense(self.tf_nfeats+nmeta, self.tf_nfeats+nmeta, res=True, p=p)
        
        self.d4 = Dense(self.tf_nfeats+nmeta, 128, p=out_p, bias=False)
        self.out = nn.Linear(128, 1, bias=True)
    
    def forward(self, x):
        ## x ===> (indices, meta, mol)
        
        p, q = x[0][:, 0].type(torch.int64), x[0][:, 1].type(torch.int64) ##indices of atom to find scc
        meta = x[1] ##input meta features and other
        x = x[2] ##input structure
        
        x = self.tf(x) ##encode
        extra = x[:, 0, :]
        mask = F.one_hot(p, x.shape[1]).type(torch.bool)
        p = x[mask]
        mask = F.one_hot(q, x.shape[1]).type(torch.bool)
        q = x[mask]
        mx = torch.max(x, 1)[0]
        mn = torch.mean(x, 1)
        
        x = torch.cat([extra, p, q, mx, mn], dim=-1)
        x = self.d1(x)
        
        e1 = self.embed_type_i(meta[:, 0])
        e2 = self.embed_type_a(meta[:, 1])
        meta_e = torch.cat((e1, e2), dim=-1)
        meta = self.meta1(meta_e)
        meta = self.meta2(meta)
        
        x = torch.cat([x, meta], dim=-1)
        x = self.d2(x)
        x = self.d3(x)
        x = self.d4(x)
        x = self.out(x)
        
        return x

## putting things together

In [72]:
m = model()
m = m.to('cuda')

In [73]:
l = Learner(db, model(), path='.', loss_func=F.mse_loss)

In [74]:
l.lr_find()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.


TypeError: forward() takes 2 positional arguments but 4 were given

In [21]:
m(x)

tensor([[-0.3995],
        [ 0.0785],
        [ 0.3243]], device='cuda:0', grad_fn=<AddmmBackward>)

In [16]:
x[1][:, 0]

tensor([-6, -7, -4], device='cuda:0')

In [14]:
m

model(
  (tf): trfmr(
    (layers): ModuleList(
      (0): Embedding(6, 65, scale_grad_by_freq=True)
      (1): Dropout(p=0.3)
      (2): encoder(
        (att): MultiHeadAttention(
          (attention): Linear(in_features=80, out_features=384, bias=True)
          (out): Linear(in_features=128, out_features=80, bias=True)
          (drop_att): Dropout(p=0.1)
          (drop_res): Dropout(p=0.1)
          (ln): LayerNorm(torch.Size([80]), eps=1e-05, elementwise_affine=True)
        )
        (ff): SequentialEx(
          (layers): ModuleList(
            (0): Linear(in_features=80, out_features=160, bias=True)
            (1): ReLU(inplace)
            (2): Dropout(p=0.1)
            (3): Linear(in_features=160, out_features=80, bias=True)
            (4): Dropout(p=0.1)
            (5): MergeLayer()
            (6): LayerNorm(torch.Size([80]), eps=1e-05, elementwise_affine=True)
          )
        )
      )
      (3): encoder(
        (att): MultiHeadAttention(
          (attention)