In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    for name in names_in_folder:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib nbagg
import seaborn as sns


# Integer codes substituted for the string labels of the two categorical columns.
_CP_TYPE_CODES = {'trt_cp':0, 'ctl_vehicle':1}
_CP_DOSE_CODES = {'D1':0, 'D2':1}


def _read_features(csv_path):
    """Read a features CSV and replace the `cp_type` / `cp_dose` labels with integer codes.

    Labels missing from the code tables become NaN (standard `Series.map` behaviour).
    """
    frame = pd.read_csv(csv_path)
    frame['cp_type'] = frame['cp_type'].map(_CP_TYPE_CODES)
    frame['cp_dose'] = frame['cp_dose'].map(_CP_DOSE_CODES)
    return frame


test_features = _read_features('../input/lish-moa/test_features.csv')
train_features = _read_features('../input/lish-moa/train_features.csv')

# Targets are loaded unchanged.
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')

In [None]:
# Standard library
import copy
import itertools
import os
import time

# Third-party
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm_notebook

# Training hyper-parameters
BATCH_SIZE = 150  # samples per mini-batch handed to the DataLoaders
EPOCHS = 20       # number of passes over the training data

In [None]:
# Index features and targets by sample id, then split both the same way into
# train / hold-out parts (fixed seed so the split is reproducible).
features_by_id = train_features.set_index('sig_id')
targets_by_id = train_targets_scored.set_index('sig_id')
X_train, X_test, y_train, y_test = train_test_split(
    features_by_id,
    targets_by_id,
    random_state=0,
)

In [None]:
# Convert the split DataFrames to CPU tensors: float32 features, int64 targets.
X_train, X_test = (
    torch.from_numpy(frame.to_numpy()).float() for frame in (X_train, X_test)
)
y_train, y_test = (
    torch.from_numpy(frame.to_numpy()).long() for frame in (y_train, y_test)
)

In [None]:
# Pair features with targets, then wrap each set in a shuffling mini-batch loader.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
val_dataset = torch.utils.data.TensorDataset(X_test, y_test)

train_iterator = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_iterator = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Number of mini-batches per pass over each split; used to average per-batch losses.
# The DataLoaders are built without drop_last, so the final partial batch is
# yielded as well: the count has to be rounded UP. Rounding down undercounts the
# batches (inflating the averaged loss) and gives 0 for a split smaller than
# BATCH_SIZE, which would make the averaging divide by zero.
TRAIN_BATCHES_LEN = int(np.ceil(X_train.shape[0] / BATCH_SIZE))
VAL_BATCHES_LEN = int(np.ceil(X_test.shape[0] / BATCH_SIZE))

In [None]:
class neuraln(nn.Module):
    """Fully connected network: `num_blocks` hidden blocks plus a sigmoid output head.

    Each hidden block applies BatchNorm1d -> Dropout(0.4) -> Linear -> ReLU with a
    hidden width of 1024. The head applies BatchNorm1d -> Linear -> Sigmoid, so
    every output lies in [0, 1].

    Sub-modules are registered as ``b_n{k}``, ``dropout{k}``, ``linear{k}`` and
    ``relu{k}`` for k = 1..num_blocks, then ``b_n{num_blocks + 1}``,
    ``linear{num_blocks + 1}`` and ``sigm`` for the head. These names determine
    the state-dict keys.

    Args:
        in_ch: Number of input features.
        out_ch: Number of outputs.
        num_blocks: Number of hidden blocks; 0 builds only the output head.

    Raises:
        ValueError: If `num_blocks` is negative.
    """

    HIDDEN_WIDTH = 2 ** 10

    def __init__(self, in_ch, out_ch, num_blocks):
        super().__init__()
        if num_blocks < 0:
            raise ValueError(f"num_blocks must be non-negative, got {num_blocks}")
        self.num_blocks = num_blocks
        # Kept equal to the constructor arguments (not overwritten while building).
        self.in_channels = in_ch
        self.out_channels = out_ch

        width = in_ch  # feature width entering the next layer
        for block in range(1, num_blocks + 1):
            self.add_module(f"b_n{block}", nn.BatchNorm1d(width))
            self.add_module(f"dropout{block}", nn.Dropout(0.4))
            self.add_module(f"linear{block}", nn.Linear(width, self.HIDDEN_WIDTH))
            # nn.ReLU's only argument is the `inplace` flag, not a layer size.
            self.add_module(f"relu{block}", nn.ReLU())
            width = self.HIDDEN_WIDTH

        head = num_blocks + 1
        self.add_module(f"b_n{head}", nn.BatchNorm1d(width))
        self.add_module(f"linear{head}", nn.Linear(width, out_ch))
        self.add_module("sigm", nn.Sigmoid())

    def forward(self, x):
        """Run `x` through every hidden block and the head; returns values in [0, 1]."""
        for block in range(1, self.num_blocks + 1):
            x = getattr(self, f"b_n{block}")(x)
            x = getattr(self, f"dropout{block}")(x)
            x = getattr(self, f"linear{block}")(x)
            x = getattr(self, f"relu{block}")(x)
        head = self.num_blocks + 1
        x = getattr(self, f"b_n{head}")(x)
        x = getattr(self, f"linear{head}")(x)
        return self.sigm(x)

In [None]:
# Everything below is placed on this device.
DEVICE = 'cuda'

# Input width = number of feature columns, output width = number of target columns.
n_features, n_targets = X_train.shape[1], y_train.shape[1]
model = neuraln(in_ch=n_features, out_ch=n_targets, num_blocks=3)
model.to(DEVICE)

# Binary cross-entropy on the network's sigmoid outputs.
criterion = nn.BCELoss()
criterion.to(DEVICE)

# The optimizer is created after the model has been moved to the device.
opt = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train for EPOCHS epochs, recording the mean train / validation loss of each
# epoch and writing a checkpoint file `epoch{N}` after every epoch.
train_loss = []
val_loss = []
for epoch in range(1, EPOCHS + 1):
    st = time.perf_counter()
    # The checkpoint save at the end of each epoch moves the model to the CPU,
    # so put it back on the training device before every epoch.
    model.to(DEVICE)

    # ---- training pass ----
    model.train()
    current_loss = 0
    for x, y in train_iterator:
        # Targets are stored as integers; BCELoss needs floating-point targets.
        x, y = x.to(DEVICE), y.float().to(DEVICE)
        opt.zero_grad()
        # The model output is already a float tensor on DEVICE; converting it
        # with .type(torch.FloatTensor) would copy it to the CPU and back.
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        opt.step()
        current_loss += loss.item()/TRAIN_BATCHES_LEN
    train_loss.append(current_loss)

    # ---- validation pass (no gradients) ----
    model.eval()
    with torch.no_grad():
        current_loss = 0
        for x, y in val_iterator:
            x, y = x.to(DEVICE), y.float().to(DEVICE)
            y_pred = model(x)
            loss = criterion(y_pred, y)
            current_loss += loss.item()/VAL_BATCHES_LEN
        val_loss.append(current_loss)

    # Checkpoint with CPU tensors. model.cpu() moves the model itself, so after
    # the last epoch the model is left on the CPU and in eval mode.
    torch.save(model.cpu().state_dict(), 'epoch{}'.format(epoch))

    print('Epoch: {}'.format(epoch), 'train loss: {:.5f}'.format(train_loss[-1]),
          'val loss: {:.5f}'.format(val_loss[-1]), 'Time: {}'.format(time.perf_counter() - st))

In [None]:
# Predict target probabilities for the test set and write `submission.csv`.
# Set eval mode and match the model's device here, so the cell does not depend
# on the state in which the training loop happened to leave the model.
model.eval()
model_device = next(model.parameters()).device

test_inputs = torch.from_numpy(
    test_features.set_index('sig_id').to_numpy()
).float().to(model_device)

with torch.no_grad():
    probs = model(test_inputs).cpu().numpy()

# One probability column per scored target, in the order of the training targets.
target_columns = train_targets_scored.columns.drop('sig_id')
submission = pd.concat(
    [test_features[['sig_id']], pd.DataFrame(probs, columns=target_columns)],
    axis=1,
)
submission.to_csv('submission.csv', index=False)