In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Load every competition CSV from the lish-moa input directory.
# NOTE: despite their names, `train_features_nonscored_df` and
# `train_features_scored_df` are read from the *targets* files
# (train_targets_nonscored.csv / train_targets_scored.csv), not feature files.
train_features_df = pd.read_csv('../input/lish-moa/train_features.csv')
test_features_df = pd.read_csv('../input/lish-moa/test_features.csv')
train_features_nonscored_df = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
train_features_scored_df = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
sample_submission_df = pd.read_csv('../input/lish-moa/sample_submission.csv')
train_drug_df = pd.read_csv("../input/lish-moa/train_drug.csv")

In [None]:
# Report "<rows> X <cols>" for each loaded frame, one line per frame.
_frames_to_report = (
    ("train_features_df --> ", train_features_df),
    ("test_features_df --> ", test_features_df),
    ("train_features_nonscored_df --> ", train_features_nonscored_df),
    ("train_features_scored_df --> ", train_features_scored_df),
    ("sample_submission_df --> ", sample_submission_df),
    ("train_drug_df --> ", train_drug_df),
)
for _label, _frame in _frames_to_report:
    _n_rows, _n_cols = _frame.shape
    print(_label, _n_rows, " X ", _n_cols)

In [None]:
# Per-column dtype / non-null summary of the scored-targets frame (verbose lists every column).
train_features_scored_df.info(verbose=True)

In [None]:
# First rows of the scored-targets frame.
train_features_scored_df.head()

In [None]:
# Fraction of rows equal to 1 and equal to 0 for every scored-target column
# (all columns of train_features_scored_df except the first).
# The counts are vectorised; the previous version converted each column to a
# Python list twice per column just to call list.count().
_n_rows = len(train_features_scored_df)
class_distribution = dict()
for num in train_features_scored_df.columns[1:]:
    _column = train_features_scored_df[num]
    # int(...) keeps the stored fractions as plain Python floats, so the
    # printed representation is unchanged.
    class_distribution[num] = {1: int((_column == 1).sum()) / _n_rows,
                               0: int((_column == 0).sum()) / _n_rows}
for i in class_distribution.items():
    print(i)

In [None]:
# First rows of the training feature frame.
train_features_df.head()

In [None]:
# Per-column dtype / non-null summary of the training features (verbose lists every column).
train_features_df.info(verbose=True)

In [None]:
# Summary statistics of the training feature columns.
train_features_df.describe()

In [None]:
# Frequency of each distinct cp_type value.
train_features_df["cp_type"].value_counts()

In [None]:
# Frequency of each distinct cp_dose value.
train_features_df["cp_dose"].value_counts()

In [None]:
# Frequency of each distinct cp_time value.
train_features_df["cp_time"].value_counts()

In [None]:
# Join features and scored targets on sig_id (pd.merge default: inner join),
# giving one frame that holds both the inputs and the labels.
train_df = pd.merge(train_features_df, train_features_scored_df, on="sig_id")
train_df.head()

In [None]:
# Target matrix as a NumPy array: every scored-target column (all columns of
# train_features_scored_df except the first) taken from the merged frame.
train_df[train_features_scored_df.columns[1:]].values

In [None]:
# Inspect which type the sliced column index has.
type(train_features_scored_df.columns[1:])

In [None]:
# Inspect which type `.values` returns for a single column.
type(train_df.cp_type.values)

In [None]:
# Fetch the iterative-stratification repository and switch the kernel's working
# directory into it, so the later `from iterstrat... import` can be resolved
# from the clone.
# NOTE(review): %cd changes the working directory for every later cell, and the
# clone is not pinned to a tag/commit — consider installing a pinned release instead.
!git clone https://github.com/trent-b/iterative-stratification.git
%cd iterative-stratification
!ls

In [None]:
# create folds
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# Fixed seed so that fold membership is identical after Restart & Run All.
FOLD_SEED = 42

# Shuffle into a NEW frame first and only then add the fold column, so that
# `train_df` itself is never mutated (previously `df` aliased `train_df` and the
# "kfold" column leaked into it).
df = train_df.sample(frac=1, random_state=FOLD_SEED).reset_index(drop=True)
df["kfold"] = -1    # -1 marks "not assigned yet"

# Multi-label target matrix used for stratification.
y = df[train_features_scored_df.columns[1:]].values
kf = MultilabelStratifiedKFold(n_splits=6, shuffle=True, random_state=FOLD_SEED)

# v_ holds the positional indices of the validation rows of fold f; after
# reset_index(drop=True) positions and index labels coincide, so .loc is safe.
for f, (t_, v_) in enumerate(kf.split(X=df, y=y)):
    df.loc[v_, 'kfold'] = f

In [None]:
# Integer-encode cp_type. pd.factorize returns (codes, uniques); codes are
# assigned in order of first appearance in `df`, so the mapping depends on row order.
cp_type_stored_values , cp_type_label = pd.factorize(df["cp_type"])
cp_type_stored_values

In [None]:
# Indexing the uniques with the codes reconstructs the original labels.
cp_type_label[cp_type_stored_values]

In [None]:
# Integer-encode cp_dose the same way (codes follow order of first appearance in `df`).
cp_dose_stored_values , cp_dose_label = pd.factorize(df["cp_dose"])
cp_dose_stored_values

In [None]:
# Indexing the uniques with the codes reconstructs the original labels.
cp_dose_label[cp_dose_stored_values]

In [None]:
# Overwrite both columns in `df` with their integer codes from pd.factorize.
# NOTE(review): this mutates `df` in place; re-running the factorize cells after
# this one would factorize the codes instead of the original labels.
df["cp_type"] = cp_type_stored_values
df["cp_dose"] = cp_dose_stored_values

In [None]:
# df["cp_dose"] = cp_dose_label[cp_dose_stored_values]  # for reversing

In [None]:
# First rows of `df` after cp_type / cp_dose were replaced by integer codes.
df.head()

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature columns are every column of train_features_df except the first.
col_names = train_features_df.columns[1:]

# Standardise each feature column; the scaler is fitted on all rows of `df`
# and the result is a plain NumPy array.
_scaler = StandardScaler()
features = _scaler.fit_transform(df[col_names].values)

In [None]:
# Write the standardised array back over the feature columns of `df`
# (same column selection that produced `features`).
df[train_features_df.columns[1:]] = features

In [None]:
# First rows of `df` after the feature columns were standardised.
df.head()

In [None]:
# Download the env-setup script from the pytorch/xla repository and run it,
# additionally requesting the libomp5 and libopenblas-dev apt packages.
# NOTE(review): the script is fetched from the `master` branch, so what gets
# executed can change between runs — consider pinning a commit.
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --apt-packages libomp5 libopenblas-dev

In [None]:
!pip install torchcontrib

In [None]:
import warnings
import torch_xla
import torch_xla.debug.metrics as met
import torch_xla.distributed.data_parallel as dp
import torch_xla.distributed.parallel_loader as pl
import torch_xla.utils.utils as xu
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp

In [None]:
from torch.utils.data import Dataset
import torch

class MoA_dataset_class(Dataset):
  """Map-style dataset yielding one row of features and its target vector.

  Args:
    id: Sequence whose length defines the dataset length.
    tabular: DataFrame holding both the feature and the target columns.
    feature_columns: Columns used as model input. Defaults to every column
      of the module-level ``train_features_df`` except the first.
    target_columns: Columns used as labels. Defaults to every column of the
      module-level ``train_features_scored_df`` except the first.

  Both arrays are extracted once in ``__init__``. The previous implementation
  re-sliced the DataFrame and converted it to NumPy on every ``__getitem__``
  call, which made each sample fetch proportional to the size of the frame.
  Changes made to ``tabular`` after construction are therefore not picked up.
  """
  def __init__(self, id , tabular, feature_columns = None, target_columns = None):
    self.id = id
    self.tabular = tabular

    if feature_columns is None:
      feature_columns = train_features_df.columns[1:]
    if target_columns is None:
      target_columns = train_features_scored_df.columns[1:]

    # list(...) so that any iterable of column names selects multiple columns.
    self.features = tabular[list(feature_columns)].to_numpy(dtype = "float32")
    self.output = tabular[list(target_columns)].values

  def __len__(self):
    """Number of samples, taken from the length of ``id``."""
    return len(self.id)

  def __getitem__(self, index):
    """Return the sample at positional ``index`` as a dict of float tensors.

    ``index`` addresses rows of ``tabular`` by position (it is not looked up
    through ``self.id``), which matches the original behaviour.
    """
    return {
        'tabular_data' : torch.tensor(self.features[index], dtype = torch.float) ,
        'output' : torch.tensor(self.output[index], dtype = torch.float),
    }

In [None]:
# Fold `fold` is held out for validation; all other folds form the training set.
fold = 0
_held_out = df.kfold == fold
df_train = df[~_held_out].reset_index(drop=True)
df_valid = df[_held_out].reset_index(drop=True)

# Wrap each split in the dataset class; the id list is simply 0..len-1.
train_data = MoA_dataset_class(list(range(len(df_train))), df_train)
val_data = MoA_dataset_class(list(range(len(df_valid))), df_valid)

In [None]:
# Dry run: fetch one validation sample and print its feature and target tensors.
idx = 100 # positional index into val_data; requires at least 101 validation rows

print(val_data[idx]["tabular_data"])
print(val_data[idx]["output"])

In [None]:
# Size of one feature tensor; this is the input width the model has to accept.
val_data[idx]["tabular_data"].size()

In [None]:
# Both samplers shard their dataset by the XLA world size and this process's
# ordinal; only the training sampler shuffles.
_shard_spec = {
    "num_replicas": xm.xrt_world_size(),
    "rank": xm.get_ordinal(),
}

train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_data, shuffle=True, **_shard_spec)

valid_sampler = torch.utils.data.distributed.DistributedSampler(
    val_data, shuffle=False, **_shard_spec)

In [None]:
from torch.utils.data import DataLoader

TRAIN_BATCH_SIZE = 32

# Settings shared by the training and validation loaders.
_loader_common = dict(num_workers=4, batch_size=TRAIN_BATCH_SIZE)

# Training drops the last incomplete batch; validation keeps every sample.
training_dataloader = DataLoader(
    train_data, sampler=train_sampler, drop_last=True, **_loader_common)

val_dataloader = DataLoader(
    val_data, sampler=valid_sampler, drop_last=False, **_loader_common)

In [None]:
# XLA device handle; the model and every batch are moved to it later.
device = xm.xla_device()

In [None]:
# Implementing a ResNet-18-style network built from dense (Linear) layers for tabular input

class block(torch.nn.Module):
  """Residual block: two Linear -> BatchNorm1d -> ReLU stages plus a skip connection.

  Args:
    in_channels: Width of the incoming feature vector.
    out_channels: Width produced by both dense layers.
    identity_downsample: Optional module applied to the skip path so that its
      width matches ``out_channels``; ``None`` leaves the input untouched.
    stride: Accepted for signature compatibility; it is not used by this block.
  """
  # Width multiplier of the block output relative to out_channels.
  expansion = 1
  
  def __init__(self , in_channels , out_channels, identity_downsample = None , stride = 1):
    super(block , self).__init__()

    self.dense_layer_1 = torch.nn.Linear(in_channels , out_channels)
    self.batch_norm_1 = torch.nn.BatchNorm1d(out_channels)

    self.dense_layer_2 = torch.nn.Linear(out_channels , out_channels)
    self.batch_norm_2 = torch.nn.BatchNorm1d(out_channels * (self.expansion))

    self.relu = torch.nn.ReLU()

    self.identity_downsample = identity_downsample

  def forward(self, x):
    """Apply both dense stages, add the (optionally projected) input, then ReLU."""
    identity = x

    x = self.dense_layer_1(x)
    x = self.batch_norm_1(x)
    x = self.relu(x)

    x = self.dense_layer_2(x)
    x = self.batch_norm_2(x)
    x = self.relu(x)

    if self.identity_downsample is not None:
      identity = self.identity_downsample(identity)

    # Out-of-place add on purpose: `x` is the output of ReLU, which autograd
    # keeps for the backward pass. `x += identity` would overwrite that saved
    # tensor and make backward() fail with an in-place modification error.
    x = x + identity
    x = self.relu(x)

    return x


class RestNet_18_for_Regression(torch.nn.Module):
  """Dense stem, four residual stages (64/128/256/512 wide) and a sigmoid head.

  Args:
    block: Residual block class; must expose an ``expansion`` attribute and
      accept ``(in_channels, out_channels, identity_downsample, stride)``.
    number_of_inputs: Width of the input feature vector.
    layers: Number of residual blocks in each of the four stages.
    num_classes: Width of the output vector.

  ``forward`` returns sigmoid activations, i.e. values in (0, 1), not logits.
  """
  def __init__(self, block, number_of_inputs, layers , num_classes):
    super(RestNet_18_for_Regression , self).__init__()

    stem_width = 64
    # Running width of the network; _layer() updates it after each stage.
    self.in_channels = stem_width

    # Stem: two Linear -> BatchNorm1d -> ReLU stages at a constant width.
    self.Zeroth_dense_layer = torch.nn.Linear(number_of_inputs , stem_width)
    self.batch_norm_1 = torch.nn.BatchNorm1d(stem_width)
    self.relu_1 = torch.nn.ReLU()

    self.First_dense_layer = torch.nn.Linear(stem_width , stem_width)
    self.batch_norm_2 = torch.nn.BatchNorm1d(stem_width)
    self.relu_2 = torch.nn.ReLU()

    # Residual stages layer1..layer4, created in order.
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for position, (width, stride) in enumerate(stage_specs):
      stage = self._layer(block , width , layers[position], stride = stride)
      setattr(self, "layer{}".format(position + 1), stage)

    self.fc = torch.nn.Linear(512 * block.expansion , num_classes)
    self.sigmoid = torch.nn.Sigmoid()

  def _layer(self, block, out_channels , num_residual_blocks, stride):
    """Build one stage of ``num_residual_blocks`` blocks as a Sequential.

    The first block gets a Linear + BatchNorm1d projection on its skip path
    whenever ``stride`` is not 1 or the width changes; the remaining blocks
    keep the width and use a plain identity skip.
    """
    expanded_width = out_channels * block.expansion

    keeps_shape = stride == 1 and self.in_channels == expanded_width
    if keeps_shape:
      projection = None
    else:
      projection = torch.nn.Sequential(
          torch.nn.Linear(self.in_channels , expanded_width),
          torch.nn.BatchNorm1d(expanded_width))

    stage_blocks = [block(self.in_channels, out_channels, projection, stride)]
    self.in_channels = expanded_width

    for _ in range(num_residual_blocks - 1):
      stage_blocks.append(block(self.in_channels , out_channels))

    return torch.nn.Sequential(*stage_blocks)

  def forward(self, x):
    """Run stem, residual stages and head; returns sigmoid activations."""
    x = self.relu_1(self.batch_norm_1(self.Zeroth_dense_layer(x)))
    x = self.relu_2(self.batch_norm_2(self.First_dense_layer(x)))

    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
      x = stage(x)

    # Flatten everything after the batch dimension before the final layer.
    x = x.reshape(x.shape[0], -1)
    return self.sigmoid(self.fc(x))

# Input and output widths are derived from the same column sets the dataset
# class uses by default (every column except the first of each frame), so the
# model cannot drift out of sync with the data. layers=[2, 2, 2, 2] is the
# ResNet-18 block layout.
model = RestNet_18_for_Regression(
    block,
    number_of_inputs = len(train_features_df.columns) - 1,
    layers = [2, 2, 2, 2],
    num_classes = len(train_features_scored_df.columns) - 1,
)

In [None]:
# Display the module tree of the freshly built model.
model

In [None]:
# Move the model's parameters to the XLA device before the optimizer is created.
model = model.to(device)

In [None]:
#for Stochastic Weight Averaging in PyTorch
from torchcontrib.optim import SWA

EPOCHS = 25
num_train_steps = int(len(train_data) / TRAIN_BATCH_SIZE / xm.xrt_world_size() * EPOCHS)

# Total number of optimisation steps per replica over ALL epochs (informational only).
xm.master_print(f'num_train_steps = {num_train_steps}, world_size={xm.xrt_world_size()}')

# The model's forward() already ends with a Sigmoid, so it emits probabilities.
# BCEWithLogitsLoss would apply a second sigmoid on top of them; BCELoss is the
# criterion that expects probabilities.
loss_fn = torch.nn.BCELoss()

# Base learning rate is scaled linearly with the number of replicas.
base_optimizer = torch.optim.Adam(model.parameters(), lr= 1e-4 * xm.xrt_world_size())

# SWA wraps the base optimizer: averaging starts after `swa_start` steps and a
# snapshot is taken every `swa_freq` steps.
optimizer = SWA(base_optimizer, swa_start=5, swa_freq=5, swa_lr=0.05)

# Plateau scheduler: its step() must be given the monitored metric.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience = 5, verbose = True)

In [None]:
# defining the training loop

from sklearn.metrics import log_loss

def train_loop_fn(data_loader, model, optimizer, device, scheduler):
    """Train ``model`` for one pass over ``data_loader`` and report metrics.

    Args:
        data_loader: Iterable of batches; each batch is a dict with the keys
            ``"tabular_data"`` and ``"output"``.
        model: Network to train; its outputs are treated as probabilities
            when the log loss is computed.
        optimizer: Optimizer stepped through ``xm.optimizer_step``.
        device: Device the batches are moved to.
        scheduler: LR scheduler stepped once after the pass. A
            ``ReduceLROnPlateau`` instance is given the mean training loss,
            any other scheduler is stepped without arguments.

    Prints the mean batch loss and the column-averaged log loss via
    ``xm.master_print``. Relies on the module-level ``loss_fn``.
    """
    running_loss = 0.0
    num_batches = 0
    batch_targets = []
    batch_predictions = []

    model.train()

    for batch_index, dataset in enumerate(data_loader):
        tabular_data = dataset["tabular_data"].to(device, dtype=torch.float)
        targets = dataset["output"].to(device, dtype=torch.float)

        optimizer.zero_grad()

        outputs = model(tabular_data)
        loss = loss_fn(outputs, targets)

        loss.backward()
        xm.optimizer_step(optimizer)

        running_loss += loss.item()
        num_batches += 1

        # Keep host-side copies so the log loss can be computed after the pass.
        batch_targets.append(targets.detach().cpu().numpy())
        batch_predictions.append(outputs.detach().cpu().numpy())

    if num_batches == 0:
        # Nothing was trained on: there is no metric to report or to schedule on.
        xm.master_print('training skipped: data loader produced no batches')
        return

    # Average over batches, not over samples: each loss value is already a
    # per-batch mean under the criterion's default reduction.
    train_loss = running_loss / num_batches

    # ReduceLROnPlateau.step() requires the monitored metric.
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(train_loss)
    else:
        scheduler.step()

    actual = np.concatenate(batch_targets)
    predicted = np.concatenate(batch_predictions)

    # Mean of the per-target-column log losses. labels=[0, 1] keeps log_loss
    # defined for columns that contain only a single class.
    num_targets = predicted.shape[1]
    score = 0.0
    for i in range(num_targets):
        score += log_loss(actual[:, i], predicted[:, i], labels=[0, 1]) / num_targets

    xm.master_print('training Loss: {:.4f} and Log Loss : {:.4f}'.format(train_loss , score))

In [None]:
def eval_loop_fn(data_loader, model, device):
    """Evaluate ``model`` on ``data_loader`` and report metrics.

    Args:
        data_loader: Iterable of batches; each batch is a dict with the keys
            ``"tabular_data"`` and ``"output"``.
        model: Network to evaluate; its outputs are treated as probabilities
            when the log loss is computed.
        device: Device the batches are moved to.

    Prints the mean batch loss and the column-averaged log loss via
    ``xm.master_print``. Relies on the module-level ``loss_fn``.
    """
    running_loss = 0.0
    num_batches = 0
    batch_targets = []
    batch_predictions = []

    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_index, dataset in enumerate(data_loader):
            tabular_data = dataset["tabular_data"].to(device, dtype=torch.float)
            targets = dataset["output"].to(device, dtype=torch.float)

            outputs = model(tabular_data)
            loss = loss_fn(outputs, targets)

            running_loss += loss.item()
            num_batches += 1

            batch_targets.append(targets.cpu().numpy())
            batch_predictions.append(outputs.cpu().numpy())

    if num_batches == 0:
        xm.master_print('validation skipped: data loader produced no batches')
        return

    # Average over batches, not over samples: each loss value is already a
    # per-batch mean under the criterion's default reduction.
    valid_loss = running_loss / num_batches

    actual = np.concatenate(batch_targets)
    predicted = np.concatenate(batch_predictions)

    # Mean of the per-target-column log losses; the divisor is the real number
    # of target columns instead of a hard-coded 206. labels=[0, 1] keeps
    # log_loss defined for columns that contain only a single class.
    num_targets = predicted.shape[1]
    score = 0.0
    for i in range(num_targets):
        score += log_loss(actual[:, i], predicted[:, i], labels=[0, 1])

    xm.master_print('validation Loss: {:.4f}and Log Loss : {:.4f}'.format(valid_loss , score/num_targets))

In [None]:
def _run():
  """Run EPOCHS rounds of training followed by validation.

  Uses the module-level model, optimizer, scheduler, device, samplers and
  data loaders. A fresh ParallelLoader is created for every pass.
  """
  for epoch in range(EPOCHS):
      xm.master_print(f"Epoch --> {epoch+1} / {EPOCHS}")
      xm.master_print(f"-------------------------------")

      # DistributedSampler derives its shuffle from the epoch number; without
      # this call every epoch would iterate the data in the same order.
      train_sampler.set_epoch(epoch)

      para_loader = pl.ParallelLoader(training_dataloader, [device])
      train_loop_fn(para_loader.per_device_loader(device), model, optimizer, device, scheduler)

      para_loader = pl.ParallelLoader(val_dataloader, [device])
      eval_loop_fn(para_loader.per_device_loader(device), model, device)

In [None]:
def _mp_fn(rank, flags):
    """Per-process entry point handed to ``xmp.spawn``.

    Sets the default tensor type, runs the full training loop and finally
    swaps the SWA-averaged weights into the model via the optimizer.
    ``rank`` and ``flags`` are accepted but not used.
    """
    torch.set_default_tensor_type('torch.FloatTensor')
    _run()  # returns nothing, so no result is kept
    optimizer.swap_swa_sgd()
    
# Launch training through torch_xla's multiprocessing spawner. nprocs=1 requests
# a single process, started with the 'fork' method; FLAGS is passed through to
# _mp_fn as its second argument.
FLAGS={}
xmp.spawn(_mp_fn, args=(FLAGS,), nprocs=1, start_method='fork')