In [None]:
import sys
import pandas as pd
import numpy as np
sys.path.append('../input/iterativestratification/')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import torch
from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

import optuna

In [None]:
# Configuration: every value a reader might want to tune lives in this cell.
TRAIN_BATCH_SIZE = 1024
VALID_BATCH_SIZE = 1024
EPOCHS = 100
SEED = 42

# Seed the global NumPy and PyTorch generators so that runs started from a
# fresh kernel draw the same random numbers (pandas' `sample` falls back to
# the NumPy global state when no `random_state` is given).
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Load the four competition CSV files, in the same order as the names on the
# left-hand side: train features, test features, scored targets and
# non-scored targets.
data_train, data_test, target_scored, target_nonscored = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/lish-moa/train_features.csv",
        "/kaggle/input/lish-moa/test_features.csv",
        "/kaggle/input/lish-moa/train_targets_scored.csv",
        "/kaggle/input/lish-moa/train_targets_nonscored.csv",
    )
)

In [None]:
# Preview the first five rows of the training features.
data_train.head(n=5)

In [None]:
# Shape of the training data, displayed as a (rows, columns) tuple

data_train.shape

In [None]:
# Report how many rows each feature table contains.
n_train_rows = len(data_train)
n_test_rows = len(data_test)
print('There are  {:} rows in training data.'.format(n_train_rows))
print('There are  {:} rows in test data.'.format(n_test_rows))

In [None]:
# Restrict the training frame to the test frame's columns (in the test
# frame's column order), then stack the two frames row-wise.
shared_columns = data_test.columns.tolist()
data_train = data_train[shared_columns]
all_data = pd.concat([data_train, data_test], axis=0)
print(data_train.shape, data_test.shape, all_data.shape)


In [None]:
# ## Categorical columns
#
# One-hot encoding is currently disabled (the calls are kept below for
# reference); the three categorical columns are dropped instead.

# all_data = pd.concat([all_data, pd.get_dummies(all_data['cp_dose'], prefix='cp_dose', dtype=float)],axis=1)
# all_data = pd.concat([all_data, pd.get_dummies(all_data['cp_time'], prefix='cp_time', dtype=float)],axis=1)
# all_data = pd.concat([all_data, pd.get_dummies(all_data['cp_type'], prefix='cp_type', dtype=float)],axis=1)
all_data = all_data.drop(columns=['cp_dose', 'cp_time', 'cp_type'])

In [None]:
# Split the stacked frame back into its two parts by row position: the first
# len(data_train) rows are the training rows, the remainder the test rows.
n_train_rows = len(data_train)
train = all_data.iloc[:n_train_rows]
test = all_data.iloc[n_train_rows:]
print(train.shape, test.shape)

In [None]:
# Assign every row of the scored targets to one of five stratified folds,
# stored in a new "kfold" column.

# Shuffle with a fixed seed so the fold assignment is the same on every
# fresh run of the notebook.
target_scored = target_scored.sample(frac=1, random_state=42).reset_index(drop=True)
target_scored.loc[:, "kfold"] = -1
# Stratify on the label columns only: "sig_id" is an identifier and "kfold"
# is the bookkeeping column created above, so neither belongs in `y`.
targets = target_scored.drop(['sig_id', 'kfold'], axis=1).values
mskf = MultilabelStratifiedKFold(n_splits=5)
for fold, (trn, val) in enumerate(mskf.split(X=target_scored, y=targets)):
    # Only the validation indices are needed; `trn` is intentionally unused.
    target_scored.loc[val, "kfold"] = fold


In [None]:
class RKDataset:
    """Map-style dataset that pairs each input row with its target row.

    Both `inputs` and `targets` are indexed as `array[row, :]`, so they must
    support 2-D indexing (e.g. NumPy arrays); they are presumably expected to
    have the same number of rows, but this is not checked here.
    """

    def __init__(self, inputs, targets):
        self.inputs, self.targets = inputs, targets

    def __len__(self):
        # The dataset length is the number of input rows.
        n_rows = self.inputs.shape[0]
        return n_rows

    def __getitem__(self, item):
        # Each row is converted to a float tensor on access.
        features = torch.tensor(self.inputs[item, :], dtype=torch.float)
        labels = torch.tensor(self.targets[item, :], dtype=torch.float)
        return dict(x=features, y=labels)

In [None]:
# MODEL CLASS

class FeedNetModel(nn.Module):
    """Fully connected feed-forward network built from identical hidden blocks.

    Args:
        n_features: width of the input vectors.
        n_targets: width of the output vectors.
        n_layers: number of hidden blocks; each block is
            Linear -> BatchNorm1d -> Dropout -> ReLU. With ``n_layers=0`` the
            output head is applied directly to the inputs.
        hidden_size: width of every hidden block.
        dropout: dropout probability used inside every hidden block.
    """

    def __init__(self, n_features, n_targets, n_layers, hidden_size, dropout):
        super().__init__()
        layers = []
        # Width of whatever feeds the next Linear layer: the raw features for
        # the first block, `hidden_size` afterwards.
        in_features = n_features
        for _ in range(n_layers):
            layers.extend([
                nn.Linear(in_features, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.Dropout(dropout),
                nn.ReLU(),
            ])
            in_features = hidden_size

        # Output head.
        # NOTE(review): a ReLU followed by BatchNorm1d after the final Linear
        # layer is unusual for outputs that are later treated as logits by
        # BCEWithLogitsLoss. It is kept as-is to preserve the architecture and
        # the state-dict layout -- confirm whether it is intended.
        layers.extend([
            nn.Linear(in_features, n_targets),
            nn.ReLU(),
            nn.BatchNorm1d(n_targets),
        ])

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the sequential stack and return its output."""
        return self.model(x)
    
    
    
        
        
        

In [None]:
class Core:
    """Bundles a model, its optimizer and a device with train/eval loops.

    Data loaders passed to `train_fn` / `eval_fn` must yield dict batches
    with an "x" entry (inputs) and a "y" entry (targets).
    """

    def __init__(self, model, optimizer, device):
        self.model = model
        self.optimizer = optimizer
        self.device = device

    @staticmethod
    def loss_fn(outputs, targets):
        """Binary cross-entropy computed on raw (pre-sigmoid) outputs."""
        return nn.BCEWithLogitsLoss()(outputs, targets)

    def train_fn(self, data_loader):
        """Run one optimisation pass over `data_loader`.

        Returns the mean of the per-batch losses. Raises ZeroDivisionError
        when the loader yields no batches.
        """
        self.model.train()
        running_loss = 0.0
        for batch in data_loader:
            self.optimizer.zero_grad()
            inputs = batch["x"].to(self.device)
            targets = batch["y"].to(self.device)
            outputs = self.model(inputs)

            loss = self.loss_fn(outputs, targets)
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item()
        return running_loss / len(data_loader)

    def eval_fn(self, data_loader):
        """Compute the mean per-batch loss over `data_loader` without training.

        Raises ZeroDivisionError when the loader yields no batches.
        """
        self.model.eval()
        running_loss = 0.0
        # No gradients are needed for evaluation; disabling autograd avoids
        # building a graph for every validation batch.
        with torch.no_grad():
            for batch in data_loader:
                inputs = batch["x"].to(self.device)
                targets = batch["y"].to(self.device)
                outputs = self.model(inputs)

                running_loss += self.loss_fn(outputs, targets).item()
        return running_loss / len(data_loader)

In [None]:
def main_fn(fold, save_model=False):
    """Train and validate the network on one cross-validation fold.

    Rows whose "kfold" value equals `fold` form the validation set; all other
    rows form the training set.

    Args:
        fold: fold id to hold out for validation.
        save_model: when True, the model's state dict is written to
            ``model_{fold}.bin`` every time the validation loss improves.

    Returns:
        The lowest validation loss observed during training.
    """
    feature_columns = train.drop(['sig_id'], axis=1).columns
    # "kfold" is the fold-assignment column added to `target_scored`; it is
    # bookkeeping, not a label, so it must not end up among the targets.
    targets_columns = target_scored.drop(["sig_id", "kfold"], axis=1).columns
    df = train.merge(target_scored, on="sig_id", how="left")
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)
    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[targets_columns].to_numpy()
    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[targets_columns].to_numpy()

    # Datasets
    train_dataset = RKDataset(inputs=xtrain, targets=ytrain)
    valid_dataset = RKDataset(inputs=xvalid, targets=yvalid)

    # Data loaders: only the training loader shuffles.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=TRAIN_BATCH_SIZE, num_workers=8, shuffle=True
    )
    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=VALID_BATCH_SIZE, num_workers=8
    )

    # Model
    # NOTE(review): 128 hidden blocks of width 1024 is a very deep MLP --
    # confirm this depth is intended.
    model = FeedNetModel(
        n_features=xtrain.shape[1],
        n_targets=ytrain.shape[1],
        n_layers=128,
        hidden_size=1024,
        dropout=0.2
    )
    model.to(device)

    print(model)

    # Optimizer. SGD (lr=0.01, momentum=0.9) was noted as slower than Adam,
    # and no learning-rate scheduler is currently used.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    core = Core(model, optimizer, device)
    best_loss = np.inf
    # Training stops once more than `early_stopping_iter` consecutive epochs
    # fail to improve the best validation loss.
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        print(f'Fold {fold}')
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)

        train_loss = core.train_fn(train_data_loader)
        print(f'Train loss {train_loss}')

        val_loss = core.eval_fn(valid_data_loader)
        print(f'Val loss {val_loss}')
        print()

        if val_loss < best_loss:
            best_loss = val_loss
            # An improvement resets the patience counter, so only a run of
            # consecutive non-improving epochs can trigger early stopping.
            early_stopping_counter = 0
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1
            if early_stopping_counter > early_stopping_iter:
                break
    return best_loss

In [None]:
# Train on each of the five folds in turn and report the mean of the best
# validation losses returned by main_fn.
all_losses = [main_fn(fold_idx, save_model=False) for fold_idx in range(5)]
print(f"Mean Lossses:- {np.mean(all_losses)}")