## About

A few parts of this notebook are based on:

* https://www.kaggle.com/namanj27/new-baseline-pytorch-moa
* https://www.kaggle.com/felipebihaiek/prediction-with-swap-auto-encoder-features-0-01865
* https://www.kaggle.com/frtgnn/introduction-to-pytorch-a-very-gentle-start

In [None]:
!pip install -r /kaggle/input/python-library/wheelhouse/requirements.txt --no-index --find-links /kaggle/input/python-library/wheelhouse

In [None]:
import os
import random

import numpy as np
import pandas as pd
import torch
import torch_optimizer
import lambda_networks
import torchsummary
import iterstrat

# Print the versions of the main libraries so the run can be reproduced later.
print(np.__version__)
print(pd.__version__)
print(torch.__version__)
print(torch_optimizer.__version__)
# NOTE(review): the next two prints are disabled; presumably these packages do
# not expose `__version__` -- confirm before re-enabling.
# print(lambda_networks.__version__)
# print(torchsummary.__version__)
print(iterstrat.__version__)

In [None]:
# SEED is the base seed applied right after `set_seed` is defined.
# SEEDS is the list iterated by the MLP cross-validation loop (one K-fold run per entry).
SEED = 42
SEEDS = [4, SEED, 6, 89]

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also stores the seed (as text) in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Integer seed handed unchanged to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for every generator, applied in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
set_seed(SEED)

# N_EPOCHS: upper bound on epochs for both the autoencoder and MLP training loops.
# N_FOLDS: number of cross-validation folds per seed.
# BATCH_SIZE: batch size for every DataLoader and for batched inference.
N_EPOCHS = 250
N_FOLDS = 10
BATCH_SIZE = 128

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Load dataset

In [None]:
# Read the competition CSVs: training features, scored training targets, test features.
X = pd.read_csv('/kaggle/input/lish-moa/train_features.csv')
y = pd.read_csv('/kaggle/input/lish-moa/train_targets_scored.csv')
X_test = pd.read_csv('/kaggle/input/lish-moa/test_features.csv')

In [None]:
# Submission frame with the same columns as the targets file; assigning the
# test `sig_id` column to the empty frame creates one row per test sample.
df_submission = pd.DataFrame(columns=y.columns)
df_submission['sig_id'] = X_test['sig_id']

# Preprocess

## Preprocess - One Hot Encode

In [None]:
# Remove the `sig_id` column from all three frames, modifying them in place.
X.drop(columns='sig_id', inplace=True)
y.drop(columns='sig_id', inplace=True)
X_test.drop(columns='sig_id', inplace=True)

In [None]:
# One-hot encode the categorical columns. The category values are collected
# from the training frame only, and the same indicator columns are then created
# on both the training and the test frame.
cat_feature = ['cp_type', 'cp_dose']
for cat in cat_feature:
    val_list = X[cat].unique()
    for val in val_list:
        # Vectorised element-wise comparison: one boolean column per category
        # value, without a Python-level call per row.
        X[f'{cat}_{val}'] = X[cat] == val
        X_test[f'{cat}_{val}'] = X_test[cat] == val

    # The raw categorical column is no longer needed once it is encoded.
    del X[cat]
    del X_test[cat]

# Show the two column ranges that are later sliced out as inputs for the
# gene autoencoder (1:773) and the cell autoencoder (773:873).
print(X.columns[1:773])
print(X.columns[773:873])

## Preprocess - Convert to Tensor

In [None]:
# From this cell on, X, y and X_test are float32 torch tensors rather than DataFrames.
'''
1. Convert DataFrame data type to float32
2. Convert DataFrame to Numpy Array
3. Convert Numpy Array to Torch Tensor
'''

X = torch.tensor(X.astype(np.float32).to_numpy())
y = torch.tensor(y.astype(np.float32).to_numpy())
X_test = torch.tensor(X_test.astype(np.float32).to_numpy())

In [None]:
class MoADataset(torch.utils.data.Dataset):
    """Map-style dataset over a feature container and optional targets.

    Args:
        X: Indexable samples; ``X.shape[0]`` is taken as the number of samples.
        y: Optional targets aligned with ``X`` along the first dimension.
            When omitted, items consist of features only.

    Raises:
        ValueError: If ``y`` is given and its first dimension differs from
            that of ``X``.
    """

    def __init__(self, X, y=None):
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if y is not None and X.shape[0] != y.shape[0]:
            raise ValueError(
                f'X and y must have the same number of rows, '
                f'got {X.shape[0]} and {y.shape[0]}'
            )

        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``X[idx]`` alone, or ``(X[idx], y[idx])`` when targets exist."""
        if self.y is None:
            return self.X[idx]
        return self.X[idx], self.y[idx]

In [None]:
# Wrap the full training and test tensors as datasets.
# NOTE(review): `train_ds` is rebuilt per fold in the MLP training loop and
# `test_ds` does not appear to be read in the visible code -- confirm whether
# these two objects are still needed.
train_ds = MoADataset(X, y)
test_ds = MoADataset(X_test)

# Autoencoder

## Autoencoder - Train function

In [None]:
def train_ae(mode, whole_dl):
    """Train an autoencoder to reconstruct its input and return the best model.

    After every epoch the mean batch loss is compared with the best value seen
    so far; on improvement the weights are written to
    ``./model_{mode}.state_dict``. Training stops early once more than 50
    epochs pass without improvement.

    Args:
        mode: ``'AE_Gene'`` trains an ``AE_Gene_Net``; any other value trains
            an ``AE_Cell_Net``. Also used in the checkpoint file name.
        whole_dl: Sized iterable yielding batches of input features only.

    Returns:
        Tuple ``(net, lowest_loss)``: the network loaded with the best
        checkpoint and switched to evaluation mode, and that checkpoint's
        mean epoch loss.
    """
    checkpoint_path = f'./model_{mode}.state_dict'

    # Net and the per-mode hyper-parameters
    if mode == 'AE_Gene':
        print('Train autoencoder gene!')
        net = AE_Gene_Net().float().to(device)
        weight_decay = 0.0000125
        plateau_kwargs = dict(factor=0.5, patience=8, min_lr=0.00005)
    else:
        print('Train autoencoder cell!')
        net = AE_Cell_Net().float().to(device)
        weight_decay = 0.00001
        plateau_kwargs = dict(factor=0.25, patience=11, min_lr=0.000025)

    criterion = nn.MSELoss()
    optimizer = torch_optimizer.RAdam(net.parameters(), lr=0.002, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, verbose=True, **plateau_kwargs
    )

    # `np.inf` rather than the `np.Inf` alias, which NumPy 2.0 removed.
    lowest_loss = np.inf
    last_epoch_with_lowest_loss = 0

    # Train
    net.train()
    for epoch in range(N_EPOCHS):
        running_loss = 0.0
        for X_local in whole_dl:
            X_local = X_local.to(device)

            optimizer.zero_grad()
            output = net(X_local)

            # The reconstruction target is the input itself.
            loss = criterion(output, X_local)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # post
        train_loss = running_loss / len(whole_dl)
        scheduler.step(train_loss)

        improved = train_loss < lowest_loss
        print(f'Epoch: {epoch+1:02d}/{N_EPOCHS:02d} | Train loss: {train_loss:.08f}{" (Saving model state!)" if improved else ""}')
        if improved:
            torch.save(net.state_dict(), checkpoint_path)
            lowest_loss = train_loss
            last_epoch_with_lowest_loss = epoch
        if last_epoch_with_lowest_loss + 50 < epoch:
            print('Early stopping!')
            break

    # Restore the best checkpoint (the final epoch is not necessarily the best)
    # and switch to evaluation mode so that BatchNorm uses its running
    # statistics when the returned model is used for inference.
    net.load_state_dict(torch.load(checkpoint_path))
    net.eval()

    return net, lowest_loss

## Autoencoder - Model

In [None]:
from torch import nn
from torch.nn import functional as F
from torchsummary import summary

class AE_Gene_Net(nn.Module):
    """Autoencoder mapping 772 inputs to a 309-unit code and back.

    Every stage is BatchNorm1d followed by a weight-normalised Linear layer;
    all stages except the last decoder stage end in ReLU6.
    """

    def __init__(self):
        super().__init__()

        def stage(n_in, n_out):
            # BatchNorm on the stage input, then a weight-normalised projection.
            return [nn.BatchNorm1d(n_in), nn.utils.weight_norm(nn.Linear(n_in, n_out))]

        # 772 -> 541 -> 309
        self.encoder = nn.Sequential(
            *stage(772, 541), nn.ReLU6(),
            *stage(541, 309), nn.ReLU6(),
        )
        # 309 -> 541 -> 772, no activation on the reconstruction
        self.decoder = nn.Sequential(
            *stage(309, 541), nn.ReLU6(),
            *stage(541, 772),
        )

    def forward(self, x, mode='reconstruct'):
        """Return the reconstruction when ``mode == 'reconstruct'``, else the code."""
        code = self.encoder(x)
        if mode != 'reconstruct':
            return code
        return self.decoder(code)
    
# Build an untrained instance to print the layer layout and a torchsummary
# report for a 772-feature input.
ae_gene_net = AE_Gene_Net().float().to(device)
print(ae_gene_net)
summary(ae_gene_net, (772, ))

In [None]:
class AE_Cell_Net(nn.Module):
    """Autoencoder mapping 100 inputs to a 40-unit code and back.

    Every stage is BatchNorm1d followed by a weight-normalised Linear layer;
    all stages except the last decoder stage end in ReLU6.
    """

    def __init__(self):
        super().__init__()

        def stage(n_in, n_out):
            # BatchNorm on the stage input, then a weight-normalised projection.
            return [nn.BatchNorm1d(n_in), nn.utils.weight_norm(nn.Linear(n_in, n_out))]

        # 100 -> 70 -> 40
        self.encoder = nn.Sequential(
            *stage(100, 70), nn.ReLU6(),
            *stage(70, 40), nn.ReLU6(),
        )
        # 40 -> 70 -> 100, no activation on the reconstruction
        self.decoder = nn.Sequential(
            *stage(40, 70), nn.ReLU6(),
            *stage(70, 100),
        )

    def forward(self, x, mode='reconstruct'):
        """Return the reconstruction when ``mode == 'reconstruct'``, else the code."""
        code = self.encoder(x)
        if mode != 'reconstruct':
            return code
        return self.decoder(code)
    
# Build an untrained instance to print the layer layout and a torchsummary
# report for a 100-feature input.
ae_cell_net = AE_Cell_Net().float().to(device)
print(ae_cell_net)
summary(ae_cell_net, (100, ))

## Autoencoder - Train AE Gene

In [None]:
# Train the gene autoencoder on columns 1..772 of the training and test rows
# stacked together (no targets are involved).
X_whole = torch.cat((X[:, 1:773], X_test[:, 1:773]), 0)

whole_ds = MoADataset(X_whole)
whole_dl = torch.utils.data.DataLoader(whole_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# Replaces the untrained `ae_gene_net` instance with the trained one.
ae_gene_net, ae_gene_min_loss = train_ae('AE_Gene', whole_dl)

## Autoencoder - Train AE Cell

In [None]:
# Train the cell autoencoder on columns 773..872 of the training and test rows
# stacked together (no targets are involved).
X_whole = torch.cat((X[:, 773:873], X_test[:, 773:873]), 0)

whole_ds = MoADataset(X_whole)
whole_dl = torch.utils.data.DataLoader(whole_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# Replaces the untrained `ae_cell_net` instance with the trained one.
ae_cell_net, ae_cell_min_loss = train_ae('AE_Cell', whole_dl)

## Autoencoder - Transform Dataset

In [None]:
def _encode_in_batches(net, features, n_latent):
    """Return the encoder output of `net` for every row of `features`.

    Rows are processed `BATCH_SIZE` at a time on `device`; the result is a CPU
    tensor of shape ``(features.shape[0], n_latent)``.
    """
    encoded = torch.zeros([features.shape[0], n_latent])

    # Evaluation mode makes BatchNorm use its running statistics, so a row's
    # encoding does not depend on which other rows share its batch.
    net.eval()
    with torch.no_grad():
        for start in range(0, features.shape[0], BATCH_SIZE):
            end = min(start + BATCH_SIZE, features.shape[0])
            # Any mode other than 'reconstruct' returns the encoder output.
            encoded[start:end] = net(features[start:end].to(device), mode='').cpu()

    return encoded


# Encode the gene (columns 1:773) and cell (columns 773:873) feature ranges
# of the training and test sets with the trained autoencoders.
X_AE_Gene = _encode_in_batches(ae_gene_net, X[:, 1:773], 309)
X_AE_Cell = _encode_in_batches(ae_cell_net, X[:, 773:873], 40)

X_test_AE_Gene = _encode_in_batches(ae_gene_net, X_test[:, 1:773], 309)
X_test_AE_Cell = _encode_in_batches(ae_cell_net, X_test[:, 773:873], 40)

# Append the encodings as extra feature columns after the original features.
X = torch.cat((X, X_AE_Gene, X_AE_Cell), 1)
X_test = torch.cat((X_test, X_test_AE_Gene, X_test_AE_Cell), 1)

## Autoencoder - Feature Selection

In [None]:
from sklearn.feature_selection import VarianceThreshold

# Stack train and test rows so one selector is fitted on, and applied to, both.
X_length, X_test_length = X.shape[0], X_test.shape[0]
X_temp = torch.cat((X, X_test), 0)
print('Shape before:', X_temp.shape)

# Apply scikit-learn's VarianceThreshold with a threshold of 0.5.
# NOTE(review): the selector sees every column of X (original features as well
# as the autoencoder encodings), not only the encodings -- confirm this is intended.
selector = VarianceThreshold(0.5)
X_temp = selector.fit_transform(X_temp)
X_temp = torch.tensor(X_temp)
print('Shape after:', X_temp.shape)

# Split the stacked rows back into the training and test parts.
X = X_temp[0:X_length]
X_test = X_temp[X_length:X_length+X_test_length]

# MLP

## MLP - Train function

In [None]:
def train_mlp(train_dl, val_dl, seed, fold):
    """Train one MLP on a train/validation split and return the best model.

    After every epoch the mean validation loss is compared with the best value
    seen so far; on improvement the weights are written to
    ``./model_mlp_{seed:02d}_{fold:02d}.state_dict``. Training stops early
    once more than 30 epochs pass without improvement.

    Args:
        train_dl: Sized iterable yielding ``(features, targets)`` training batches.
        val_dl: Sized iterable yielding ``(features, targets)`` validation batches.
        seed: Integer used in the checkpoint file name.
        fold: Integer used in the checkpoint file name.

    Returns:
        Tuple ``(net, lowest_val_loss)``: the network loaded with the best
        checkpoint and switched to evaluation mode, and that checkpoint's
        mean validation loss.
    """
    checkpoint_path = f'./model_mlp_{seed:02d}_{fold:02d}.state_dict'

    # Net
    net = MLPNet().float().to(device)
    criterion = nn.BCELoss()
    # An earlier configuration used RAdam (lr=0.001) with OneCycleLR(max_lr=0.01).
    optimizer = torch_optimizer.AdaBelief(net.parameters(), lr=0.1, weight_decay=0.00002, rectify=False, weight_decouple=False)
    # The scheduler is stepped once per batch, hence `steps_per_epoch`.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer, pct_start=0.1, div_factor=1e3,
        max_lr=1.0, epochs=N_EPOCHS, steps_per_epoch=len(train_dl)
    )

    # `np.inf` rather than the `np.Inf` alias, which NumPy 2.0 removed.
    lowest_val_loss = np.inf
    last_epoch_with_lowest_val_loss = 0

    # Train/Eval
    for epoch in range(N_EPOCHS):
        # Train
        net.train()
        train_loss = 0.0
        for X_local, y_local in train_dl:
            X_local = X_local.to(device)
            y_local = y_local.to(device)

            optimizer.zero_grad()
            output = net(X_local)

            loss = criterion(output, y_local)
            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss += loss.item()

        # Val: no gradients are needed, so skip building the autograd graph.
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_local, y_local in val_dl:
                X_local = X_local.to(device)
                y_local = y_local.to(device)

                output = net(X_local)
                loss = criterion(output, y_local)

                val_loss += loss.item()

        # post
        train_loss /= len(train_dl)
        val_loss /= len(val_dl)

        improved = val_loss < lowest_val_loss
        print(f'Epoch: {epoch+1:02d}/{N_EPOCHS:02d} | Train loss: {train_loss:.08f} - Val loss: {val_loss:.08f}{" (Saving model state!)" if improved else ""}')
        if improved:
            torch.save(net.state_dict(), checkpoint_path)
            lowest_val_loss = val_loss
            last_epoch_with_lowest_val_loss = epoch
        if last_epoch_with_lowest_val_loss + 30 < epoch:
            print('Early stopping!')
            # stop training
            break

    # Restore the best checkpoint and switch to evaluation mode so that
    # Dropout is disabled and BatchNorm uses its running statistics when the
    # returned model is used for prediction.
    net.load_state_dict(torch.load(checkpoint_path))
    net.eval()

    return net, lowest_val_loss

## MLP - Model

In [None]:
class MLPNet(nn.Module):
    """Three-layer MLP with sigmoid outputs.

    Each layer is BatchNorm1d -> Dropout -> weight-normalised Linear. The two
    hidden layers (1280 units each) are followed by ReLU6, the output layer by
    a sigmoid, so every output lies in [0, 1].

    Args:
        n_features: Number of input features. Defaults to ``X.shape[1]`` of
            the module-level ``X``.
        n_targets: Number of outputs. Defaults to ``y.shape[1]`` of the
            module-level ``y``.
    """

    def __init__(self, n_features=None, n_targets=None):
        super().__init__()

        # Fall back to the notebook-level data shapes when sizes are not given.
        if n_features is None:
            n_features = X.shape[1]
        if n_targets is None:
            n_targets = y.shape[1]

        # start
        self.bn1 = nn.BatchNorm1d(n_features)
        self.drop1 = nn.Dropout(0.2)
        self.fc1 = nn.utils.weight_norm(nn.Linear(n_features, 1280))

        self.bn2 = nn.BatchNorm1d(1280)
        self.drop2 = nn.Dropout(0.5)
        self.fc2 = nn.utils.weight_norm(nn.Linear(1280, 1280))

        self.bn3 = nn.BatchNorm1d(1280)
        self.drop3 = nn.Dropout(0.5)
        self.fc3 = nn.utils.weight_norm(nn.Linear(1280, n_targets))

        # generic
        self.relu = nn.ReLU6()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, n_features)`` input to ``(batch, n_targets)`` outputs in [0, 1]."""
        x = self.relu(self.fc1(self.drop1(self.bn1(x))))
        x = self.relu(self.fc2(self.drop2(self.bn2(x))))
        x = self.sigmoid(self.fc3(self.drop3(self.bn3(x))))

        return x
    
# Build an untrained instance to print the layer layout and a torchsummary
# report for the current feature width.
mlp_net = MLPNet().float().to(device)
print(mlp_net)
summary(mlp_net, (X.shape[1], ))

## MLP - Train

In [None]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# Trained models and their best validation losses, appended in (seed, fold) order.
mlp_nets = []
mlp_min_losses = []

for seed in SEEDS:
    # Re-seed every RNG and build a new stratified K-fold split for this seed.
    set_seed(seed)
    skf = MultilabelStratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f'Seed: {seed}, fold: {fold+1}/{N_FOLDS}')

        # Training split, reshuffled every epoch.
        X_train, y_train = X[train_idx], y[train_idx]
        train_ds = MoADataset(X_train, y_train)
        train_dl = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

        # Validation split, iterated in a fixed order.
        X_val, y_val = X[val_idx], y[val_idx]
        val_ds = MoADataset(X_val, y_val)
        val_dl = torch.utils.data.DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

        # Train one model for this (seed, fold) and keep it for the ensemble.
        mlp_net, mlp_min_loss = train_mlp(train_dl, val_dl, seed, fold)
        mlp_nets.append(mlp_net)
        mlp_min_losses.append(mlp_min_loss)

        print('='*82)
print('Training Ended! ')

# Post-training

In [None]:
# Best training loss reached by each autoencoder.
print(f'AE Gene loss: {ae_gene_min_loss}')
print(f'AE Cell loss: {ae_cell_min_loss}')

# Best validation loss of every trained MLP, in (seed, fold) order.
print('MLP CV min loss:')
for fold_loss in mlp_min_losses:
    print(fold_loss)

# Losses are stored seed by seed, N_FOLDS consecutive entries per seed.
for i, seed_value in enumerate(SEEDS):
    seed_losses = mlp_min_losses[i*N_FOLDS:i*N_FOLDS+N_FOLDS]
    print(f'MLP average CV min loss (SEED {seed_value}): {sum(seed_losses)/N_FOLDS}')
print(f'MLP average CV min loss: {sum(mlp_min_losses)/len(mlp_min_losses)}')

In [None]:
# Predict the test set as the plain average over every trained MLP.

# Dropout must be disabled and BatchNorm must use its running statistics at
# prediction time, so make sure every model is in evaluation mode.
for mlp_net in mlp_nets:
    mlp_net.eval()

with torch.no_grad():
    for start in range(0, X_test.shape[0], BATCH_SIZE):
        end = min(start + BATCH_SIZE, X_test.shape[0])

        # Move the batch to the device once and reuse it for every model.
        X_batch = X_test[start:end].to(device)

        test_results = torch.zeros([end-start, y.shape[1]])
        for mlp_net in mlp_nets:
            test_results += mlp_net(X_batch).cpu()
        # Divide by the number of models actually trained.
        test_results /= len(mlp_nets)

        # Column 0 holds `sig_id`; the remaining columns receive the predictions.
        df_submission.iloc[start:end, 1:] = test_results.numpy()

In [None]:
df_submission

In [None]:
# Write the submission file without the DataFrame index column.
df_submission.to_csv('submission.csv', index=False)