In [None]:
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore')

import xgboost
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
from sklearn import model_selection
from category_encoders import CountEncoder
from sklearn.pipeline import Pipeline


from sklearn.multioutput import MultiOutputClassifier

import sklearn
import warnings
from sklearn.preprocessing import QuantileTransformer

In [None]:
# Raw competition tables: features, scored / non-scored targets and the
# submission template.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')

test_features = pd.read_csv('../input/lish-moa/test_features.csv')
sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Feature-column groups, selected purely by their name prefix.
GENES = [name for name in train_features.columns if name[:2] == 'g-']
CELLS = [name for name in train_features.columns if name[:2] == 'c-']
# %% [markdown]
# **train set before using RankGauss**
# It may be too simple an idea, but it appears that the gene expression data and cell viability data
# can be controlled by the experimenter, so it is safe to assume that these data are independent of each other.
# 
# Also, since the shape of each distribution is already close to a normal distribution,
# I don't think there is much of a problem if it is forced into a Gaussian distribution.
# %% [code]
# RankGauss: map every gene / cell column onto a normal distribution through
# its quantiles.  Each column gets its own transformer, fitted on the training
# values only and then applied to both the train and the test column.
for col in (GENES + CELLS):
    transformer = QuantileTransformer(n_quantiles=100, random_state=0, output_distribution="normal")

    # scikit-learn transformers expect 2-D input: one column, one row per sample.
    raw_vec = train_features[col].values.reshape(-1, 1)
    test_vec = test_features[col].values.reshape(-1, 1)

    train_features[col] = transformer.fit(raw_vec).transform(raw_vec).ravel()
    test_features[col] = transformer.transform(test_vec).ravel()
# %% [markdown]
# We can confirm that the shape of the data is now close to a normal distribution.
# 
# **train set after using RankGauss**
# %% [code]
def seed_everything(seed=42):
    """Seed every random number generator used by this notebook.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and PyTorch
    (CPU and CUDA), then switches cuDNN to its deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(seed=42)
# %% [markdown]
# It appears that we were able to transform the distribution of each data to resemble a normal distribution, as intended.
# 
# So, let's enter the data into the benchmarking method to see the improvement.

# # PCA features + Existing features

In [None]:
# Append PCA components to the existing features: 600 components for the gene
# columns, then 50 for the cell columns.  Each PCA is fitted on the train and
# test rows stacked together.
for n_comp, cols, prefix in ((600, GENES, 'pca_G'), (50, CELLS, 'pca_C')):  #<--Update
    data = pd.concat([pd.DataFrame(train_features[cols]), pd.DataFrame(test_features[cols])])
    data2 = (PCA(n_components=n_comp, random_state=42).fit_transform(data[cols]))

    # The stacked matrix holds the train rows first and the test rows last.
    n_train, n_test = train_features.shape[0], test_features.shape[0]
    pca_names = [f'{prefix}-{i}' for i in range(n_comp)]
    train2 = pd.DataFrame(data2[:n_train], columns=pca_names)
    test2 = pd.DataFrame(data2[-n_test:], columns=pca_names)

    train_features = pd.concat((train_features, train2), axis=1)
    test_features = pd.concat((test_features, test2), axis=1)

print(train_features.shape)
# %% [code]
# # feature Selection using Variance Encoding
from sklearn.feature_selection import VarianceThreshold

# Drop every numeric feature whose variance (computed over the train and test
# rows together) does not exceed the threshold.
var_thresh = VarianceThreshold(0.8)  #<-- Update

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way (train rows first, test rows last).
data = pd.concat([train_features, test_features])

# The first four columns (sig_id, cp_type, cp_time, cp_dose, as selected by
# name below) are skipped by the filter and re-attached afterwards.
feature_cols = train_features.columns.values[4:]
data_transformed = var_thresh.fit_transform(data.iloc[:, 4:])
train_features_transformed = data_transformed[ : train_features.shape[0]]
test_features_transformed = data_transformed[-test_features.shape[0] : ]

# Names of the columns that survived the filter.
kept_cols = feature_cols[var_thresh.get_support()]

train_features_str = pd.DataFrame(train_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                              columns=['sig_id','cp_type','cp_time','cp_dose'])

train_features_new = pd.concat([train_features_str,pd.DataFrame(train_features_transformed, columns=kept_cols)], axis=1)
test_features_str = pd.DataFrame(test_features[['sig_id','cp_type','cp_time','cp_dose']].values.reshape(-1, 4),\
                             columns=['sig_id','cp_type','cp_time','cp_dose'])
test_features_new = pd.concat([test_features_str,pd.DataFrame(test_features_transformed, columns=kept_cols)], axis=1)

# print(train_features_new.shape)
# %% [code]
# Join features with the scored targets and split control (`ctl_vehicle`)
# rows from treated rows.
train_all = train_features_new.merge(train_targets_scored, on='sig_id')
is_ctl = train_all['cp_type'] == 'ctl_vehicle'
trt_cp_index = train_all[~is_ctl].index
ctl_vehicle_index = train_all[is_ctl].index
train_crl = train_all[is_ctl].reset_index(drop=True)
train_trt = train_all[~is_ctl].reset_index(drop=True)

# Same split for the non-scored targets.
train_nc_all = train_features_new.merge(train_targets_nonscored, on='sig_id')
is_ctl_nc = train_nc_all['cp_type'] == 'ctl_vehicle'
train_nc_crl = train_nc_all[is_ctl_nc].reset_index(drop=True)
train_nc_trt = train_nc_all[~is_ctl_nc].reset_index(drop=True)

# Same split for the test rows (joined with the submission template).
test_all = test_features_new.merge(sample_submission,on='sig_id')
is_ctl_test = test_all['cp_type'] == 'ctl_vehicle'
test_crl = test_all[is_ctl_test].reset_index(drop=True)
test_trt = test_all[~is_ctl_test].reset_index(drop=True)

# `target` keeps `sig_id` plus every scored target column of the treated rows;
# `target_cols` lists the target names only.
target = train_trt[train_targets_scored.columns]
train_trt, test_trt = (frame.drop('cp_type', axis=1) for frame in (train_trt, test_trt))
target_cols = [name for name in target.columns if name != 'sig_id']
#%%
folds = train_trt.copy()

# NOTE(review): the split count is hard-coded; it has to agree with NFOLDS,
# which the training loops iterate over.
mskf = MultilabelStratifiedKFold(n_splits=7)

# Stratify on the label columns only.  `target` still carries the string
# `sig_id` column, which must not be handed to the stratifier as if it were a
# label.
for f, (t_idx, v_idx) in enumerate(mskf.split(X=train_trt, y=target[target_cols])):
    # Rows in the validation part of split `f` belong to fold `f`.
    folds.loc[v_idx, 'kfold'] = int(f)

folds['kfold'] = folds['kfold'].astype(int)

print(train_trt.shape)
print(folds.shape)
print(test_trt.shape)
print(target.shape)
print(sample_submission.shape)
# %% [code]
# # Preprocessing steps
def process_data(data):
    """One-hot encode the ``cp_time`` and ``cp_dose`` columns.

    Args:
        data: DataFrame containing ``cp_time`` and ``cp_dose`` columns.

    Returns:
        A new DataFrame in which those two columns are replaced by their
        dummy (indicator) columns.
    """
    return pd.get_dummies(data, columns=['cp_time', 'cp_dose'])
# %% [code]
# # CV folds
# Model inputs: every processed column that is neither a target nor bookkeeping.
_non_feature = set(target_cols) | {'kfold', 'sig_id'}
feature_cols = [c for c in process_data(folds).columns if c not in _non_feature]

# Split the inputs by origin; whatever matches no prefix (the one-hot encoded
# cp_time / cp_dose columns) ends up in `ohe_cols`.
gen_cols = [c for c in feature_cols if c.startswith('g-')]
cell_cols = [c for c in feature_cols if c.startswith('c-')]
pca_gen_cols = [c for c in feature_cols if c.startswith('pca_G')]
pca_cell_cols = [c for c in feature_cols if c.startswith('pca_C')]
_grouped = set(gen_cols + cell_cols + pca_gen_cols + pca_cell_cols)
ohe_cols = [c for c in feature_cols if c not in _grouped]
print(len(gen_cols), len(cell_cols), len(pca_gen_cols), len(pca_cell_cols),len(ohe_cols))


# HyperParameters
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
EPOCHS = 25
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
NFOLDS = 7
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

# Network dimensions.
num_features = len(feature_cols)
num_targets = len(target_cols)
hidden_size = 1500

In [None]:
# %% [code]
# # Dataset Classes
class MoADataset:
    """Map-style dataset pairing a 2-D feature array with a 2-D target array."""

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        """Number of samples (rows of the feature array)."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as float tensors under the keys ``'x'`` and ``'y'``."""
        feature_row = self.features[idx, :]
        target_row = self.targets[idx, :]
        return {
            'x': torch.tensor(feature_row, dtype=torch.float),
            'y': torch.tensor(target_row, dtype=torch.float),
        }
    
class TestDataset:
    """Map-style dataset over a 2-D feature array with no targets."""

    def __init__(self, features):
        self.features = features

    def __len__(self):
        """Number of samples (rows of the feature array)."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as a float tensor under the key ``'x'``."""
        feature_row = self.features[idx, :]
        return {'x': torch.tensor(feature_row, dtype=torch.float)}

def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Network called as ``model(inputs)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler, also stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        optimizer.zero_grad()
        features = batch['x'].to(device)
        labels = batch['y'].to(device)

        loss = loss_fn(model(features), labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate the model on a validation loader.

    Args:
        model: Network called as ``model(inputs)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(final_loss, valid_preds)``: the mean batch loss and the
        sigmoid of the model outputs for all batches, concatenated in loader
        order as a NumPy array.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # Validation never back-propagates, so run without autograd: the original
    # built a full graph for every batch, wasting memory and time.
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().detach().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)

    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Predict probabilities for every batch of an unlabeled loader.

    Args:
        model: Network called as ``model(inputs)``; switched to eval mode.
        dataloader: Iterable of dicts with an ``'x'`` tensor.
        device: Device the batches are moved to.

    Returns:
        NumPy array with the sigmoid of the model outputs, concatenated in
        loader order.
    """
    model.eval()
    batch_probs = []

    with torch.no_grad():
        for batch in dataloader:
            logits = model(batch['x'].to(device))
            batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    return np.concatenate(batch_probs)
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is computed:
    ``target * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'``, ``'sum'``, or any other value for the
            unreduced element-wise loss.
        smoothing: Smoothing factor in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base classes already store `weight` and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets; ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss of ``inputs`` (logits) against ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Ask for the element-wise loss and reduce it here.  The original call
        # used the functional's default mean reduction, so 'sum' and 'none'
        # were silently turned into a mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss

class Model(nn.Module):
    """Three-layer MLP: batch norm, dropout and weight-normalised linear layers.

    Args:
        num_features: Width of the input vector.
        num_targets: Number of output logits.
        hidden_size: Width of both hidden layers.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()
        drop_rate = 0.2619422201258426

        # Input block (no dropout on the raw features).
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        # Hidden block.
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(drop_rate)
        self.dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Output block producing one logit per target.
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(drop_rate)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def forward(self, x):
        """Return the raw logits (no sigmoid applied) for the batch ``x``."""
        hidden = F.leaky_relu(self.dense1(self.batch_norm1(x)))
        hidden = F.leaky_relu(self.dense2(self.dropout2(self.batch_norm2(hidden))))
        return self.dense3(self.dropout3(self.batch_norm3(hidden)))

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed one-hot distribution.

    Args:
        classes: Number of classes.
        smoothing: Probability mass spread over the non-target classes.
        dim: Dimension over which the softmax and the final sum are taken.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of logits ``pred`` vs. class indices ``target``."""
        log_probs = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Every class starts with an equal share of the smoothing mass ...
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.zeros_like(log_probs).fill_(off_value)
            # ... then the target class is overwritten with the confidence.
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        per_sample = torch.sum(-true_dist * log_probs, dim=self.dim)
        return torch.mean(per_sample)

def run_training(fold, seed):
    """Train the single-input `Model` on one CV fold and predict the test set.

    Reads the module-level `folds`, `test_trt`, `feature_cols`, `target_cols`,
    `target` and the hyper-parameter constants.  The checkpoint with the
    lowest validation loss is written to ``FOLD{fold}_.pth`` and reloaded for
    the test prediction.

    Args:
        fold: Value of the ``kfold`` column used as the validation fold.
        seed: Seed handed to `seed_everything` before anything else runs.

    Returns:
        Tuple ``(oof, predictions)``.  ``oof`` has one row per row of `folds`
        and is zero everywhere except the validation rows of this fold, which
        hold the predictions of the best epoch.  ``predictions`` is the output
        of `inference_fn` on the processed test rows.
    """
    


    seed_everything(seed)
    
    # One-hot encode cp_time / cp_dose in both the fold table and the test table.
    train = process_data(folds)
    test_ = process_data(test_trt)
    
    # Index labels of the training / validation rows inside `train`;
    # `val_idx` is used below to write into `oof`, `trn_idx` is not used again.
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid =  valid_df[feature_cols].values, valid_df[target_cols].values
    
    train_dataset = MoADataset(x_train, y_train)
    valid_dataset = MoADataset(x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,
    )
    
    model.to(DEVICE)
    
    # The one-cycle schedule is stepped once per batch inside train_fn, hence
    # steps_per_epoch=len(trainloader).
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
    
    # Validation uses plain BCE; training uses the label-smoothed variant.
    loss_fn = nn.BCEWithLogitsLoss()
    loss_tr = SmoothBCEwLogits(smoothing =0.001)
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
   
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        
        train_loss = train_fn(model, optimizer,scheduler, loss_tr, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        
        if valid_loss < best_loss:
            
            # New best epoch: keep its validation predictions and checkpoint.
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"FOLD{fold}_.pth")
        
        elif(EARLY_STOP == True):
            
            # Counts non-improving epochs; the counter is never reset when a
            # later epoch improves.
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    
    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    
    # Fresh model instance loaded with the best checkpoint of this fold.
    model = Model(
        num_features=num_features,
        num_targets=num_targets,
        hidden_size=hidden_size,

    )
    
    model.load_state_dict(torch.load(f"FOLD{fold}_.pth"))
    model.to(DEVICE)
    
    # The zero array is overwritten immediately by the inference result.
    predictions = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
    predictions = inference_fn(model, testloader, DEVICE)
    
    return oof, predictions

def run_k_fold(NFOLDS, seed):
    """Train `Model` on every fold and combine the per-fold results.

    Args:
        NFOLDS: Number of folds; folds ``0 .. NFOLDS-1`` are trained in order.
        seed: Seed forwarded to `run_training` for every fold.

    Returns:
        Tuple ``(oof, predictions)``: the sum of the per-fold out-of-fold
        arrays and the mean of the per-fold test predictions.
    """
    n_targets = len(target_cols)
    oof = np.zeros((len(folds), n_targets))
    predictions = np.zeros((len(test_trt), n_targets))

    for fold_id in range(NFOLDS):
        fold_oof, fold_pred = run_training(fold_id, seed)
        oof += fold_oof
        predictions += fold_pred / NFOLDS

    return oof, predictions

In [None]:
class Model_MH(nn.Module):
    """Two-branch MLP: a gene branch and a cell branch feeding one output head.

    Args:
        num_gen_features: Input width of the gene branch.
        num_cell_features: Input width of the cell branch.
        num_targets: Number of output logits.
        hidden_size: Hidden width of the gene branch; the cell branch uses
            ``int(hidden_size/2)``.
    """

    def __init__(self, num_gen_features, num_cell_features,num_targets, hidden_size):
        super().__init__()
        half_hidden = int(hidden_size/2)
        merged_width = hidden_size + half_hidden

        # Gene branch.
        self.gen_batch_norm1 = nn.BatchNorm1d(num_gen_features)
        self.gen_dropout1 = nn.Dropout(0.2)
        self.gen_dense1 = nn.utils.weight_norm(nn.Linear(num_gen_features, hidden_size))
        self.gen_batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.gen_dropout2 = nn.Dropout(0.3)
        self.gen_dense2 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Cell branch (half as wide).
        self.cell_batch_norm1 = nn.BatchNorm1d(num_cell_features)
        self.cell_dropout1 = nn.Dropout(0.2)
        self.cell_dense1 = nn.utils.weight_norm(nn.Linear(num_cell_features, half_hidden))
        self.cell_batch_norm2 = nn.BatchNorm1d(half_hidden)
        self.cell_dropout2 = nn.Dropout(0.3)
        self.cell_dense2 = nn.utils.weight_norm(nn.Linear(half_hidden, half_hidden))

        # Output head over the concatenated branch outputs.
        self.batch_norm3 = nn.BatchNorm1d(merged_width)
        self.dropout3 = nn.Dropout(0.25)
        self.dense3 = nn.utils.weight_norm(nn.Linear(merged_width, num_targets))

    def _gen_branch(self, gen_x):
        """Run the gene features through the two gene layers."""
        hidden = F.relu(self.gen_dense1(self.gen_dropout1(self.gen_batch_norm1(gen_x))))
        return F.relu(self.gen_dense2(self.gen_dropout2(self.gen_batch_norm2(hidden))))

    def _cell_branch(self, cell_x):
        """Run the cell features through the two cell layers."""
        hidden = F.relu(self.cell_dense1(self.cell_dropout1(self.cell_batch_norm1(cell_x))))
        return F.relu(self.cell_dense2(self.cell_dropout2(self.cell_batch_norm2(hidden))))

    def forward(self, gen_x, cell_x):
        """Return the raw logits for a batch of gene and cell inputs."""
        gen_out = self._gen_branch(gen_x)
        cell_out = self._cell_branch(cell_x)
        merged = torch.cat((gen_out, cell_out), dim=1)
        return self.dense3(self.dropout3(self.batch_norm3(merged)))

class MoADataset_MH:
    """Map-style dataset yielding gene features, cell features and targets."""

    def __init__(self, gen_features, cell_features, targets):
        self.gen_features = gen_features
        self.cell_features = cell_features
        self.targets = targets

    def __len__(self):
        """Number of samples (rows of the gene feature array)."""
        return self.gen_features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as float tensors under ``'gen_x'``, ``'cell_x'`` and ``'y'``."""
        sources = (
            ('gen_x', self.gen_features),
            ('cell_x', self.cell_features),
            ('y', self.targets),
        )
        return {key: torch.tensor(array[idx, :], dtype=torch.float) for key, array in sources}
    
class TestDataset_MH:
    """Map-style dataset yielding gene and cell features with no targets."""

    def __init__(self, gen_features, cell_features):
        self.gen_features = gen_features
        self.cell_features = cell_features

    def __len__(self):
        """Number of samples (rows of the gene feature array)."""
        return self.gen_features.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as float tensors under ``'gen_x'`` and ``'cell_x'``."""
        gen_row = self.gen_features[idx, :]
        cell_row = self.cell_features[idx, :]
        return {
            'gen_x': torch.tensor(gen_row, dtype=torch.float),
            'cell_x': torch.tensor(cell_row, dtype=torch.float),
        }

import time
def train_fn_MH(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training epoch of a two-input model and return the mean batch loss.

    Args:
        model: Network called as ``model(gen_x, cell_x)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler, also stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'gen_x'``, ``'cell_x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        optimizer.zero_grad()
        gen_x = batch['gen_x'].to(device)
        cell_x = batch['cell_x'].to(device)
        labels = batch['y'].to(device)

        loss = loss_fn(model(gen_x, cell_x), labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn_MH(model, loss_fn, dataloader, device):
    """Evaluate a two-input model on a validation loader.

    Args:
        model: Network called as ``model(gen_x, cell_x)``; switched to eval mode.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Iterable of dicts with ``'gen_x'``, ``'cell_x'`` and ``'y'`` tensors.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(final_loss, valid_preds)``: the mean batch loss and the
        sigmoid of the model outputs for all batches, concatenated in loader
        order as a NumPy array.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # Validation never back-propagates, so run without autograd: the original
    # built a full graph for every batch, wasting memory and time.
    with torch.no_grad():
        for data in dataloader:
            gen_x, cell_x, targets = data['gen_x'].to(device), data['cell_x'].to(device),  data['y'].to(device)
            outputs = model(gen_x, cell_x)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().detach().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)

    return final_loss, valid_preds

def inference_fn_MH(model, dataloader, device):
    """Predict probabilities with a two-input model for every batch of a loader.

    Args:
        model: Network called as ``model(gen_x, cell_x)``; switched to eval mode.
        dataloader: Iterable of dicts with ``'gen_x'`` and ``'cell_x'`` tensors.
        device: Device the batches are moved to.

    Returns:
        NumPy array with the sigmoid of the model outputs, concatenated in
        loader order.
    """
    model.eval()
    batch_probs = []

    with torch.no_grad():
        for batch in dataloader:
            logits = model(batch['gen_x'].to(device), batch['cell_x'].to(device))
            batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    return np.concatenate(batch_probs)

def run_training_MH(fold, seed):
    """Train the two-branch `Model_MH` on one CV fold and predict the test set.

    Reads the module-level `folds`, `test_trt`, the column groups
    (`gen_cols`, `pca_gen_cols`, `cell_cols`, `pca_cell_cols`, `ohe_cols`),
    `target_cols`, `target` and the hyper-parameter constants.  The
    checkpoint with the lowest validation loss is written to
    ``FOLD{fold}_.pth`` (the same file name `run_training` uses) and reloaded
    for the test prediction.

    Args:
        fold: Value of the ``kfold`` column used as the validation fold.
        seed: Seed handed to `seed_everything` before anything else runs.

    Returns:
        Tuple ``(oof, predictions)``.  ``oof`` has one row per row of `folds`
        and is zero everywhere except the validation rows of this fold, which
        hold the predictions of the best epoch.  ``predictions`` is the output
        of `inference_fn_MH` on the processed test rows.
    """
    
    seed_everything(seed)
    
    # One-hot encode cp_time / cp_dose in both the fold table and the test table.
    train = process_data(folds)
    test_ = process_data(test_trt)
    
    # Index labels of the training / validation rows inside `train`;
    # `val_idx` is used below to write into `oof`, `trn_idx` is not used again.
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index

    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)

    # Both branches receive the one-hot columns in addition to their own
    # raw and PCA columns.
    gen_x_train, cell_x_train, y_train  = train_df[gen_cols + pca_gen_cols + ohe_cols].values, train_df[cell_cols + pca_cell_cols + ohe_cols].values, train_df[target_cols].values
    gen_x_valid, cell_x_valid, y_valid =  valid_df[gen_cols + pca_gen_cols + ohe_cols].values, valid_df[cell_cols + pca_cell_cols + ohe_cols].values, valid_df[target_cols].values
    #     print("len target_cols", len(target_cols), train_df[target_cols].shape, train_df[target_cols].reset_index(drop=True).shape)
    #     print("shapes", x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)

    train_dataset = MoADataset_MH(gen_x_train, cell_x_train, y_train)
    valid_dataset = MoADataset_MH(gen_x_valid, cell_x_valid, y_valid)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
#     print('# of target cols', len(target_cols))
    
    model = Model_MH(
        num_gen_features=len(gen_cols + pca_gen_cols + ohe_cols),
        num_cell_features=len(cell_cols + pca_cell_cols + ohe_cols),
        num_targets=len(target_cols),
        hidden_size=hidden_size
    )
    
    model.to(DEVICE)
    
    # The one-cycle schedule is stepped once per batch inside train_fn_MH,
    # hence steps_per_epoch=len(trainloader).
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
    
    # Plain BCE is used for both training and validation here (no label smoothing).
    loss_fn = nn.BCEWithLogitsLoss()
    
    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0
    
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    best_loss = np.inf
    
    for epoch in range(EPOCHS):
        start = time.time()
        train_loss = train_fn_MH(model, optimizer,scheduler, loss_fn, trainloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn_MH(model, loss_fn, validloader, DEVICE)
        elapse = time.time() - start
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss} {elapse:.2f} seconds")
        
        if valid_loss < best_loss:
            
            # New best epoch: keep its validation predictions and checkpoint.
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), f"FOLD{fold}_.pth")
        
        elif(EARLY_STOP == True):
            
            # Counts non-improving epochs; the counter is never reset when a
            # later epoch improves.
            early_step += 1
            if (early_step >= early_stopping_steps):
                break
            
    
    #--------------------- PREDICTION---------------------
    gen_x_test, cell_x_test = test_[gen_cols + pca_gen_cols + ohe_cols].values, test_[cell_cols + pca_cell_cols + ohe_cols].values
    testdataset = TestDataset_MH(gen_x_test, cell_x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    
    # Fresh model instance loaded with the best checkpoint of this fold.
    model = Model_MH(
        num_gen_features=len(gen_cols + pca_gen_cols + ohe_cols),
        num_cell_features=len(cell_cols + pca_cell_cols + ohe_cols),
        num_targets=len(target_cols),
        hidden_size=hidden_size
    )
    
    model.load_state_dict(torch.load(f"FOLD{fold}_.pth"))
    model.to(DEVICE)
    
    # The zero array is overwritten immediately by the inference result.
    predictions = np.zeros((len(test_), target.iloc[:, 1:].shape[1]))
    predictions = inference_fn_MH(model, testloader, DEVICE)
    
    return oof, predictions

def run_k_fold_MH(NFOLDS, seed):
    """Train `Model_MH` on every fold and combine the per-fold results.

    Args:
        NFOLDS: Number of folds; folds ``0 .. NFOLDS-1`` are trained in order.
        seed: Seed forwarded to `run_training_MH` for every fold.

    Returns:
        Tuple ``(oof, predictions)``: the sum of the per-fold out-of-fold
        arrays and the mean of the per-fold test predictions.
    """
    n_targets = len(target_cols)
    oof = np.zeros((len(folds), n_targets))
    predictions = np.zeros((len(test_trt), n_targets))

    for fold_id in range(NFOLDS):
        fold_oof, fold_pred = run_training_MH(fold_id, seed)
        oof += fold_oof
        predictions += fold_pred / NFOLDS

    return oof, predictions

In [None]:
# Average the single-input network over every seed in SEED.
SEED = [0] #<-- Update
n_seeds = len(SEED)
n_targets = len(target_cols)
oof_nn = np.zeros((len(folds), n_targets))
predictions_nn = np.zeros((len(test_trt), n_targets))

for seed in SEED:
    oof_, predictions_ = run_k_fold(NFOLDS, seed)
    # Each seed contributes an equal share to the averages.
    oof_nn += oof_ / n_seeds
    predictions_nn += predictions_ / n_seeds

In [None]:
# Average the two-branch network over every seed in SEED.
SEED = [0] #<-- Update
n_seeds = len(SEED)
n_targets = len(target_cols)
oof_nn_mh = np.zeros((len(folds), n_targets))
predictions_nn_mh = np.zeros((len(test_trt), n_targets))

for seed in SEED:
    oof_, predictions_ = run_k_fold_MH(NFOLDS, seed)
    # Each seed contributes an equal share to the averages.
    oof_nn_mh += oof_ / n_seeds
    predictions_nn_mh += predictions_ / n_seeds

In [None]:
# Copies of the treated train / test frames whose target columns hold the
# predictions of each network.
train_trt_nn, train_trt_nn_mh = train_trt.copy(), train_trt.copy()
test_trt_nn, test_trt_nn_mh = test_trt.copy(), test_trt.copy()

# Single-input network: out-of-fold and test predictions.
train_trt_nn[target_cols] = oof_nn
test_trt_nn[target_cols] = predictions_nn

# Two-branch network: out-of-fold and test predictions.
train_trt_nn_mh[target_cols] = oof_nn_mh
test_trt_nn_mh[target_cols] = predictions_nn_mh
#%%
def out_sampe_result(train_trt_result):
    """Align predictions with every row of `train_targets_scored`.

    Args:
        train_trt_result: Frame with ``sig_id`` and one column per target.

    Returns:
        Frame with one row per ``sig_id`` of `train_targets_scored`; ids that
        have no row in ``train_trt_result`` get 0 for every target.
    """
    id_frame = train_targets_scored.drop(columns=target_cols)
    prediction_frame = train_trt_result[['sig_id'] + target_cols]
    return id_frame.merge(prediction_frame, on='sig_id', how='left').fillna(0)

def valid_score(valid_results):
    """Print and return the mean column-wise log loss of out-of-fold predictions.

    Args:
        valid_results: Frame holding a prediction column for every entry of
            `target_cols`, row-aligned with `train_targets_scored`.

    Returns:
        The log loss averaged over all target columns.
    """
    y_true = train_targets_scored[target_cols].values
    y_pred = valid_results[target_cols].values

    n_targets = len(target_cols)
    score = 0
    for i in range(n_targets):
        # Explicit labels keep log_loss well-defined even if a column holds a
        # single class.
        score_ = log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
        # Average over the target columns only.  The original divided by
        # target.shape[1], which also counts the `sig_id` column and so
        # under-reported the score.
        score += score_ / n_targets

    print("CV log_loss: ", score)
    return score

def log_loss_metric(y_true, y_pred):
    """Return the mean column-wise log loss over all scored targets.

    Args:
        y_true: DataFrame of true labels, one column per scored target.
        y_pred: DataFrame of predicted probabilities with the same columns.

    Returns:
        Mean of the per-target log losses.
    """
    # `sig_id` is an identifier, not a target; the original loop tried to
    # score it as well.
    scored_cols = [col for col in train_targets_scored.columns if col != 'sig_id']
    metrics = [
        log_loss(y_true.loc[:, _target], y_pred.loc[:, _target].astype(float), labels = [0,1])
        for _target in scored_cols
    ]
    return np.mean(metrics)
#%%
# Out-of-fold predictions of both networks, aligned with every training row.
id_and_targets = ['sig_id'] + target_cols
df_valid_nn = out_sampe_result(train_trt_nn)[id_and_targets]
df_valid_nn_mh = out_sampe_result(train_trt_nn_mh)[id_and_targets]
for df_valid in (df_valid_nn, df_valid_nn_mh):
    valid_score(df_valid)

# df_valid_nn.to_csv('df_valid_nn_11292020.csv',index = False)
# df_valid_nn_mh.to_csv('df_valid_nn_mh_11292020.csv', index =False)
# %% [code]
# Test predictions aligned with the submission template; ids without a
# prediction (the control rows excluded from test_trt) are filled with 0.
submission_ids = sample_submission.drop(columns=target_cols)
sub_nn = submission_ids.merge(test_trt_nn[id_and_targets], on='sig_id', how='left').fillna(0)
# sub_nn.to_csv('submission_nn_11292020.csv', index=False)
sub_nn_md = submission_ids.merge(test_trt_nn_mh[id_and_targets], on='sig_id', how='left').fillna(0)

In [None]:
NFOLDS_xgb = 5

DATA_DIR = '../input/lish-moa/'

# The XGBoost branch starts again from the raw (untransformed) CSV files.
train_xgb = pd.read_csv(DATA_DIR + 'train_features.csv')
targets_xgb = pd.read_csv(DATA_DIR + 'train_targets_scored.csv')
test_xgb = pd.read_csv(DATA_DIR + 'test_features.csv')
sub_xgb = pd.read_csv(DATA_DIR + 'sample_submission.csv')

# Integer-encode cp_type and cp_dose; the encoders are fitted on the training
# values and reused for the test frame.
lbl1 = preprocessing.LabelEncoder()
train_xgb['cp_type'] = lbl1.fit_transform(train_xgb['cp_type'].astype(str))
# Map each original cp_type label to its integer code.
lbl1_dict = {label: code for code, label in enumerate(lbl1.classes_)}
lbl2 = preprocessing.LabelEncoder()
train_xgb['cp_dose'] = lbl2.fit_transform(train_xgb['cp_dose'].astype(str))

test_xgb['cp_type'] = lbl1.transform(test_xgb['cp_type'].astype(str))
test_xgb['cp_dose'] = lbl2.transform(test_xgb['cp_dose'].astype(str))

# Drop the leading sig_id column and switch to plain arrays.
X_xgb, X_test_xgb, y_xgb = (frame.iloc[:,1:].to_numpy() for frame in (train_xgb, test_xgb, targets_xgb))
# %% [code]
# One XGBoost classifier per target, behind a count encoder applied to input
# columns 0 and 2.
classifier = MultiOutputClassifier(XGBClassifier(tree_method='gpu_hist'))

clf = Pipeline([
    ('encode', CountEncoder(cols=[0, 2])),
    ('classify', classifier),
])

# Hyper-parameters of the wrapped XGBClassifier, addressed through the pipeline.
_xgb_params = {
    'colsample_bytree': 0.6522,
    'gamma': 3.6975,
    'learning_rate': 0.0503,
    'max_delta_step': 2.0706,
    'max_depth': 10,
    'min_child_weight': 31.5800,
    'n_estimators': 166,
    'subsample': 0.8639,
}
params = {f'classify__estimator__{name}': value for name, value in _xgb_params.items()}

_ = clf.set_params(**params)

# ## Train the model
# 
# Framing this problem as a binary classification problem has the disadvantage that you need to train as many models as you have classes. \
# For this problem this means training 206 models per fold, for the large number of features included in this dataset this may take a long time...
#%%
# K-fold training of the XGBoost pipeline: collect out-of-fold predictions
# and average the test predictions over the folds.
oof_xgb = np.zeros(y_xgb.shape)
predictions_xgb = np.zeros((test_xgb.shape[0], y_xgb.shape[1]))
oof_xgb_losses = []
kf = KFold(n_splits=NFOLDS_xgb)
ctl_code = lbl1_dict['ctl_vehicle']
for fn, (trn_idx, val_idx) in enumerate(kf.split(X_xgb, y_xgb)):
    print('Starting fold: ', fn)
    X_val, y_val = X_xgb[val_idx], y_xgb[val_idx]

    # drop where cp_type==ctl_vehicle (baseline) from the training part only
    X_fold, y_fold = X_xgb[trn_idx], y_xgb[trn_idx]
    treated = X_fold[:, 0] != ctl_code
    X_train, y_train = X_fold[treated, :], y_fold[treated]

    clf.fit(X_train, y_train)

    # predict_proba returns one array per target; keep the positive-class
    # column of each and arrange them as (rows, targets).
    val_preds = np.array(clf.predict_proba(X_val))[:,:,1].T
    oof_xgb[val_idx] = val_preds
    oof_xgb_losses.append(log_loss(np.ravel(y_val), np.ravel(val_preds)))

    preds = np.array(clf.predict_proba(X_test_xgb))[:,:,1].T
    predictions_xgb += preds / NFOLDS_xgb

print(oof_xgb_losses)
print('Mean OOF loss across folds', np.mean(oof_xgb_losses))
print('STD OOF loss across folds', np.std(oof_xgb_losses))

In [None]:
#%%
# `cp_type` in train_xgb / test_xgb has been label-encoded to integers, so
# control rows must be matched by their encoded value.  Comparing against the
# string 'ctl_vehicle' never matches and leaves every control prediction
# untouched.
ctl_code = lbl1_dict['ctl_vehicle']

# set control train preds to 0
control_mask = (train_xgb['cp_type'] == ctl_code).to_numpy()
oof_xgb[control_mask] = 0
print('OOF log loss: ', log_loss(np.ravel(y_xgb), np.ravel(oof_xgb)))

# ## Analysis of OOF preds
# set control test preds to 0
control_mask = (test_xgb['cp_type'] == ctl_code).to_numpy()
predictions_xgb[control_mask] = 0

# create the submission file
sub_xgb.iloc[:,1:] = predictions_xgb
# sub_xgb.to_csv('submission_xgb.csv', index=False)
#%%
# Out-of-fold XGBoost predictions in the layout of train_targets_scored.
df_valid_xgb = train_targets_scored.copy()
df_valid_xgb[target_cols] = oof_xgb

In [None]:
def drop_id(df):
    """Remove the ``sig_id`` column from ``df`` in place, if it is present.

    Args:
        df: DataFrame to strip; it is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'sig_id' not in df.columns:
        return df

    df.drop(columns=['sig_id'], inplace=True)
    return df

# Stacking inputs: the predictions of the three base models side by side
# (single-input NN, two-branch NN, XGBoost) without the id column.
oof_frames = [drop_id(frame) for frame in (df_valid_nn, df_valid_nn_mh, df_valid_xgb)]
X_stack = pd.concat(oof_frames, axis=1).to_numpy()

test_frames = [drop_id(frame) for frame in (sub_nn, sub_nn_md, sub_xgb)]
X_test = pd.concat(test_frames, axis=1).to_numpy()

In [None]:
from sklearn.neural_network import MLPClassifier
def search_model(train_x, train_y, est, param_grid, n_jobs, cv, refit=False):
    """Grid-search ``est`` over ``param_grid``, scored by negative log loss.

    Args:
        train_x: Training features.
        train_y: Training targets.
        est: Estimator to tune.
        param_grid: Parameter grid passed to ``GridSearchCV``.
        n_jobs: Number of parallel jobs.
        cv: Cross-validation specification passed to ``GridSearchCV``.
        refit: Whether to refit the best estimator on the whole data.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    # The `iid` argument was removed from GridSearchCV in scikit-learn 0.24
    # and passing it raises a TypeError there, so it is no longer supplied.
    model = model_selection.GridSearchCV(estimator=est,
                                         param_grid=param_grid,
                                         scoring='neg_log_loss',
                                         verbose=1,
                                         n_jobs=n_jobs,
                                         refit=refit,
                                         cv=cv)
    # Fit Grid Search Model,
    model.fit(train_x, train_y)
    print("est score: %0.3f" % (model.best_score_))
    print("Best parameters set:" , model.best_params_)
    print("Scores:" , model.cv_results_)
    return model


# Fit the stacking MLP on the base-model predictions, using the scored
# targets as labels.
param_grid = {"hidden_layer_sizes": [50]}

model = search_model(
    X_stack,
    y_xgb,
    MLPClassifier(),
    param_grid,
    n_jobs=-1,
    cv=4,
    refit=True,
)

print ("best subsample:", model.best_params_)

In [None]:
# Final submission: stacker probabilities written into the template's
# target columns.
predictions_final = model.predict_proba(X_test)
sub_final = pd.read_csv(f'{DATA_DIR}sample_submission.csv')
sub_final.iloc[:, 1:] = predictions_final
sub_final.to_csv('submission.csv', index=False)