This notebook is based on thehemen's great notebook:

https://www.kaggle.com/thehemen/pytorch-transfer-learning-with-k-folds-by-drug-ids

Updated the following points:
* Adding LayerNorm in the first layer of the model (-0.00010). See the discussion thread for more details (https://www.kaggle.com/c/lish-moa/discussion/201051)
* Removing all-zero columns from the nonscored targets (-0.00001)
* Using Cris' CV (-0.00001)

This is an inference notebook. To train the model instead, set `TRAINING` to `True`.

In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns
 
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.decomposition import PCA
 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
 
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Run-mode switch read by run_training/train_model: when False, saved checkpoints
# are loaded from MDL_DIR and only validated/used for prediction; when True, the
# models are trained and checkpoints are written to the working directory.
TRAINING = False # False: inference, True: training

In [None]:
# Installs iterative-stratification from a local dataset path rather than an index.
# MultilabelStratifiedKFold is used by make_cv_folds to build the CV splits.
!pip install /kaggle/input/iterative-stratification/iterative-stratification-master/
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
SEED_VALUE = 42

def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports PYTHONHASHSEED and asks cuDNN for deterministic kernels so
    that repeated runs with the same seed are comparable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every seeding entry point takes the same integer.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

seed_everything(seed=SEED_VALUE)

In [None]:
# Prefix shared by every checkpoint file and by the OOF csv written at the end.
MDL_NAME = 'trans_learn_drop_zero_target'
# Directory the pre-trained checkpoints are read from when TRAINING is False.
MDL_DIR = '../input/pytorch-transfer-learn-dropping-zero-targets'

In [None]:
data_dir = '../input/lish-moa/'

# Competition tables, read in a fixed order; each file is named <table>.csv.
table_names = [
    'train_features',
    'train_targets_scored',
    'train_targets_nonscored',
    'train_drug',
    'test_features',
    'sample_submission',
]

(train_features,
 train_targets_scored,
 train_targets_nonscored,
 train_drug,
 test_features,
 sample_submission) = [pd.read_csv(data_dir + name + '.csv') for name in table_names]

# Quick shape report for every table that was just loaded.
loaded_tables = [train_features, train_targets_scored, train_targets_nonscored,
                 train_drug, test_features, sample_submission]
for name, table in zip(table_names, loaded_tables):
    print('{}: {}'.format(name, table.shape))

## Dropping all-zero columns in nonscored target

In [None]:
# Column-wise positive counts for every nonscored target (first column is sig_id).
tmp = train_targets_nonscored.iloc[:, 1:].sum()
# Targets that never fire carry no training signal, so they are removed.
drop_cols = tmp.index[tmp == 0].tolist()
train_targets_nonscored = train_targets_nonscored.drop(columns=drop_cols)
print(f'{len(drop_cols)} targets are dropped')

## Keep original features

In [None]:
# Feature families are identified by column-name prefix.
_all_columns = train_features.columns
GENES = _all_columns[_all_columns.str.startswith('g-')].tolist()
CELLS = _all_columns[_all_columns.str.startswith('c-')].tolist()

# print('GENES: {}'.format(GENES[:10]))
# print('CELLS: {}'.format(CELLS[:10]))

In [None]:
# Snapshot the gene/cell columns before the RankGauss cell overwrites them in
# train_features/test_features. These untransformed copies are later passed to
# the model as its second and third inputs (the LayerNorm branches).
train_genes = train_features[GENES].copy()
train_cells = train_features[CELLS].copy()
test_genes = test_features[GENES].copy()
test_cells = test_features[CELLS].copy()

# RankGauss

In [None]:
for col in (GENES + CELLS):
    # A fresh transformer per column, fitted on the training values only and
    # then applied to both train and test.
    transformer = QuantileTransformer(n_quantiles=100, random_state=0, output_distribution="normal")

    train_column = train_features[col].values.reshape(-1, 1)
    test_column = test_features[col].values.reshape(-1, 1)
    transformer.fit(train_column)

    train_features[col] = transformer.transform(train_column).ravel()
    test_features[col] = transformer.transform(test_column).ravel()

# PCA

In [None]:
# GENES
n_comp = 157

# Fit the projection on train only, then apply it to both splits.
pca = PCA(n_components=n_comp, random_state=SEED_VALUE)
gene_component_names = [f'pca_G-{i}' for i in range(n_comp)]

train2 = pd.DataFrame(pca.fit_transform(train_features[GENES]), columns=gene_component_names)
test2 = pd.DataFrame(pca.transform(test_features[GENES]), columns=gene_component_names)

# Components are appended next to the existing columns, not substituted for them.
train_features = pd.concat([train_features, train2], axis=1)
test_features = pd.concat([test_features, test2], axis=1)

for label, frame in (('train_features', train_features), ('test_features', test_features)):
    print('{}: {}'.format(label, frame.shape))

In [None]:
# CELLS
n_comp = 7

# Fit the projection on train only, then apply it to both splits.
pca = PCA(n_components=n_comp, random_state=SEED_VALUE)
cell_component_names = [f'pca_C-{i}' for i in range(n_comp)]

train2 = pd.DataFrame(pca.fit_transform(train_features[CELLS]), columns=cell_component_names)
test2 = pd.DataFrame(pca.transform(test_features[CELLS]), columns=cell_component_names)

# Components are appended next to the existing columns, not substituted for them.
train_features = pd.concat([train_features, train2], axis=1)
test_features = pd.concat([test_features, test2], axis=1)

for label, frame in (('train_features', train_features), ('test_features', test_features)):
    print('{}: {}'.format(label, frame.shape))

# Stats features

In [None]:
def fe_stats(train, test):
    """Append row-wise summary statistics of the gene and cell columns.

    For each of the two column families (names starting with 'g-' and 'c-',
    as found in `train`) the mean, standard deviation, kurtosis and skew of
    every row are written to new columns `<g|c>_{mean,std,kurt,skew}`.

    Both frames are modified in place and also returned as `(train, test)`.
    """
    families = (
        ('g', [name for name in train.columns if name.startswith('g-')]),
        ('c', [name for name in train.columns if name.startswith('c-')]),
    )

    for frame in (train, test):
        for prefix, members in families:
            values = frame[members]
            frame[f'{prefix}_mean'] = values.mean(axis=1)
            frame[f'{prefix}_std'] = values.std(axis=1)
            frame[f'{prefix}_kurt'] = values.kurtosis(axis=1)
            frame[f'{prefix}_skew'] = values.skew(axis=1)

    return train, test

# fe_stats mutates both frames in place and returns them; the rebinding keeps
# the cell readable as "inputs -> outputs".
train_features, test_features = fe_stats(train_features, test_features)

# Merging data and dropping control

In [None]:
# Attach scored targets, nonscored targets and drug ids to the training features.
train = (
    train_features
    .merge(train_targets_scored, on='sig_id', how='left')
    .merge(train_targets_nonscored, on='sig_id', how='left')
    .merge(train_drug, on='sig_id', how='left')
)

# Control-vehicle rows are removed from every frame that shares the row order.
keep_train = train['cp_type'] != 'ctl_vehicle'
keep_test = test_features['cp_type'] != 'ctl_vehicle'

train_genes = train_genes[keep_train].reset_index(drop=True)
train_cells = train_cells[keep_train].reset_index(drop=True)
test_genes = test_genes[keep_test].reset_index(drop=True)
test_cells = test_cells[keep_test].reset_index(drop=True)

# With controls gone, cp_type is no longer needed and is dropped.
train = train[keep_train].reset_index(drop=True).drop('cp_type', axis=1)
test = test_features[keep_test].reset_index(drop=True).drop('cp_type', axis=1)

In [None]:
# Every column of the target tables except the row identifier is a label.
target_cols = train_targets_scored.columns.drop('sig_id').tolist()
aux_target_cols = train_targets_nonscored.columns.drop('sig_id').tolist()
all_target_cols = target_cols + aux_target_cols

num_targets, num_aux_targets, num_all_targets = (
    len(target_cols), len(aux_target_cols), len(all_target_cols)
)

for label, count in (('num_targets', num_targets),
                     ('num_aux_targets', num_aux_targets),
                     ('num_all_targets', num_all_targets)):
    print('{}: {}'.format(label, count))

# Dataset Classes

In [None]:
class MoADataset:
    """Map-style dataset pairing three row-aligned input arrays with targets.

    Each item is a dict of float tensors: 'x1' (engineered features),
    'x2' (gene block), 'x3' (cell block) and 'y' (targets), all taken from
    the same row index of the 2-D arrays given to the constructor.
    """

    def __init__(self, features, genes, cells, targets):
        self.features = features
        self.genes = genes
        self.cells = cells
        self.targets = targets

    def __len__(self):
        # Row count of the main feature matrix.
        return self.features.shape[0]

    def __getitem__(self, idx):
        sources = (
            ('x1', self.features),
            ('x2', self.genes),
            ('x3', self.cells),
            ('y', self.targets),
        )
        return {key: torch.tensor(array[idx, :], dtype=torch.float) for key, array in sources}
    
class TestDataset:
    """Map-style dataset for prediction: like MoADataset but without targets.

    Each item is a dict of float tensors 'x1', 'x2' and 'x3' taken from the
    same row index of the three 2-D arrays given to the constructor.
    """

    def __init__(self, features, genes, cells):
        self.features = features
        self.genes = genes
        self.cells = cells

    def __len__(self):
        # Row count of the main feature matrix.
        return self.features.shape[0]

    def __getitem__(self, idx):
        sources = (
            ('x1', self.features),
            ('x2', self.genes),
            ('x3', self.cells),
        )
        return {key: torch.tensor(array[idx, :], dtype=torch.float) for key, array in sources}

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training epoch and return the mean per-batch loss.

    Batches are dicts with keys 'x1', 'x2', 'x3' and 'y'. The learning-rate
    scheduler is stepped after every optimizer step, i.e. once per batch.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        optimizer.zero_grad()

        inputs = [batch[key].to(device) for key in ('x1', 'x2', 'x3')]
        targets = batch['y'].to(device)

        loss = loss_fn(model(*inputs), targets)
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate `model` on `dataloader` without updating it.

    Args:
        model: callable taking the three input tensors (x1, x2, x3) and
            returning logits.
        loss_fn: callable `(logits, targets) -> scalar tensor`.
        dataloader: iterable of dicts with keys 'x1', 'x2', 'x3' and 'y'.
        device: device the batch tensors are moved to.

    Returns:
        (mean per-batch loss, numpy array of sigmoid probabilities for all
        rows, concatenated in dataloader order).
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # Validation never back-propagates, so autograd is switched off: no graph
    # is recorded for the forward pass, which saves memory and time.
    with torch.no_grad():
        for data in dataloader:
            x1, x2, x3, targets = (data[key].to(device) for key in ('x1', 'x2', 'x3', 'y'))
            outputs = model(x1, x2, x3)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Return sigmoid probabilities for every row served by `dataloader`.

    Batches are dicts with keys 'x1', 'x2' and 'x3'; results are concatenated
    in dataloader order into a single numpy array. Gradients are disabled.
    """
    model.eval()
    collected = []

    with torch.no_grad():
        for batch in dataloader:
            inputs = [batch[key].to(device) for key in ('x1', 'x2', 'x3')]
            logits = model(*inputs)
            collected.append(logits.sigmoid().cpu().numpy())

    return np.concatenate(collected)

In [None]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is taken:
    `t' = t * (1 - smoothing) + 0.5 * smoothing`.

    Args:
        weight: optional rescaling weight forwarded to
            `F.binary_cross_entropy_with_logits`.
        reduction: 'mean' (default), 'sum', or any other value to get the
            unreduced element-wise loss.
        smoothing: smoothing strength in [0, 1).
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor stores both `weight` and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return smoothed targets; `n_labels` is accepted but not used."""
        assert 0 <= smoothing < 1

        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing

        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)

        # Ask for the element-wise loss so that the reduction chosen at
        # construction time is the one actually applied. Relying on the
        # functional default would average first and make 'sum' and 'none'
        # silently behave like 'mean'.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss

# Model

In [None]:
class Model(nn.Module):
    """Three-branch MLP producing one logit per target.

    Branches (first layer):
      * x1 -> BatchNorm -> Linear(1500)      (engineered features)
      * x2 -> LayerNorm -> Linear(1500)      (gene block)
      * x3 -> LayerNorm -> Linear(750)       (cell block)
    The branch outputs are concatenated and passed through three
    BatchNorm -> Dropout -> Linear -> LeakyReLU stages, followed by a
    BatchNorm -> Dropout -> weight-normalised Linear head (no activation).

    Layers are created in a fixed order and under fixed attribute names so
    that seeded initialisation and saved state_dicts stay compatible.
    """

    def __init__(self, num_features, num_genes, num_cells, num_targets):
        super().__init__()
        self.hidden_size = [1500, 1250, 1000, 750]
        self.dropout_value = [0.5, 0.35, 0.3, 0.25]

        width1, width2, width3, width4 = self.hidden_size
        drop2, drop3, drop4, drop5 = self.dropout_value
        cell_width = width1 // 2
        merged_width = width1 * 2 + cell_width  # x1 branch + x2 branch + x3 branch

        # First layer, one branch per input.
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.Linear(num_features, width1)

        self.layer_norm1_2 = nn.LayerNorm(num_genes)
        self.dense1_2 = nn.Linear(num_genes, width1)

        self.layer_norm1_3 = nn.LayerNorm(num_cells)
        self.dense1_3 = nn.Linear(num_cells, cell_width)

        # Shared trunk.
        self.batch_norm2 = nn.BatchNorm1d(merged_width)
        self.dropout2 = nn.Dropout(drop2)
        self.dense2 = nn.Linear(merged_width, width2)

        self.batch_norm3 = nn.BatchNorm1d(width2)
        self.dropout3 = nn.Dropout(drop3)
        self.dense3 = nn.Linear(width2, width3)

        self.batch_norm4 = nn.BatchNorm1d(width3)
        self.dropout4 = nn.Dropout(drop4)
        self.dense4 = nn.Linear(width3, width4)

        # Output head.
        self.batch_norm5 = nn.BatchNorm1d(width4)
        self.dropout5 = nn.Dropout(drop5)
        self.dense5 = nn.utils.weight_norm(nn.Linear(width4, num_targets))

    def forward(self, x1, x2, x3):
        feature_branch = F.leaky_relu(self.dense1(self.batch_norm1(x1)))
        gene_branch = F.leaky_relu(self.dense1_2(self.layer_norm1_2(x2)))
        cell_branch = F.leaky_relu(self.dense1_3(self.layer_norm1_3(x3)))

        x = torch.cat((feature_branch, gene_branch, cell_branch), 1)

        trunk = (
            (self.batch_norm2, self.dropout2, self.dense2),
            (self.batch_norm3, self.dropout3, self.dense3),
            (self.batch_norm4, self.dropout4, self.dense4),
        )
        for norm, drop, dense in trunk:
            x = F.leaky_relu(dense(drop(norm(x))))

        # The head returns raw logits.
        return self.dense5(self.dropout5(self.batch_norm5(x)))

In [None]:
class FineTuneScheduler:
    """Gradual-unfreezing helper for fine-tuning a pretrained `Model`.

    `copy_without_top` clones the pretrained network, freezes every layer
    below the head and installs a fresh head for the new target count.
    `step` is then called once per epoch and re-enables training for one
    frozen layer group every `epochs_per_step` epochs, from the layer closest
    to the head down to the input layer.

    Layer groups are identified by the last character of the module name, so
    'dense1_2'/'layer_norm1_2' fall in group '2' and
    'dense1_3'/'layer_norm1_3' in group '3'.
    """

    # Group id of the output head; it is replaced rather than frozen.
    _TOP_LAYER = '5'

    def __init__(self, epochs):
        self.epochs = epochs
        self.epochs_per_step = 0
        self.frozen_layers = []

    @staticmethod
    def _layer_index(param_name):
        """Return the layer-group id (a one-character string) of a parameter."""
        return param_name.split('.')[0][-1]

    def copy_without_top(self, model, num_features, num_targets, num_targets_new):
        """Clone `model`, freeze its body and attach a new trainable head.

        Args:
            model: pretrained network whose weights are copied.
            num_features: width of the first model input.
            num_targets: output width of `model` (needed to load its weights).
            num_targets_new: output width of the replacement head.

        Returns:
            The new model, moved to DEVICE.
        """
        self.frozen_layers = []

        model_new = Model(num_features, len(GENES), len(CELLS), num_targets)
        model_new.load_state_dict(model.state_dict())

        # Freeze all weights below the head
        for name, param in model_new.named_parameters():
            layer_index = self._layer_index(name)

            # layer_index is a string, so it must be compared with '5'.
            # Comparing it with the integer 5 never matches, which would list
            # the head as frozen and spend the first unfreeze step on a layer
            # that is replaced (and therefore trainable) anyway.
            if layer_index == self._TOP_LAYER:
                continue

            param.requires_grad = False

            # Save frozen layer names
            if layer_index not in self.frozen_layers:
                self.frozen_layers.append(layer_index)

        # Never let the interval drop to 0: step() uses it as a modulus.
        self.epochs_per_step = max(1, self.epochs // max(1, len(self.frozen_layers)))

        # Replace the top layers with another ones
        model_new.batch_norm5 = nn.BatchNorm1d(model_new.hidden_size[3])
        model_new.dropout5 = nn.Dropout(model_new.dropout_value[3])
        model_new.dense5 = nn.utils.weight_norm(nn.Linear(model_new.hidden_size[-1], num_targets_new))
        model_new.to(DEVICE)
        return model_new

    def step(self, epoch, model):
        """Unfreeze the last still-frozen layer group when `epoch` is due."""
        if len(self.frozen_layers) == 0:
            return

        if epoch % self.epochs_per_step == 0:
            last_frozen_index = self.frozen_layers[-1]

            # Unfreeze parameters of the last frozen layer
            for name, param in model.named_parameters():
                if self._layer_index(name) == last_frozen_index:
                    param.requires_grad = True

            del self.frozen_layers[-1]  # Remove the last layer as unfrozen

# Preprocessing steps

In [None]:
def process_data(data):
    """Return a copy of `data` with 'cp_time' and 'cp_dose' one-hot encoded.

    The two source columns are replaced by indicator columns named
    `cp_time_<value>` / `cp_dose_<value>`; the input frame is left untouched.
    """
    categorical_columns = ['cp_time', 'cp_dose']
    return pd.get_dummies(data, columns=categorical_columns)

In [None]:
# Model inputs are every processed column that is not a target, an identifier
# or a CV-fold marker. Fold columns are named 'kfold_<seed>', so they are
# matched by prefix: an exact match on 'kfold' would let them through as
# features if this cell is re-run after the folds have been assigned.
_non_feature_cols = set(all_target_cols) | {'sig_id', 'drug_id'}
feature_cols = [
    c for c in process_data(train).columns
    if c not in _non_feature_cols and not c.startswith('kfold')
]
num_features = len(feature_cols)
num_features

In [None]:
# HyperParameters

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 24  # epochs per training stage; also the OneCycleLR horizon
BATCH_SIZE = 128

# Per-stage settings, keyed by the stage tag passed to train_model:
# 'ALL_TARGETS' = pre-training on scored + nonscored targets,
# 'SCORED_ONLY' = fine-tuning on the scored targets.
WEIGHT_DECAY = {'ALL_TARGETS': 1e-5, 'SCORED_ONLY': 3e-6}  # Adam weight_decay
MAX_LR = {'ALL_TARGETS': 1e-2, 'SCORED_ONLY': 3e-3}  # OneCycleLR max_lr
DIV_FACTOR = {'ALL_TARGETS': 1e3, 'SCORED_ONLY': 1e2}  # OneCycleLR div_factor
PCT_START = 0.1  # OneCycleLR pct_start

In [None]:
# Show model architecture
# Built with the pre-training output width (scored + nonscored targets); this
# instance is only displayed, training creates its own models.
model = Model(num_features, len(GENES), len(CELLS), num_all_targets)
model

# Single fold training

In [None]:
from sklearn.model_selection import KFold

def make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH):
    """Add one fold-assignment column per seed ('kfold_0' .. 'kfold_{SEEDS-1}').

    Drugs that occur at most DRUG_THRESH times are assigned to a fold as a
    whole (stratified on their mean target vector); rows of more frequent
    drugs are stratified individually. `train` is modified in place and
    returned.
    """
    drug_counts = train.drug_id.value_counts()
    rare_drugs = drug_counts.loc[drug_counts <= DRUG_THRESH].index.sort_values()
    frequent_drugs = drug_counts.loc[drug_counts > DRUG_THRESH].index.sort_values()

    for seed_id in range(SEEDS):
        kfold_col = 'kfold_{}'.format(seed_id)

        # Rare drugs: one fold per drug id.
        fold_of_drug = {}
        splitter = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        drug_profiles = train.groupby('drug_id')[target_cols].mean().loc[rare_drugs]

        for fold, (_, valid_pos) in enumerate(splitter.split(drug_profiles, drug_profiles[target_cols])):
            for drug in drug_profiles.index[valid_pos].values:
                fold_of_drug[drug] = fold

        # Frequent drugs: one fold per individual row (sig_id).
        fold_of_sig = {}
        splitter = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        frequent_rows = train.loc[train.drug_id.isin(frequent_drugs)].reset_index(drop=True)

        for fold, (_, valid_pos) in enumerate(splitter.split(frequent_rows, frequent_rows[target_cols])):
            for sig in frequent_rows.sig_id[valid_pos].values:
                fold_of_sig[sig] = fold

        # Drug-level assignment first; rows it leaves empty get the row-level one.
        train[kfold_col] = train.drug_id.map(fold_of_drug)
        unassigned = train[kfold_col].isna()
        train.loc[unassigned, kfold_col] = train.loc[unassigned, 'sig_id'].map(fold_of_sig)
        train[kfold_col] = train[kfold_col].astype('int8')

    return train

SEEDS = 7  # number of independent fold assignments (columns kfold_0 .. kfold_6)
NFOLDS = 7  # folds per assignment
DRUG_THRESH = 18  # drugs seen at most this many times stay together in one fold

# Adds the kfold_* columns to `train` in place (the frame is also returned).
train = make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH)
# train.head()

In [None]:
def run_training(fold_id, seed_id):
    """Produce out-of-fold and test predictions for one (seed, fold) pair.

    When TRAINING is True the model is pre-trained on all targets, fine-tuned
    on the scored targets with gradual unfreezing, and the best checkpoint of
    each stage is written to the working directory. Otherwise the fine-tuned
    checkpoint is loaded from MDL_DIR and only evaluated.

    Args:
        fold_id: validation fold number within the seed's fold assignment.
        seed_id: selects the 'kfold_<seed_id>' column and seeds the RNGs.

    Returns:
        (oof, predictions): `oof` has one row per training row and is non-zero
        only on this fold's validation rows; `predictions` holds the test-set
        probabilities for the scored targets.
    """
    seed_everything(seed_id)

    train_ = process_data(train)
    test_ = process_data(test)

    # Fold membership is computed once and reused for every frame that shares
    # the row order of `train`.
    kfold_col = f'kfold_{seed_id}'
    is_valid = train_[kfold_col] == fold_id
    val_idx = train_[is_valid].index

    train_df = train_[~is_valid].reset_index(drop=True)
    valid_df = train_[is_valid].reset_index(drop=True)

    train_genes_df = train_genes[~is_valid].reset_index(drop=True)
    valid_genes_df = train_genes[is_valid].reset_index(drop=True)
    train_cells_df = train_cells[~is_valid].reset_index(drop=True)
    valid_cells_df = train_cells[is_valid].reset_index(drop=True)

    def train_model(model, tag_name, target_cols_now, fine_tune_scheduler=None):
        """Train (TRAINING) or just validate `model`; return its OOF array."""
        x_train, y_train = train_df[feature_cols], train_df[target_cols_now]
        x_valid, y_valid = valid_df[feature_cols], valid_df[target_cols_now]

        train_dataset = MoADataset(x_train.values, train_genes_df.values, train_cells_df.values, y_train.values)
        valid_dataset = MoADataset(x_valid.values, valid_genes_df.values, valid_cells_df.values, y_valid.values)

        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # Validation uses plain BCE; training uses the label-smoothed variant.
        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothBCEwLogits(smoothing=0.001)

        oof = np.zeros((len(train), len(target_cols_now)))

        if TRAINING:

            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=WEIGHT_DECAY[tag_name])
            scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                      steps_per_epoch=len(trainloader),
                                                      pct_start=PCT_START,
                                                      div_factor=DIV_FACTOR[tag_name],
                                                      max_lr=MAX_LR[tag_name],
                                                      epochs=EPOCHS)

            best_loss = np.inf
            best_epoch = None

            for epoch in range(EPOCHS):
                if fine_tune_scheduler is not None:
                    fine_tune_scheduler.step(epoch, model)

                train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, DEVICE)
                valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
                print(f"SEED: {seed_id}, FOLD: {fold_id}, {tag_name}, EPOCH: {epoch}, train_loss: {train_loss:.6f}, valid_loss: {valid_loss:.6f}")

                # A diverged run cannot recover; keep the best epoch so far.
                if np.isnan(valid_loss):
                    break

                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_epoch = epoch
                    oof[val_idx] = valid_preds
                    torch.save(model.state_dict(), f"./{MDL_NAME}_{tag_name}_SEED{seed_id}_FOLD{fold_id}_.pth")

            print(f'### SEED: {seed_id}, FOLD: {fold_id}, {tag_name}, BEST EPOCH:{best_epoch}, valid_loss: {best_loss:.6f}')

        else:
            valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
            oof[val_idx] = valid_preds

            print(f'### SEED: {seed_id}, FOLD: {fold_id}, {tag_name}, valid_loss: {valid_loss:.6f}')

        return oof

    def load_checkpoint(n_outputs, path):
        """Build a Model with `n_outputs` logits and load weights from `path`."""
        loaded = Model(num_features, len(GENES), len(CELLS), n_outputs)
        # map_location lets checkpoints saved on a GPU be restored on a
        # CPU-only machine (and the other way round).
        loaded.load_state_dict(torch.load(path, map_location=DEVICE))
        loaded.to(DEVICE)
        return loaded

    if TRAINING:

        fine_tune_scheduler = FineTuneScheduler(EPOCHS)

        pretrained_model = Model(num_features, len(GENES), len(CELLS), num_all_targets)
        pretrained_model.to(DEVICE)

        # Train on scored + nonscored targets
        train_model(pretrained_model, 'ALL_TARGETS', all_target_cols)

        # Load the pretrained model with the best loss
        pretrained_model = load_checkpoint(
            num_all_targets, f"./{MDL_NAME}_ALL_TARGETS_SEED{seed_id}_FOLD{fold_id}_.pth")

        # Copy model without the top layer
        final_model = fine_tune_scheduler.copy_without_top(pretrained_model, num_features, num_all_targets, num_targets)

        # Fine-tune the model on scored targets only
        oof = train_model(final_model, 'SCORED_ONLY', target_cols, fine_tune_scheduler)

        # Load the fine-tuned model with the best loss
        model = load_checkpoint(
            num_targets, f"./{MDL_NAME}_SCORED_ONLY_SEED{seed_id}_FOLD{fold_id}_.pth")

    else:
        # Load the fine-tuned model with the best loss
        model = load_checkpoint(
            num_targets, f"{MDL_DIR}/{MDL_NAME}_SCORED_ONLY_SEED{seed_id}_FOLD{fold_id}_.pth")

        # validating the model on scored targets only
        oof = train_model(model, 'SCORED_ONLY', target_cols)

    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols]
    testdataset = TestDataset(x_test.values, test_genes.values, test_cells.values)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    predictions = inference_fn(model, testloader, DEVICE)
    return oof, predictions

In [None]:
def run_k_fold(NFOLDS, seed_id):
    """Run every fold of one seed and aggregate the results.

    Returns `(oof, predictions, score)`: the summed out-of-fold predictions,
    the fold-averaged test predictions, and the mean per-target log loss of
    the out-of-fold predictions. The score is also printed.
    """
    n_targets = len(target_cols)
    oof = np.zeros((len(train), n_targets))
    predictions = np.zeros((len(test), n_targets))

    for fold_id in range(NFOLDS):
        fold_oof, fold_pred = run_training(fold_id, seed_id)
        # Test predictions are averaged over folds; OOF arrays are summed.
        predictions += fold_pred / NFOLDS
        oof += fold_oof

    score = 0
    for position, column in enumerate(target_cols):
        column_loss = log_loss(train.loc[:, column].values, oof[:, position])
        score += column_loss / n_targets
    print(f"### SEED: {seed_id}, CV score: {score:.6f}")

    return oof, predictions, score

In [None]:
from time import time

# Averaging on multiple SEEDS
# Every id listed here needs a matching 'kfold_<id>' column in `train`.
SEED = [0, 1, 2, 3, 4, 5, 6]
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))
scores = []

time_begin = time()

for seed_id in SEED:
    oof_, predictions_, score_ = run_k_fold(NFOLDS, seed_id)
    oof += oof_ / len(SEED)
    predictions += predictions_ / len(SEED)
    scores.append(score_)

print('-'*80)
# Label each score with the seed that produced it, not with its list position.
for seed_id, seed_score in zip(SEED, scores):
    print(f'### SEED {seed_id} CV score: {seed_score:.6f}')

score = 0
for i in range(len(target_cols)):
    _score = log_loss(train.loc[:,target_cols[i]].values, oof[:,i])
    score += _score / len(target_cols)
print(f"#### Seed Averaged CV score: {score:.6f}")

time_diff = time() - time_begin

# NOTE: from here on the target columns of `train` hold seed-averaged OOF
# predictions instead of the labels, so this cell must not be re-run without
# rebuilding `train` first.
train[target_cols] = oof
test[target_cols] = predictions

In [None]:
from datetime import timedelta
# Wall-clock duration of the seed-averaging cell, shown as H:MM:SS.
str(timedelta(seconds=time_diff))

In [None]:
# At this point the target columns of `train` contain the seed-averaged
# out-of-fold predictions; they are saved together with the row ids.
train[['sig_id']+target_cols].to_csv(f'oof_{MDL_NAME}.csv', index=False)

In [None]:
# Sanity check: number of scored targets written to the OOF file.
len(target_cols)

In [None]:
# Re-attach the OOF predictions to the full scored-target table. Rows with no
# prediction in `train` (it excludes the control rows) are filled with 0.
valid_results = (
    train_targets_scored
    .drop(columns=target_cols)
    .merge(train[['sig_id'] + target_cols], on='sig_id', how='left')
    .fillna(0)
)

y_true = train_targets_scored[target_cols].values
y_pred = valid_results[target_cols].values

# Sum of the per-target log losses, averaged over targets when printed.
score = sum(log_loss(y_true[:, i], y_pred[:, i]) for i in range(len(target_cols)))

print("CV log_loss: ", score / y_pred.shape[1])

In [None]:
# Use sample_submission as the row template; rows with no prediction in
# `test` (it excludes the control rows) are submitted as all zeros.
test_predictions = test[['sig_id'] + target_cols]
sub = (
    sample_submission
    .drop(columns=target_cols)
    .merge(test_predictions, on='sig_id', how='left')
    .fillna(0)
)
sub.to_csv('submission.csv', index=False)