In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#!pip3 install iterative-stratification
import sys
sys.path.append('../input/iterset/iterstrat')
from ml_stratifiers import MultilabelStratifiedKFold


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold, StratifiedKFold, KFold, train_test_split,  cross_val_score
from sklearn.metrics import log_loss, make_scorer
from sklearn.decomposition import PCA
from sklearn.preprocessing import QuantileTransformer

import torch 
import matplotlib.pyplot as plt
import random

In [None]:
def seed_everything(seed_value):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and, when CUDA is available, seeds the
    CUDA generators and puts cuDNN into deterministic, non-benchmark mode.

    Args:
        seed_value: Seed passed to every generator.
    """
    for seed_with in (random.seed, np.random.seed, torch.manual_seed):
        seed_with(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if not torch.cuda.is_available():
        return

    # GPU-side generators and cuDNN determinism flags.
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)


In [None]:
# Load the competition tables from the read-only Kaggle input directory.
test_features = pd.read_csv("/kaggle/input/lish-moa/test_features.csv")
train_drug = pd.read_csv("/kaggle/input/lish-moa/train_drug.csv")
train_features = pd.read_csv("/kaggle/input/lish-moa/train_features.csv")
train_targets_scored = pd.read_csv("/kaggle/input/lish-moa/train_targets_scored.csv")
train_targets_nonscored = pd.read_csv("/kaggle/input/lish-moa/train_targets_nonscored.csv")

In [None]:
# Sanity check: table shapes, then a preview of the training features.
print(train_features.shape, train_targets_scored.shape, train_targets_nonscored.shape,  test_features.shape)
train_features.head(5)

In [None]:
# Column names that appear in both the non-scored and the scored target tables.
common_names = [name for name in train_targets_nonscored.columns 
                if name in train_targets_scored.columns]
common_names

In [None]:
# Row labels of the control-vehicle samples ("ctl_vehicle") in each table.
cp_indexes_train = train_features.index[train_features["cp_type"] == "ctl_vehicle"]
cp_indexes_test = test_features.index[test_features["cp_type"] == "ctl_vehicle"]
print(len(cp_indexes_train), len(cp_indexes_test))

In [None]:

#train_features = train_features.drop(cp_indexes_train).reset_index(drop=True)
#test_dropped_rows = test_features.iloc[cp_indexes_test]

#test_features = test_features.drop(cp_indexes_test).reset_index(drop=True)
#train_targets_scored = train_targets_scored.drop(cp_indexes_train).reset_index(drop=True)
#train_targets_nonscored = train_targets_nonscored.drop(cp_indexes_train).reset_index(drop=True)

In [None]:
#train_features = train_features.drop("cp_type", axis=1)
#test_features = test_features.drop("cp_type",  axis=1)
#print(train_features.shape, test_features.shape)

Let's add drug_id to train_features

In [None]:
# Preview the sig_id -> drug_id table that is merged into train_features later.
train_drug.head(5)

Preprocessing block

In [None]:
# Feature column groups, selected by name prefix: "g-" and "c-".
GENES = list(filter(lambda colname: colname.startswith("g-"), train_features.columns))

CELLS = list(filter(lambda colname: colname.startswith("c-"), train_features.columns))

In [None]:
#map categorical values
def map_dataset(df):
    """Encode the categorical ``cp_type`` and ``cp_dose`` columns as integers.

    The frame is modified in place and also returned. Values not listed in
    the encoding tables become NaN (``Series.map`` semantics).

    Args:
        df: DataFrame containing ``cp_type`` and ``cp_dose`` columns.

    Returns:
        The same DataFrame object with both columns replaced by their codes.
    """
    encodings = {
        'cp_type': {'ctl_vehicle': 0, 'trt_cp': 1},
        'cp_dose': {"D1": 0, "D2": 1},
    }
    for column, codes in encodings.items():
        df[column] = df[column].map(codes)

    return df

In [None]:
# Encode cp_type / cp_dose as integers in both feature tables.
# NOTE(review): map_dataset overwrites the columns in place, so running this
# cell a second time maps the already-encoded values to NaN.
train_features = map_dataset(train_features)
test_features = map_dataset(test_features)

In [None]:
def add_PCA(train, test):
    """Append PCA components and row means of gene/cell features.

    Both PCAs are fitted on train and test stacked together. The generated
    block holds 200 gene components, 20 cell components, the per-row gene
    mean and the per-row cell mean, in that order, with integer column
    names starting at 0.

    Args:
        train: Training feature DataFrame containing the GENES and CELLS columns.
        test: Test feature DataFrame containing the GENES and CELLS columns.

    Returns:
        Tuple ``(train, test)`` of new DataFrames with the generated columns
        appended on the right.
    """
    n_gs = 200  # number of PCA components kept for gene features
    n_cs = 20   # number of PCA components kept for cell features

    pca_cs = PCA(n_components=n_cs)
    pca_gs = PCA(n_components=n_gs)

    united = pd.concat([train[GENES + CELLS], test[GENES + CELLS]], axis=0)

    united_pca_gs = pca_gs.fit_transform(united[GENES])
    united_pca_cs = pca_cs.fit_transform(united[CELLS])

    united_c_mean = np.expand_dims(united[CELLS].mean(axis=1), axis=1)
    united_g_mean = np.expand_dims(united[GENES].mean(axis=1), axis=1)

    generated = np.concatenate([united_pca_gs, united_pca_cs,
                                united_g_mean, united_c_mean], axis=1)

    # Re-use the callers' row labels so the column-wise concat below lines up
    # row by row even when train/test do not carry a default RangeIndex.
    n_train = train.shape[0]
    train_generated = pd.DataFrame(generated[:n_train], index=train.index)
    test_generated = pd.DataFrame(generated[n_train:], index=test.index)

    train = pd.concat([train, train_generated], axis=1)
    print("before:", test.shape)
    print(test_generated.shape, test.shape)
    test = pd.concat([test, test_generated], axis=1)
    print("After:", test.shape)
    return train, test


In [None]:
# Append the PCA / row-mean features and print the shapes before and after.
print(train_features.shape, test_features.shape)
train_features, test_features = add_PCA(train_features, test_features)
print(train_features.shape, test_features.shape)


In [None]:
# Inspect the last test rows to confirm the generated columns were appended.
test_features.tail(10)

In [None]:
def QuantileTransform(train, test, transform_type):
    """Quantile-transform the gene and cell columns of both tables.

    A single ``QuantileTransformer`` (100 quantiles, ``random_state=0``) is
    fitted on train and test stacked together; the transformed values are
    written back into the GENES + CELLS columns of the given frames.

    Args:
        train: Training feature DataFrame (modified in place).
        test: Test feature DataFrame (modified in place).
        transform_type: Value passed as ``output_distribution``.

    Returns:
        Tuple ``(train, test)`` — the same objects that were passed in.
    """
    columns = GENES + CELLS
    n_train = train.shape[0]

    stacked = pd.concat([train[columns], test[columns]], axis=0)
    scaler = QuantileTransformer(n_quantiles=100, random_state=0,
                                 output_distribution=transform_type)
    transformed = scaler.fit_transform(stacked)

    # The first n_train rows belong to train, the remainder to test.
    train[columns] = transformed[:n_train]
    test[columns] = transformed[n_train:]

    return train, test

In [None]:
# Distribution of every 25th gene / cell feature before the quantile transform.
# DataFrame.hist creates its own figure, so a preceding plt.figure() call would
# only leave an extra empty figure in the output.
train_features[GENES[::25] + CELLS[::25]].hist(figsize=(20, 20))
plt.show()

In [None]:
# Map gene and cell features of both tables to a normal output distribution.
train_features, test_features = QuantileTransform(train_features, test_features, 'normal')

In [None]:
# Same histograms as before, now on the quantile-transformed features.
train_features[GENES[::25] + CELLS[::25]].hist(figsize=(20, 20))
plt.show()

In [None]:
# Attach drug_id to every training row (merge() defaults to an inner join on
# sig_id) and replace it with integer codes.
# NOTE(review): later cells pair train_features rows with train_targets_scored
# rows by position; that presumably relies on this merge keeping every row in
# its original order — confirm. Re-running this cell merges drug_id again.
train_features = train_features.merge(train_drug, on='sig_id')
train_features['drug_id'] = LabelEncoder().fit_transform(train_features['drug_id'])

In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch import optim
from torch.utils.data import Dataset, DataLoader

In [None]:
#!pip3 install pytorch-tabnet
sys.path.append('../input/tabnet')

In [None]:
from pytorch_tabnet.tab_model import TabNetRegressor

In [None]:
class MoADataset(Dataset):
    """Torch dataset over a feature matrix and an optional target matrix.

    Both arrays are converted to float tensors on construction. Items are
    ``(features, targets)`` pairs when targets were given, otherwise just the
    feature row.
    """

    def __init__(self, X, y=None):
        has_targets = y is not None
        if has_targets:
            # Features and targets must describe the same number of rows.
            assert X.shape[0] == y.shape[0]

        self.X = torch.tensor(X, dtype=torch.float)
        self.y = torch.tensor(y, dtype=torch.float) if has_targets else None

    def __getitem__(self, idx):
        features = self.X[idx]
        if self.y is None:
            return features
        return features, self.y[idx]

    def __len__(self):
        return len(self.X)

In [None]:
class ModelMLP(nn.Module):
    """Weight-normalised MLP with one additive skip connection.

    ``forward`` uses dense1 -> (bn2, dropout2) -> dense2 -> (bn3, dropout3)
    and feeds the sum of the normalised activations and the dense2 output
    into dense4. ``batch_norm1``, ``dropout1``, ``dense3``, ``batch_norm4``
    and ``dropout4`` are created but not used by ``forward``.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        wide = 2 * hidden_size

        # Input stage.
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(0.3)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, wide))

        # First hidden stage.
        self.batch_norm2 = nn.BatchNorm1d(wide)
        self.dropout2 = nn.Dropout(0.2)
        self.dense2 = nn.utils.weight_norm(nn.Linear(wide, hidden_size))

        # Second hidden stage.
        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(0.1)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, hidden_size))

        # Output stage.
        self.batch_norm4 = nn.BatchNorm1d(hidden_size)
        self.dropout4 = nn.Dropout(0.1)
        self.dense4 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def get_freeze(self, target_size, freeze_all=True):
        """Optionally freeze all current parameters, then replace the last two layers.

        Args:
            target_size: Output width of the new ``dense4`` layer.
            freeze_all: When true, every existing parameter stops requiring
                gradients before the new (trainable) layers are created.
        """
        if freeze_all:
            for frozen in self.parameters():
                frozen.requires_grad = False

        # Fresh plain Linear layers (no weight norm) replace the old head.
        self.dense3 = nn.Linear(self.hidden_size, self.hidden_size)
        self.dense4 = nn.Linear(self.hidden_size, target_size)

    def forward(self, x):
        hidden = F.leaky_relu(self.dense1(x), 1e-2)

        hidden = self.dropout2(self.batch_norm2(hidden))
        skip = F.relu(self.dense2(hidden))

        hidden = self.dropout3(self.batch_norm3(skip))

        # Residual sum of the normalised activations and the dense2 output.
        return self.dense4(hidden + skip)

In [None]:
class ResidualNet(nn.Module):
    """Three-block fully connected network with concatenation skips.

    ``block2`` receives the raw input concatenated with the ``block1``
    output; ``block3`` receives the ``block1`` and ``block2`` outputs
    concatenated, i.e. a tensor of width ``2 * n_hidden``.
    """

    def __init__(self, num_features, num_targets, n_hidden):
        super(ResidualNet, self).__init__()
        self.block1 = nn.Sequential(
            nn.Linear(num_features, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.3),
            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.3),
        )

        self.block2 = nn.Sequential(
            nn.Linear(num_features + n_hidden, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.2),

            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.2),

            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.Dropout(0.2),
            nn.BatchNorm1d(n_hidden),
        )

        self.block3 = nn.Sequential(
            nn.Linear(n_hidden + n_hidden, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.1),
            nn.Linear(n_hidden, n_hidden),
            nn.ELU(),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.1),
            nn.Linear(n_hidden, num_targets)
        )

    def get_freeze(self, target_size, freeze_all=True):
        """Optionally freeze all current parameters, then install a new head.

        The new ``block3`` is sized from the network's own hidden width so it
        accepts the ``2 * n_hidden`` wide tensor that ``forward`` passes to it.
        For ``n_hidden == 512`` the inner widths are 512 and 256.

        Args:
            target_size: Output width of the new head.
            freeze_all: When true, every existing parameter stops requiring
                gradients before the new (trainable) head is created.
        """
        if freeze_all:
            for param in self.parameters():
                param.requires_grad = False

        # Hidden width as built in __init__ (output size of the first layer).
        n_hidden = self.block1[0].out_features
        half_hidden = max(n_hidden // 2, 1)

        self.block3 = nn.Sequential(
            nn.Linear(2 * n_hidden, n_hidden),
            nn.BatchNorm1d(n_hidden),
            nn.Dropout(0.1),
            nn.ELU(),
            nn.Linear(n_hidden, half_hidden),
            nn.BatchNorm1d(half_hidden),
            nn.Dropout(0.1),
            nn.ELU(),
            nn.Linear(half_hidden, target_size)
        )

    def forward(self, x):
        x_1 = self.block1(x)
        x_1_cat = torch.cat([x, x_1], dim=-1)
        x_2 = self.block2(x_1_cat)
        x_2_cat = torch.cat([x_1, x_2], dim=-1)
        output = self.block3(x_2_cat)

        return output

In [None]:
class Model(nn.Module):
    """Two-hidden-layer weight-normalised MLP that re-injects the raw input.

    The second dense layer sees the first hidden representation concatenated
    with the un-normalised input features. ``dropout1`` is created but not
    used by ``forward``.
    """

    def __init__(self, num_features, num_targets, hidden_size):
        super().__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(0.2)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dropout2 = nn.Dropout(0.2)
        # Input width: hidden representation plus the raw features.
        self.dense2 = nn.utils.weight_norm(
            nn.Linear(hidden_size + num_features, hidden_size))

        self.batch_norm3 = nn.BatchNorm1d(hidden_size)
        self.dropout3 = nn.Dropout(0.2)
        self.dense3 = nn.utils.weight_norm(nn.Linear(hidden_size, num_targets))

    def forward(self, inputs):
        normed = self.batch_norm1(inputs)
        first = F.leaky_relu(self.dense1(normed), 1e-2)
        first = self.dropout2(self.batch_norm2(first))

        with_inputs = torch.cat([first, inputs], axis=-1)
        second = F.leaky_relu(self.dense2(with_inputs), 1e-2)
        second = self.dropout3(self.batch_norm3(second))

        return self.dense3(second)

In [None]:
class SimpleNet(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear."""

    def __init__(self, num_features, num_targets, n_hidden):
        super().__init__()
        self.layer1 = nn.Linear(num_features, n_hidden)
        self.layer2 = nn.Linear(n_hidden, num_targets)

    def forward(self, inputs):
        hidden = F.relu(self.layer1(inputs))
        return self.layer2(hidden)

In [None]:
# Shared training configuration read by the training / evaluation helpers.
PARAMS = {
    "BATCH_SIZE": 1024,  # DataLoader and TabNet batch size
    "EPOCHS": 100,  # epoch budget of the PyTorch training loop and OneCycleLR
    "DEVICE": torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    "PATIENCE": 10,  # early-stopping patience (epochs without improvement)
    "NUM_FOLDS": 10,  # number of cross-validation folds in train()
    "TARGETS_OUTPUT": 206,  # number of target columns scored by compute_score()
    "LEARNING_RATE": 1e-2,  # Adam learning rate for the PyTorch models
    "WEIGHT_DECAY" : 1e-5  # Adam weight decay (also used in tabnet_params)
}

In [None]:
# Maximum number of epochs passed to TabNet's fit() in train().
MAX_EPOCH=200
# Keyword arguments for TabNetRegressor; train() overwrites n_d / n_a with a
# randomly drawn width before each TabNet model is built.
tabnet_params = dict(n_d=24, n_a=24, n_steps=1, gamma=1.3,
                     lambda_sparse=0, optimizer_fn=torch.optim.Adam,
                     optimizer_params=dict(lr=3e-2, weight_decay=PARAMS["WEIGHT_DECAY"]),
                     mask_type='entmax',
                     scheduler_params=dict(mode="min",
                                           patience=5,
                                           min_lr=1e-5,
                                           factor=0.8,),
                     scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                     verbose=5,
                     )

In [None]:
from pytorch_tabnet.metrics import Metric

In [None]:
class LogitsLogLoss(Metric):
    """Mean binary log loss computed from raw scores (sigmoid applied here)."""

    def __init__(self):
        self._maximize = False
        self._name = "logits_ll"

    def __call__(self, y_true, y_pred):
        """Compute the log loss of predictions against targets.

        Parameters
        ----------
        y_true: np.ndarray
            Target matrix or vector.
        y_pred: np.ndarray
            Raw (pre-sigmoid) score matrix or vector.

        Returns
        -------
            float
            Mean log loss over all elements.
        """
        probs = 1 / (1 + np.exp(-y_pred))
        # Small constants keep the logarithms finite at probabilities 0 and 1.
        negative_term = (1 - y_true) * np.log(1 - probs + 1e-15)
        positive_term = y_true * np.log(probs + 1e-10)
        return np.mean(-(negative_term + positive_term))

In [None]:
class SmoothBCEwLogits(nn.Module):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 by ``smoothing`` before the loss is
    computed. ``reduction`` selects how the element-wise losses are combined:
    ``'mean'`` (default), ``'sum'``, or anything else to return them
    unreduced.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0005):
        super(SmoothBCEwLogits, self).__init__()
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0005):
        """Return ``targets * (1 - smoothing) + 0.5 * smoothing``.

        ``n_labels`` is accepted for interface compatibility and is not used.
        """
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
                                           self.smoothing)
        # Ask for element-wise losses; the functional default ('mean') would
        # collapse them to a scalar and make self.reduction a no-op.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
                                                  reduction='none')

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss




def validation_fn(model, val_dataloader, criterion=F.binary_cross_entropy_with_logits):
    """Evaluate a network on a validation loader.

    The network is switched to evaluation mode for the duration of the call
    (so dropout is disabled and batch-norm uses its running statistics) and
    its previous training/eval mode is restored afterwards.

    Args:
        model: Dict with the network under the ``'model'`` key.
        val_dataloader: Loader yielding ``(X, y)`` batches.
        criterion: Loss applied to the first 206 prediction / target columns.

    Returns:
        Tuple ``(outputs, mean_loss)``: sigmoid probabilities for every
        validation row as a NumPy array, and the loss averaged over batches.
    """
    net = model['model']
    was_training = net.training
    net.eval()

    total_val_loss = 0
    outputs = []
    try:
        with torch.no_grad():
            for X, y in val_dataloader:
                X, y = X.to(PARAMS["DEVICE"]), y.to(PARAMS["DEVICE"])
                preds = net(X)
                total_val_loss += criterion(preds[:, :206], y[:, :206]).item()
                outputs.append(preds.sigmoid().detach().cpu().numpy())
    finally:
        # Leave the network in whatever mode the caller had it in.
        net.train(was_training)

    outputs = np.concatenate(outputs)

    return outputs, total_val_loss / len(val_dataloader)
    
def predict_fn(model, test_X, model_name=None):
    """Predict probabilities for ``test_X`` with a trained model.

    Args:
        model: A trained ``nn.Module``, a TabNet model exposing ``predict``,
            or a dict wrapping either of them under the ``'model'`` key.
        test_X: Feature matrix as a NumPy array.
        model_name: ``'TabNet'`` selects the TabNet path; any other value
            runs the PyTorch path.

    Returns:
        NumPy array of sigmoid probabilities, one row per row of ``test_X``.
    """
    if isinstance(model, dict):
        model = model['model']

    if model_name == 'TabNet':
        # TabNet was trained on logits, so squash its raw output here.
        # (TabNet models have no .eval(); predict() is called directly.)
        raw = model.predict(test_X)
        return 1 / (1 + np.exp(-raw))

    model.eval()
    test_dataset = MoADataset(test_X)
    test_loader = DataLoader(test_dataset, batch_size=PARAMS["BATCH_SIZE"], shuffle=False)
    outputs = []
    with torch.no_grad():
        for X in test_loader:
            X = X.to(PARAMS["DEVICE"])
            preds = model(X)
            outputs.append(preds.sigmoid().detach().cpu().numpy())

    return np.concatenate(outputs)
   
    
def train_fn(model, optimizer, loss, train_loader, val_loader, scheduler, fold):
    """Train one network for one fold with early stopping and checkpointing.

    The best weights (lowest validation loss) are saved to
    ``"<name>_<fold>.pth"`` and reloaded before returning.

    Args:
        model: Dict with ``'model'`` (the network) and ``'name'`` keys.
        optimizer: Optimizer over the network parameters.
        loss: Training criterion called as ``loss(preds, y)``.
        train_loader: Loader yielding ``(X, y)`` training batches.
        val_loader: Loader yielding ``(X, y)`` validation batches.
        scheduler: LR scheduler stepped after every batch.
        fold: Fold index, used in log lines and the checkpoint file name.

    Returns:
        Tuple ``(network, outputs)`` where ``outputs`` are the validation
        probabilities produced at the best (restored) epoch.
    """
    net, name = model['model'].to(PARAMS["DEVICE"]), model['name']
    checkpoint_path = "{}_{}.pth".format(name, fold)
    no_improvement_steps = 0
    # Start from +inf so the first epoch always writes a checkpoint.
    min_loss_val = float("inf")
    best_outputs = None

    for epoch in range(PARAMS["EPOCHS"]):
        if no_improvement_steps == PARAMS["PATIENCE"]:
            print("Early stopping!")
            break

        # Validation below switches to eval mode; switch back for training.
        net.train()
        total_loss_train = 0

        for X, y in train_loader:
            optimizer.zero_grad()
            X, y = X.to(PARAMS["DEVICE"]), y.to(PARAMS["DEVICE"])
            preds = net(X)

            loss_val = loss(preds, y)
            loss_val.backward()
            optimizer.step()
            scheduler.step()
            total_loss_train += loss_val.item()

        total_loss_train /= len(train_loader)

        # Evaluate without dropout and with batch-norm running statistics.
        net.eval()
        outputs, total_loss_val = validation_fn(model, val_loader)
        if total_loss_val < min_loss_val:
            min_loss_val = total_loss_val
            best_outputs = outputs
            torch.save(net.state_dict(), checkpoint_path)
            no_improvement_steps = 0
        else:
            no_improvement_steps += 1

        print("Fold: %d Epoch: %d, train loss:%.6f | validation loss: %.6f"%(fold, epoch, total_loss_train, total_loss_val))

    model['model'].load_state_dict(torch.load(checkpoint_path))
    # Return the predictions that belong to the restored checkpoint.
    return model['model'], best_outputs

#------------------------------------------------------
def return_model(model_name, input_size, target_size, hidden_size):
    """Build a freshly initialised network by name.

    Args:
        model_name: One of ``'ResNet'``, ``'ModelMLP'``, ``'Model'``, ``'simple'``.
        input_size: Number of input features.
        target_size: Number of outputs.
        hidden_size: Hidden width (``'ResNet'`` ignores it and uses 512).

    Returns:
        Dict with ``'name'`` and ``'model'`` keys.

    Raises:
        RuntimeError: If ``model_name`` is not recognised.
    """
    print(model_name)

    # Constructors are wrapped in lambdas so only the requested one runs.
    builders = {
        'ResNet': lambda: ResidualNet(input_size, target_size, 512),
        'ModelMLP': lambda: ModelMLP(input_size, target_size, hidden_size),
        'Model': lambda: Model(input_size, target_size, hidden_size),
        'simple': lambda: SimpleNet(input_size, target_size, hidden_size),
    }

    build = builders.get(model_name)
    if build is None:
        raise RuntimeError("Such network  doesn't exists")

    return {"name": model_name, "model": build()}

#--------------------------------------------------------------------------------


def train(model_names, train_X, train_y, groups, n_seeds, transfer_learning=None):
    """Cross-validate every requested model and collect out-of-fold predictions.

    For each entry of ``model_names`` a fresh multilabel-stratified K-fold
    split (``PARAMS["NUM_FOLDS"]`` folds) is drawn and one model per fold is
    trained: TabNet through its own ``fit`` API, everything else through
    ``train_fn``.

    Args:
        model_names: Model names; ``"TabNet"`` or a name known to ``return_model``.
        train_X: Feature matrix (NumPy array).
        train_y: Target matrix (NumPy array).
        groups: Not used by this function.
        n_seeds: Not used by this function.
        transfer_learning: Optional dict with ``'in_size'`` and ``'out_size'``
            (and optionally ``'hidden_size'``) describing the checkpointed
            network to load and re-head via ``get_freeze``.

    Returns:
        Tuple ``(trained_models, models_predictions)``: a list of
        ``{'name', 'fold_models'}`` dicts and a list with one out-of-fold
        probability matrix per model name.
    """
    print("Train shape:", train_X.shape, " train_y shape:", train_y.shape)
    models_predictions = []
    trained_models = []

    for model_name in model_names:
        # Randomly drawn widths: `val` for TabNet (n_a / n_d), `n_hidden` for
        # the PyTorch models.
        val = np.random.randint(8, 30)
        n_hidden = np.random.randint(512, 1524)
        group_kfold = MultilabelStratifiedKFold(n_splits=PARAMS["NUM_FOLDS"], shuffle=True)
        preds = np.zeros((train_y.shape[0], train_y.shape[1]), dtype=float)
        loss = SmoothBCEwLogits()
        folds_models = []

        for idx, (train_index, test_index) in enumerate(group_kfold.split(train_X, train_y)):

            X_train, y_train = train_X[train_index], train_y[train_index]
            X_val, y_val = train_X[test_index], train_y[test_index]

            if model_name != "TabNet":
                if transfer_learning is not None:
                    # return_model requires a hidden size; take it from the
                    # transfer-learning spec when given, else the drawn one.
                    # NOTE(review): the width must match the checkpoint being
                    # loaded below — confirm how the checkpoint was trained.
                    model = return_model(model_name,
                                         transfer_learning['in_size'],
                                         transfer_learning['out_size'],
                                         transfer_learning.get('hidden_size', n_hidden))
                    model["model"].load_state_dict(torch.load("{}_{}.pth".format(model["name"], idx)))
                    model["model"].get_freeze(train_y.shape[-1], True)

                else:
                    model = return_model(model_name, X_train.shape[-1], y_train.shape[-1], n_hidden)

                print("Train - Test:", len(train_index), len(test_index))
                train_dataset = MoADataset(X_train, y_train)
                val_dataset = MoADataset(X_val, y_val)

                train_loader = DataLoader(train_dataset, batch_size=PARAMS["BATCH_SIZE"], shuffle=True)
                val_loader = DataLoader(val_dataset, batch_size=PARAMS["BATCH_SIZE"], shuffle=False)

                optimizer = Adam(model["model"].parameters(), lr=PARAMS["LEARNING_RATE"], weight_decay=PARAMS["WEIGHT_DECAY"])
                scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                          pct_start=0.1, div_factor=1e3,
                                                          max_lr=1e-2, epochs=PARAMS["EPOCHS"],
                                                          steps_per_epoch=len(train_loader))

                # train_fn returns the bare network, so `model` stops being a dict here.
                model, outputs = train_fn(model, optimizer, loss, train_loader, val_loader, scheduler, idx)

                preds[test_index] = outputs

            else:
                tabnet_params["n_a"] = val
                tabnet_params['n_d'] = val
                model = TabNetRegressor(**tabnet_params)

                model.fit(X_train=X_train,
                          y_train=y_train[:, :206],
                          eval_set=[(X_val, y_val[:, :206])],
                          eval_name=["validation loss"],
                          eval_metric=["logits_ll"],
                          max_epochs=MAX_EPOCH,
                          patience=PARAMS['PATIENCE'], batch_size=PARAMS["BATCH_SIZE"],
                          virtual_batch_size=128,
                          num_workers=1, drop_last=False,
                          loss_fn=SmoothBCEwLogits())

                # TabNet outputs logits; convert to probabilities.
                preds[test_index] = 1 / (1 + np.exp(-model.predict(X_val)))

            folds_models.append(model)

        trained_models.append({'name': model_name, 'fold_models': folds_models})
        models_predictions.append(preds)

    return trained_models, models_predictions

In [None]:
# Convert the tables to NumPy arrays for training.
print("preds dimensions:",  train_targets_scored.shape,train_targets_nonscored.shape)
# Target matrices: the first column is dropped (presumably sig_id — confirm).
aux_train_y = train_targets_nonscored.iloc[:, 1:].to_numpy()
merged_train = train_targets_scored.merge(train_targets_nonscored, on='sig_id').iloc[:, 1:].to_numpy()

# Features: drop the first column and, for train only, the last column
# (drug_id, appended by the merge cell above).
train_X = train_features.iloc[:, 1:-1].to_numpy()
train_y = train_targets_scored.iloc[:, 1:].to_numpy()
test_X = test_features.iloc[:, 1:].to_numpy()
groups = train_features['drug_id']

In [None]:
# Shapes of the training feature and target matrices.
train_X.shape, train_y.shape

In [None]:
input_size, target_size = train_X.shape[-1], train_y.shape[-1]

# NOTE(review): MLP and ResNet are instantiated here but not referenced again
# in the visible cells; train() builds its own models from model_names.
MLP = ModelMLP(input_size, target_size, 1024)
ResNet = ResidualNet(input_size, target_size, 512)
#TabNet = TabNetMultiTaskClassifier()

# Ensemble members; repeated names are trained as separate models.
model_names = [ 'simple','TabNet', "Model", "TabNet", 'simple', 'Model', 'TabNet', 'simple', "Model"]

In [None]:
# Shapes of the features and of the non-scored (auxiliary) targets.
print(train_X.shape, aux_train_y.shape)
print(aux_train_y.shape)
#models, predictions = train(model_names, train_X, aux_train_y, groups, 30, 1)

In [None]:
# Sizes describing a network pre-trained on the non-scored targets.
# NOTE(review): this dict is not passed to train() below, so the
# transfer-learning branch is not exercised.
transfer_learning = {'in_size':train_X.shape[-1],
                     'out_size': aux_train_y.shape[-1]}

# Train every model in model_names on the scored targets.
models, predictions = train(model_names, train_X, train_y, groups, 1)

In [None]:
# Dump the list of out-of-fold prediction matrices (one per trained model).
print(predictions)

In [None]:
#######    Try to Stack ########
def make_folds_predictions(models, test_X):
    """Average the fold predictions of every model on ``test_X``.

    Args:
        models: List of dicts with ``'name'`` and ``'fold_models'`` keys.
        test_X: Feature matrix to predict on.

    Returns:
        List with one fold-averaged prediction array per entry of ``models``.
    """
    all_predictions = []
    for entry in models:
        fold_outputs = [predict_fn(fold_model, test_X, entry['name'])
                        for fold_model in entry['fold_models']]
        all_predictions.append(sum(fold_outputs) / len(fold_outputs))

    return all_predictions


def stack_predict(meta_models, weak_models, test_X, meta_with_source=False):
    """Run a two-level stack: weak models first, meta models on their outputs.

    Args:
        meta_models: Meta-level models (``{'name', 'fold_models'}`` dicts).
        weak_models: First-level models in the same format.
        test_X: Feature matrix (NumPy array).
        meta_with_source: When true, the raw features are prepended to the
            weak predictions before they are fed to the meta models.

    Returns:
        Average of the meta models' predictions.
    """
    weak_preds = make_folds_predictions(weak_models, test_X)

    if meta_with_source:
        # Wrap test_X in a list: `test_X + weak_preds` would try to add the
        # array to the list element-wise instead of concatenating columns.
        stacked_input = np.concatenate([test_X] + weak_preds, axis=-1)
    else:
        stacked_input = np.concatenate(weak_preds, axis=-1)

    meta_predictions = make_folds_predictions(meta_models, stacked_input)

    final_predictions = sum(meta_predictions) / len(meta_predictions)

    return final_predictions

def blend_stack_models(all_models, test_X):
    """Average the stacked predictions of several weak/meta model groups.

    Args:
        all_models: List of dicts with ``'meta_models'`` and ``'weak_models'``.
        test_X: Feature matrix to predict on.

    Returns:
        Mean of the ``stack_predict`` outputs over all groups.
    """
    stacked = [
        stack_predict(group['meta_models'], group['weak_models'], test_X)
        for group in all_models
    ]

    return sum(stacked) / len(stacked)


def train_stack_models(weak_learners, meta_learners, train_X, train_y, meta_params, weak_params):
        """Train stacked (weak + meta) models inside a 3-fold outer CV.

        For every outer fold, the training part is split again into three
        multilabel-stratified folds; weak learners are trained on one side of
        each inner split and meta learners on the weak predictions for the
        other side. The resulting stacks are blended to predict the outer
        hold-out rows.

        Args:
            weak_learners: Model names passed to ``train`` for the first level.
            meta_learners: Model names passed to ``train`` for the meta level.
            train_X: Feature matrix (NumPy array).
            train_y: Target matrix (NumPy array).
            meta_params: Not used in this function.
            weak_params: Not used in this function.

        Returns:
            Tuple ``([predictions], all_models)``: a one-element list holding
            the out-of-fold prediction matrix, and per outer fold the list of
            ``{'weak_models', 'meta_models'}`` dicts.

        Note:
            Overwrites ``PARAMS['NUM_FOLDS']`` and ``PARAMS['EPOCHS']``
            globally and does not restore them (they end at 3 and 100).
        """
        control_fold = KFold(n_splits=3, shuffle=True, random_state=0)
        predictions = np.zeros(train_y.shape)
        all_models = []
        for fold_num, (train_index_ctl, test_index_ctl) in enumerate(control_fold.split(train_X, train_y)):
            weak_folds = MultilabelStratifiedKFold(n_splits=3, shuffle=True, random_state=1)
            # Outer split: *_ctl train part feeds the inner CV, test part is held out.
            train_X_ctl, train_y_ctl = train_X[train_index_ctl], train_y[train_index_ctl]
            test_X_ctl, test_y_ctl = train_X[test_index_ctl], train_y[test_index_ctl]
            weak_meta_models = []
            for train_index, test_index in weak_folds.split(train_X_ctl, train_y_ctl):
                # Inner split: weak learners see one side, meta learners the other.
                weak_train_X, weak_train_y = train_X_ctl[train_index], train_y_ctl[train_index]
                meta_train_X, meta_train_y = train_X_ctl[test_index], train_y_ctl[test_index]

                # train() reads these globals, so they are changed before each call.
                PARAMS['NUM_FOLDS'] = 3
                PARAMS['EPOCHS'] = 30
                print("-------------TRAIN WEAK MODELS-------------")
                weak_models, _ = train(weak_learners, weak_train_X, weak_train_y, None, 1)
                weak_predictions = make_folds_predictions(weak_models, meta_train_X)
                #weak_predictions.append(meta_train_X)
                # Meta features: weak models' predictions concatenated column-wise.
                weak_predictions = np.concatenate(weak_predictions, axis=-1)

                print("------------TRAIN META MODELS--------------")
                PARAMS['EPOCHS'] = 100
                meta_models, _ = train(meta_learners, weak_predictions, meta_train_y, None, 1)
                weak_meta_models.append({ 'weak_models': weak_models,
                                            'meta_models': meta_models})
                
            all_models.append(weak_meta_models)
            # Blend this outer fold's stacks to predict its hold-out rows.
            preds = blend_stack_models(weak_meta_models, test_X_ctl)
            print("Preds shape", preds.shape, predictions[test_index_ctl].shape)
            predictions[test_index_ctl] = preds
            
        return [predictions], all_models
    

In [None]:
#predictions, all_models = train_stack_models(['Model'], ['simple'], train_X, train_y, None, None)

In [None]:
#print(models)

#simple_prediction = sum([pred[:, :206] for  pred in predictions]) / len(predictions)

def compute_score(y_pred, y_true):
    """Column-wise mean log loss over the first ``PARAMS["TARGETS_OUTPUT"]`` targets.

    Args:
        y_pred: Predicted probabilities, shape ``(n_samples, n_targets)``.
        y_true: Binary ground truth of the same shape.

    Returns:
        Tuple ``(mean_score, scores)``: the average log loss and the list of
        per-target log losses.

    Raises:
        AssertionError: If the two arrays differ in shape.
    """
    assert y_pred.shape == y_true.shape
    score = 0
    scores = []
    for i in range(PARAMS["TARGETS_OUTPUT"]):
        # labels=[0, 1] keeps log_loss working when a target column happens
        # to contain a single class in y_true.
        score_ = log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
        score += score_
        scores.append(score_)

    return score / PARAMS["TARGETS_OUTPUT"], scores

In [None]:
def find_best(scores):
    """Pick, for every target, the model with the lowest score.

    Args:
        scores: Sequence of per-model score lists, all of the same length
            (one entry per target).

    Returns:
        Dict mapping target index to the index of the best model; ties go to
        the model that appears first.
    """
    num_targets = len(scores[0])
    target_model = {}
    for target in range(num_targets):
        per_model = [model_scores[target] for model_scores in scores]
        target_model[target] = per_model.index(min(per_model))
    return target_model

def create_final_prediction(models_predictions, target_model):
    """Assemble a prediction matrix by taking each column from its chosen model.

    Args:
        models_predictions: List of prediction arrays of identical shape.
        target_model: Mapping from column index to the index (into
            ``models_predictions``) of the model to use for that column.

    Returns:
        New float array with the shape of ``models_predictions[0]``.
    """
    final_prediction = np.zeros(models_predictions[0].shape)

    for column in range(final_prediction.shape[-1]):
        chosen = models_predictions[target_model[column]]
        final_prediction[:, column] = chosen[:, column]

    return final_prediction

In [None]:
#print(len(predictions))
# Equal-weight average of all models' out-of-fold predictions (first 206
# columns), appended as one extra candidate next to the individual models.
ensemble_prediction = sum([pred[:, :206] for  pred in predictions]) / len(predictions)
total_predictions = predictions + [ensemble_prediction]
print(len(total_predictions), len(predictions))

In [None]:
# Score every candidate once: `score` holds the mean log loss per candidate,
# `scores` the per-target log losses per candidate.
score, scores = map(list, zip(*(compute_score(preds, train_y)
                                for preds in total_predictions)))
print(len(scores))

In [None]:
# For each target keep the candidate with the lowest out-of-fold log loss and
# build the combined out-of-fold prediction from those choices.
target_model = find_best(scores)
final_prediction = create_final_prediction(total_predictions, target_model)

In [None]:
#print(simple_prediction.shape, aux_train_y.shape)
# Mean log loss of the per-target selection.
# NOTE(review): the selection was made on these same out-of-fold predictions,
# so this number is presumably optimistic — confirm on held-out data.
compute_score(final_prediction, train_y)[0]

In [None]:
# Dump the trained model containers (name + list of fold models per entry).
print(models)

In [None]:
def final_preds(all_models, test_X, weights=None):
    """Predict ``test_X`` with every trained model, averaging over folds.

    Args:
        all_models: List of dicts with ``'name'`` and ``'fold_models'`` keys.
        test_X: Feature matrix (NumPy array).
        weights: Accepted for interface compatibility; currently unused
            because the per-model predictions are returned unblended.

    Returns:
        List with one fold-averaged probability matrix per model, restricted
        to the first 206 columns.
    """
    models_preds = []
    for models in all_models:
        model_name = models['name']
        fold_preds = []
        for fold_model in models['fold_models']:
            if model_name == 'TabNet':
                # TabNet returns logits; convert to probabilities.
                preds = 1 / (1 + np.exp(-fold_model.predict(test_X)))
            else:
                preds = predict_fn(fold_model, test_X)

            fold_preds.append(preds[:, :206])
        print(len(fold_preds))
        models_preds.append(sum(fold_preds) / len(fold_preds))
    return models_preds
    #return sum([w*p for w, p in zip(weights, models_preds)])


In [None]:
# Predict the test set with every model, then add the equal-weight ensemble
# as an extra candidate (same layout as total_predictions above).
print(test_X.shape, test_features.shape)
print(train_X.shape)
test_preds = final_preds(models, test_X)
ensemble_preds = sum(test_preds) / len(test_preds)
total_test_preds = test_preds + [ensemble_preds]


In [None]:
# Apply the per-target model choice made on the out-of-fold scores.
test_preds = create_final_prediction(total_test_preds, target_model)

In [None]:
# Build the submission table: sig_id followed by one column per scored target.
# NOTE(review): this rebinds `predictions`, which until now held the list of
# out-of-fold matrices returned by train().
print(test_preds.shape)
predictions = pd.concat([test_features['sig_id'],
                         pd.DataFrame(test_preds, columns=train_targets_scored.columns[1:])], 
                        axis=1)

#predictions = predictions.merge(test_dropped_rows['sig_id'], how='outer', on='sig_id').fillna(0)
print(predictions.shape)

In [None]:
# Write the submission file to the working directory.
predictions.to_csv("submission.csv", index=False)

In [None]:
####### boosting ensemble ########

In [None]:
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

from hyperopt import Trials, STATUS_OK, tpe, hp, fmin

In [None]:
class ObjectiveFunction():
    """Callable that cross-validates an estimator class for a parameter set.

    Calling the object with a dict of keyword arguments builds ``clf(**space)``
    and returns the mean ``cross_val_score`` on the stored data.
    """

    def __init__(self, clf, X, y, cv=5, scoring=None):
        self.clf = clf
        self.X, self.y = X, y
        self.cv = cv
        self.scoring = scoring

    def change_model(self, clf):
        """Swap the estimator class and return ``self`` for chaining."""
        self.clf = clf
        return self

    def __call__(self, space):
        estimator = self.clf(**space)
        fold_scores = cross_val_score(estimator, self.X, self.y,
                                      cv=self.cv, scoring=self.scoring)
        return sum(fold_scores) / len(fold_scores)

def train_classifiers(clf, X_train, y_train, space, clf_params):
    """Fit one estimator per target column on a random 70/30 split.

    Args:
        clf: Estimator class; instantiated as ``clf(**clf_params)``.
        X_train: Feature matrix.
        y_train: Target matrix with one column per target.
        space: Not used in this function.
        clf_params: Keyword arguments for the estimator constructor.

    Returns:
        Tuple ``(predictors, predictions)``: the fitted estimators and, despite
        the name, the per-target validation losses.
    """
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, shuffle=True)

    predictors, predictions = [], []
    for target in range(y_train.shape[-1]):
        print("target :{}".format(target))
        val_labels = y_val[:, target]

        predictor = clf(**clf_params)
        predictor.fit(X_train, y=y_train[:, target], eval_set=(X_val, val_labels))
        predictors.append(predictor)

        preds = predictor.predict(X_val)
        # evaluate() returns (error_sum, weight_sum); normalise the error sum
        # by the number of validation rows.
        loss = LoglossMetric().evaluate([preds], val_labels)[0] / len(preds)
        print("loss:", loss)
        predictions.append(loss)
    return predictors, predictions
        

            
            

In [None]:
# Hyperopt search space for a gradient-boosting model.
# NOTE(review): not referenced by any visible cell that actually runs.
space_gbm = {

    'max_depth' : hp.choice('max_depth', range(5, 30, 1)),
    'learning_rate' : hp.uniform('learning_rate', 0.01, 1),
    'n_estimators' : hp.choice('n_estimators', range(20, 200, 1)),
    'gamma' : hp.uniform('gamma', 0, 1),
    'min_child_weight' : hp.uniform('min_child_weight', 1, 10),
    'subsample' : hp.uniform('subsample', 0.1, 1),
    'colsample_bytree' : hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    # NOTE(review): the hyperopt label below is spelled 'aplha'.
    'reg_alpha' : hp.uniform('aplha', 0, 1),
    'reg_lambda' : hp.uniform('lambda', 0, 1)}

# Hyperopt search space for XGBoost.
space_xgb = {
    'max_depth' : hp.choice('max_depth', range(2, 30, 1)),
    'learning_rate' : hp.uniform('learning_rate', 0.01, 1),
    'n_estimators' : hp.choice('n_estimators', range(20, 200, 1)),
    'gamma' : hp.uniform('gamma', 0, 1),
    'min_child_weight' : hp.uniform('min_child_weight', 0, 10),
    'subsample' : hp.uniform('subsample', 0.01, 1),
    'lambda': hp.uniform('lambda', 0.00, 10) }


In [None]:
class LoglossObjective(object):
    """Custom log-loss objective exposing first and second derivatives."""

    def calc_ders_range(self, approxes, targets, weights=None):
        """Return ``(der1, der2)`` for every sample.

        With ``p = sigmoid(approx)``: ``der1 = target - p`` and
        ``der2 = -p * (1 - p)``, each multiplied by the sample weight when
        weights are given.

        Args:
            approxes: Raw scores, indexable by position.
            targets: Target values of the same length.
            weights: Optional per-sample weights of the same length.

        Returns:
            List of ``(der1, der2)`` tuples, one per sample.
        """
        assert len(approxes) == len(targets)
        weighted = weights is not None
        if weighted:
            assert len(weights) == len(approxes)

        derivatives = []
        for i in range(len(targets)):
            exp_approx = np.exp(approxes[i])
            prob = exp_approx / (1 + exp_approx)
            grad = targets[i] - prob
            hess = -prob * (1 - prob)

            if weighted:
                grad *= weights[i]
                hess *= weights[i]

            derivatives.append((grad, hess))
        return derivatives
    
class LoglossMetric(object):
    """Custom log-loss metric returning an (error sum, weight sum) pair."""

    def get_final_error(self, error, weight):
        """Normalise the accumulated error by the accumulated weight."""
        # The tiny constant avoids division by zero for an all-zero weight.
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        """Lower values are better for this metric."""
        return False

    def evaluate(self, approxes, target, weight=None):
        """Accumulate the weighted log loss of raw scores against targets.

        Args:
            approxes: One-element sequence holding the raw scores.
            target: Target values, same length as the scores.
            weight: Optional per-sample weights (default 1.0 each).

        Returns:
            Tuple ``(error_sum, weight_sum)``.
        """
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        raw_scores = approxes[0]
        error_sum, weight_sum = 0.0, 0.0

        for i in range(len(raw_scores)):
            exp_score = np.exp(raw_scores[i])
            prob = exp_score / (1 + exp_score)
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            error_sum += -w * (target[i] * np.log(prob) + (1 - target[i]) * np.log(1 - prob))

        return error_sum, weight_sum

In [None]:
#clf =  xgb.XGBClassifier
#clf = lgb.LGBMRegressor
# Estimator class and constructor arguments for train_classifiers(): CatBoost
# with the custom log-loss objective and metric defined above.
# NOTE(review): 5 iterations at learning_rate 2 looks like a smoke-test
# setting rather than a tuned configuration — confirm.
clf = CatBoostRegressor
clf_params = {'iterations':5, 'learning_rate':2, 'loss_function': LoglossObjective(), 'eval_metric':LoglossMetric()}
#predictors, predictions_cat = train_classifiers(clf, train_X, train_y, space_xgb, clf_params)

In [None]:

#predictions_cat = np.asarray(predictions)

In [None]:
# predictions_cat is only assigned by the CatBoost training call, which is
# currently commented out; guard the print so Restart & Run All does not stop
# here with a NameError.
if 'predictions_cat' in globals():
    print(predictions_cat)

In [None]:
def predict_predictors(predictors, test_X):
    """Run every per-target predictor on ``test_X``.

    Args:
        predictors: Iterable of fitted estimators exposing ``predict``.
        test_X: Feature matrix passed to each estimator.

    Returns:
        NumPy array with one row per predictor, built from the individual
        ``predict`` outputs.
    """
    # Collect into a local list; iterate the `predictors` argument itself.
    predictions = [target_predictor.predict(test_X)
                   for target_predictor in predictors]

    return np.asarray(predictions)
        

In [None]:

def compute_score(y_pred, y_true):
    """Column-wise mean log loss, printing the loss of every target.

    Same contract as the earlier ``compute_score`` definition in this
    notebook (which this one replaces once the cell is run), plus one printed
    line per target.

    Args:
        y_pred: Predicted probabilities, shape ``(n_samples, n_targets)``.
        y_true: Binary ground truth of the same shape.

    Returns:
        Tuple ``(mean_score, scores)``: the average log loss over the first
        ``PARAMS["TARGETS_OUTPUT"]`` columns and the per-target losses.

    Raises:
        AssertionError: If the two arrays differ in shape.
    """
    assert y_pred.shape == y_true.shape
    score = 0
    scores = []
    for i in range(PARAMS["TARGETS_OUTPUT"]):
        # labels=[0, 1] keeps log_loss working when a target column happens
        # to contain a single class in y_true.
        score_ = log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
        print("target {}:{}".format(i, score_))
        score += score_
        scores.append(score_)
    return score / PARAMS["TARGETS_OUTPUT"], scores

In [None]:
def choose_model(neural_nets_scores, catboost_predictors_scores):
    """Choose, per target, between the best neural net and CatBoost.

    Args:
        neural_nets_scores: Sequence of per-network score lists (one score
            per target, lower is better).
        catboost_predictors_scores: CatBoost score per target.

    Returns:
        Dict mapping target index to the index of the best neural network,
        or to ``-1`` when no network scores strictly below CatBoost.
    """
    target_model = {}
    # One entry per CatBoost score (206 targets in this notebook).
    for target in range(len(catboost_predictors_scores)):
        neural_scores = [net_scores[target] for net_scores in neural_nets_scores]
        neural_min = min(neural_scores)
        argmin_neural = neural_scores.index(neural_min)
        if neural_min < catboost_predictors_scores[target]:
            target_model[target] = argmin_neural
        else:
            target_model[target] = -1

    return target_model
        