In [None]:
import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import os
import gc
import math
import random
import datetime
import numpy as np
import pandas as pd
from joblib import dump, load
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as layers
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import tensorflow_addons as tfa
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
from tqdm.notebook import tqdm
from time import time

In [None]:
def create_folds(num_starts, num_splits):
    """Build drug-aware, multilabel-stratified fold assignments.

    Only rows whose `cp_type` is 'trt_cp' are kept. Drugs seen 18 times or
    fewer are assigned to a fold as a whole (stratified on their mean target
    vector); rows of drugs seen more than 18 times are stratified
    individually by `sig_id`.

    Parameters
    ----------
    num_starts : int
        Number of fold assignments to generate; seed `i` drives assignment `i`.
    num_splits : int
        Number of folds per assignment.

    Returns
    -------
    numpy.ndarray
        Array of shape (num_starts, n_treated_rows) holding int8 fold ids,
        in the row order of the filtered scored-targets file.
    """
    # LOAD FILES
    features_df = pd.read_csv('../input/lish-moa/train_features.csv')
    is_treated = features_df['cp_type'] == 'trt_cp'
    scored = pd.read_csv('/kaggle/input/lish-moa/train_targets_scored.csv').loc[is_treated, :]
    drug = pd.read_csv('/kaggle/input/lish-moa/train_drug.csv').loc[is_treated, :]
    targets = scored.columns[1:]
    scored = scored.merge(drug, on = 'sig_id', how = 'left')

    # LOCATE DRUGS
    drug_counts = scored.drug_id.value_counts()
    rare_drugs = drug_counts.loc[drug_counts <= 18].index.sort_values()
    frequent_drugs = drug_counts.loc[drug_counts > 18].index.sort_values()

    # Neither frame depends on the seed, so both are built once up front.
    per_drug = scored.groupby('drug_id')[targets].mean().loc[rare_drugs]
    per_row = scored.loc[scored.drug_id.isin(frequent_drugs)].reset_index(drop = True)

    all_assignments = []
    for seed in range(num_starts):

        # STRATIFY DRUGS 18X OR LESS (key: drug_id)
        drug_splitter = MultilabelStratifiedKFold(n_splits = num_splits, shuffle = True, random_state = seed)
        fold_by_drug = {
            drug_id: fold
            for fold, (_, val_idx) in enumerate(drug_splitter.split(per_drug, per_drug[targets]))
            for drug_id in per_drug.index[val_idx].values
        }

        # STRATIFY DRUGS MORE THAN 18X (key: sig_id)
        row_splitter = MultilabelStratifiedKFold(n_splits = num_splits, shuffle = True, random_state = seed)
        fold_by_sig = {
            sig_id: fold
            for fold, (_, val_idx) in enumerate(row_splitter.split(per_row, per_row[targets]))
            for sig_id in per_row.sig_id[val_idx].values
        }

        # ASSIGN FOLDS: drug-level first, then fill the remaining rows per sample
        assignment = scored.drug_id.map(fold_by_drug)
        unassigned = assignment.isna()
        assignment.loc[unassigned] = scored.loc[unassigned, 'sig_id'].map(fold_by_sig)
        all_assignments.append(assignment.astype('int8').values)

    return np.stack(all_assignments)

# Preprocessing

In [None]:
# Raw competition tables.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')

ss = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Target names are every submission column except the id.
cols = ss.columns[ss.columns != 'sig_id'].tolist()
# Feature groups are identified by their column-name prefix.
GENES = train_features.columns[train_features.columns.str.startswith('g-')].tolist()
CELLS = train_features.columns[train_features.columns.str.startswith('c-')].tolist()

In [None]:
def preprocess(df):
    """Encode the categorical treatment columns as integers and drop `sig_id`.

    The frame is modified in place and the same object is returned.

    `cp_type` maps 'trt_cp' -> 0 and 'ctl_vehicle' -> 1; `cp_dose` maps
    'D1' -> 0 and 'D2' -> 1. Values outside these mappings become NaN
    (standard `Series.map` behaviour). `cp_time` is left untouched; an
    ordinal 24/48/72 -> 0/1/2 encoding was tried and is disabled.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'cp_type', 'cp_dose' and 'sig_id'.

    Returns
    -------
    pandas.DataFrame
        `df` itself, with the encoded columns and without 'sig_id'.
    """
    # Replace the whole column instead of writing through `df.loc[:, col]`:
    # on pandas >= 2.0 the `.loc` form sets values in place, so the column
    # keeps its original object dtype and every later `.values` call on the
    # frame yields an object array instead of a numeric matrix.
    df['cp_type'] = df['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
#     df['cp_time'] = df['cp_time'].map({24: 0, 48: 1, 72: 2})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    del df['sig_id']
    return df

def log_loss_metric(y_true, y_pred):
    """Mean binary cross-entropy taken over every element of the inputs.

    Predictions are clipped to [1e-15, 1 - 1e-15] before the logarithm so
    that exact 0/1 predictions do not produce infinities.

    Parameters
    ----------
    y_true : numpy.ndarray
        Ground-truth labels.
    y_pred : numpy.ndarray
        Predicted probabilities, broadcast-compatible with `y_true`.

    Returns
    -------
    float
        The negated mean of the element-wise log-likelihood.
    """
    eps = 1e-15
    probs = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return - np.mean(positive_term + negative_term)

# preprocess() edits the frames it is given, so `train`/`test` are the same
# objects as `train_features`/`test_features`.
train, test = preprocess(train_features), preprocess(test_features)

# The target tables keep only label columns from here on.
train_targets.drop(columns = ['sig_id'], inplace = True)
train_targets_nonscored.drop(columns = ['sig_id'], inplace = True)

In [None]:
from sklearn.preprocessing import QuantileTransformer

# Fit one quantile -> normal transform on train and test stacked together,
# then overwrite the gene/cell columns of each frame with the transformed values.
qt = QuantileTransformer(output_distribution = 'normal', random_state = 42).fit(
    pd.concat([train[GENES + CELLS], test[GENES + CELLS]])
)
for frame in (train, test):
    frame[GENES + CELLS] = qt.transform(frame[GENES + CELLS])

In [None]:
from sklearn.decomposition import PCA

# GENES
n_comp_genes = 600  #<--Update

# PCA is fitted on train and test stacked together; the projected rows are
# then split back by position (train rows first, test rows last).
pca_genes = PCA(n_components = n_comp_genes, random_state = 42)
data = pd.concat([train[GENES], test[GENES]])
data2 = pca_genes.fit_transform(data)

train2 = pd.DataFrame(data2[:len(train)], columns = [f'pca_G-{i}' for i in range(n_comp_genes)])
test2 = pd.DataFrame(data2[-len(test):], columns = [f'pca_G-{i}' for i in range(n_comp_genes)])

# Components are appended next to the original columns, not substituted for them.
train = pd.concat([train, train2], axis = 1)
test = pd.concat([test, test2], axis = 1)

#CELLS
n_comp_cells = 50  #<--Update

pca_cells = PCA(n_components = n_comp_cells, random_state = 42)
data = pd.concat([train[CELLS], test[CELLS]])
data2 = pca_cells.fit_transform(data)

train2 = pd.DataFrame(data2[:len(train)], columns = [f'pca_C-{i}' for i in range(n_comp_cells)])
test2 = pd.DataFrame(data2[-len(test):], columns = [f'pca_C-{i}' for i in range(n_comp_cells)])

train = pd.concat([train, train2], axis = 1)
test = pd.concat([test, test2], axis = 1)

In [None]:
from sklearn.feature_selection import VarianceThreshold

var_thresh = VarianceThreshold(0.8)  #<-- Update
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the stacked frame is identical.
data = pd.concat([train, test])
# The first three columns (cp_type, cp_time, cp_dose) are excluded from the
# variance filter and re-attached below.
# NOTE(review): relies on those three being the leading columns - confirm
# against the CSV column order.
data_transformed = var_thresh.fit_transform(data.iloc[:, 3:])

# Split the filtered matrix back by position: train rows first, test rows last.
train_transformed = data_transformed[ : train.shape[0]]
test_transformed = data_transformed[-test.shape[0] : ]

train = pd.DataFrame(train[['cp_type','cp_time','cp_dose']].values.reshape(-1, 3),\
            columns=['cp_type','cp_time','cp_dose'])

# The surviving features lose their names here: pd.DataFrame(ndarray) labels
# them 0..k-1.
train = pd.concat([train, pd.DataFrame(train_transformed)], axis=1)

test = pd.DataFrame(test[['cp_type','cp_time','cp_dose']].values.reshape(-1, 3),\
            columns=['cp_type','cp_time','cp_dose'])

test = pd.concat([test, pd.DataFrame(test_transformed)], axis=1)

print(train.shape)
print(test.shape)

In [None]:
# Keep only rows with cp_type == 0 (the code preprocess() gives 'trt_cp').
# The mask is taken before `train` itself is filtered, so all three tables
# are cut with the same rows and then re-indexed from zero.
treated_rows = train['cp_type'] == 0
train_targets = train_targets[treated_rows].reset_index(drop = True)
train_targets_nonscored = train_targets_nonscored[treated_rows].reset_index(drop = True)
train = train[treated_rows].reset_index(drop = True)

print(train.shape)

In [None]:
# Positional indices of the model inputs: every column except position 0,
# which holds cp_type (constant after the control rows were removed).
top_feats = np.arange(train.shape[1])[1:]
print(top_feats)

In [None]:
# Preview the first rows of the final training matrix.
train.head(n = 5)

In [None]:
# Pseudo-labels: predictions from an earlier submission are used as targets
# for the test rows with cp_type == 0, paired with those rows' features.
ss_pseudo = pd.read_csv('../input/drug-sub/submission_pbest.csv').drop(columns = 'sig_id')
test_is_treated = test['cp_type'] == 0
pseudo_targets = ss_pseudo.loc[test_is_treated, cols].values
pseudo_train = test.loc[test_is_treated].values

# Model Function

In [None]:
class GroupLinear(tf.keras.layers.Layer):
    """Split the feature axis into groups and apply one Dense layer per chunk.

    The input of width `in_dim` is reshaped to (group, in_dim // group), a
    single Dense(out_dim // group) is applied along the last axis (so the
    same kernel is used for every group), and the result is flattened to
    width group * (out_dim // group).

    Parameters
    ----------
    in_dim : int
        Width of the incoming feature vector; must be divisible by `group`.
    out_dim : int
        Requested output width. The actual width is
        group * (out_dim // group), i.e. `out_dim` rounded down to a
        multiple of `group`.
    group : int
        Number of groups.
    **kwargs
        Forwarded to `tf.keras.layers.Layer` (e.g. `name`).

    Raises
    ------
    ValueError
        If `group` is not positive or `in_dim` is not divisible by it.
    """
    def __init__(self, in_dim, out_dim, group, **kwargs):
        # Forward kwargs so a caller-supplied `name=` is honoured; previously
        # they were accepted and silently discarded.
        super(GroupLinear, self).__init__(**kwargs)
        if group <= 0 or in_dim % group != 0:
            # Fail early with a readable message; the Reshape below could not
            # be applied to an `in_dim`-wide input in this case anyway.
            raise ValueError(
                f'in_dim ({in_dim}) must be divisible by a positive group count ({group})')
        self.reshape = tf.keras.layers.Reshape((group, in_dim // group))
        self.linear = tf.keras.layers.Dense(out_dim // group)
        self.flatten = tf.keras.layers.Flatten()
        
    def call(self, x):
        """Apply reshape -> shared Dense -> flatten to `x`."""
        x = self.reshape(x)
        x = self.linear(x)
        x = self.flatten(x)
        return x

In [None]:
def create_model(num_columns, num_labels, hidden_units, dropout_rates, num_groups):
    """Build and compile the residual GroupLinear network.

    Architecture: a stem (BatchNorm -> Dropout -> Dense -> GroupLinear ->
    swish -> Dropout) followed by three residual branches
    (Dense -> GroupLinear -> BatchNorm -> swish -> Dropout), each added back
    onto its input, and a sigmoid output layer of `num_labels` units whose
    bias is initialised to the constant 6.3.

    Parameters
    ----------
    num_columns : int
        Number of input features.
    num_labels : int
        Number of output units; also embedded in the output layer names.
    hidden_units : sequence of int
        Five widths. Index 0 is the stem Dense, index 1 the width every
        GroupLinear is asked to produce, indices 2-4 the residual Dense layers.
    dropout_rates : sequence of float
        Five rates: two for the stem, one per residual branch.
    num_groups : sequence of int
        Four group counts: stem, then one per residual branch.

    Returns
    -------
    tf.keras.Model
        Compiled with AdamW (lr 1e-3, weight decay 1e-5), binary
        cross-entropy with label smoothing 0.0008 as the loss, and plain
        binary cross-entropy reported as the metric 'mean_loss'.
    """
    keras = tf.keras
    swish = keras.activations.swish
    residual_width = hidden_units[1]

    def residual_branch(tensor, units, groups, rate, names):
        """Run one residual branch on `tensor` and add the result back onto it."""
        block_name, dense_name, gl_name, bn_name, act_name, dp_name, add_name = names
        branch = keras.Sequential(
            [
                keras.layers.Dense(units, name = dense_name),
                GroupLinear(units, residual_width, groups, name = gl_name),
                keras.layers.BatchNormalization(name = bn_name),
                keras.layers.Activation(swish, name = act_name),
                keras.layers.Dropout(rate, name = dp_name),
            ],
            name = block_name)(tensor)
        return keras.layers.Add(name = add_name)([tensor, branch])

    inp = keras.layers.Input(shape = (num_columns, ), name = 'inp')

    stem = keras.Sequential(
        [
            keras.layers.BatchNormalization(name = 'bn0'),
            keras.layers.Dropout(dropout_rates[0], name = 'dp0'),
            keras.layers.Dense(hidden_units[0], name = 'd0'),
            GroupLinear(hidden_units[0], residual_width, num_groups[0], name = 'gl0'),
            keras.layers.Activation(swish, name = 'a0'),
            keras.layers.Dropout(dropout_rates[1], name = 'dp1'),
        ],
        name = 'x0')
    x0 = stem(inp)

    x1 = residual_branch(x0, hidden_units[2], num_groups[1], dropout_rates[2],
                         ('r0', 'd1', 'gl1', 'bn2', 'a1', 'dp2', 'add0'))
    x2 = residual_branch(x1, hidden_units[3], num_groups[2], dropout_rates[3],
                         ('r1', 'd2', 'gl2', 'bn3', 'a2', 'dp3', 'add1'))
    x3 = residual_branch(x2, hidden_units[4], num_groups[3], dropout_rates[4],
                         ('r2', 'd3', 'gl3', 'bn4', 'a3', 'dp4', 'add2'))

    logits = keras.layers.Dense(num_labels,
                                bias_initializer = keras.initializers.Constant(6.3),
                                name = f'output_d{num_labels}')(x3)
    out = keras.layers.Activation('sigmoid', name = f'output_a{num_labels}')(logits)

    model = tf.keras.models.Model(inputs = inp, outputs = out)
    model.compile(optimizer = tfa.optimizers.AdamW(weight_decay = 1e-5, learning_rate = 1e-3),
                  loss = keras.losses.BinaryCrossentropy(label_smoothing = 0.0008),
                  metrics = keras.losses.BinaryCrossentropy(name = 'mean_loss'),
                 )

    return model

# Train Model

In [None]:
def train_model(X_train, Y_train_2, Y_nonscored, features, folds, model_name, save_path, num_seeds, 
                num_splits, model_params, X_test = None, sample_sub_path = None, pseudo_labeling = True, verbose = 0):
    """Multi-seed cross-validated training with non-scored pre-training.

    For every seed and fold a model is first fitted on the non-scored
    targets; its best weights are saved, loaded by layer name into a fresh
    model with a scored-target head, and that model is then fitted on the
    scored targets. Out-of-fold and test predictions are averaged over seeds
    (and, for test, over folds).

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features; must contain a 'cp_type' column.
    Y_train_2 : pandas.DataFrame
        Scored targets, row-aligned with `X_train`.
    Y_nonscored : pandas.DataFrame
        Non-scored targets, row-aligned with `X_train`.
    features : array-like of int
        Positional column indices of `X_train` / `X_test` fed to the model.
    folds : array-like, shape (num_seeds, n_rows)
        Fold id of every training row, one row of ids per seed.
    model_name, save_path : str
        Used to build the checkpoint file names.
    num_seeds, num_splits : int
        Number of seeds and of folds; used as averaging denominators.
    model_params : dict
        Extra keyword arguments for `create_model`.
    X_test : pandas.DataFrame, optional
        Test features; when given, a submission frame is produced.
    sample_sub_path : str, optional
        CSV used as the template for the submission frame.
    pseudo_labeling : bool
        When true, the module-level `pseudo_train` / `pseudo_targets` arrays
        are appended to the scored training fold (not to the non-scored one).
    verbose : int
        Verbosity passed to `fit` and to `ReduceLROnPlateau`.

    Returns
    -------
    tuple
        (overall OOF log loss, OOF prediction frame, submission frame or None).
    """
    def make_callbacks(ckp_path):
        # Callbacks are stateful (best value, patience counters), so each fit
        # gets a fresh set.
        rlr = ReduceLROnPlateau(monitor = 'val_mean_loss', factor = 0.1, patience = 3, verbose = verbose, 
                                min_delta = 1e-4, mode = 'min')
        ckp = ModelCheckpoint(ckp_path, monitor = 'val_mean_loss', verbose = 0, 
                              save_best_only = True, save_weights_only = True, mode = 'min')
        es = EarlyStopping(monitor = 'val_mean_loss', min_delta = 1e-4, patience = 10, mode = 'min', 
                           baseline = None, restore_best_weights = True, verbose = 0)
        return [rlr, ckp, es]

    start_time_all = time()
    target_cols = Y_train_2.columns
    # Head sizes follow the target frames instead of hard-coded 402 / 206.
    num_scored = len(target_cols)
    num_nonscored = Y_nonscored.shape[1]

    # Float accumulator: predictions are added into it, and writing floats
    # into an integer frame copied from the targets depends on implicit
    # upcasting that newer pandas versions deprecate.
    oof = pd.DataFrame(0.0, index = Y_train_2.index, columns = target_cols)
    if X_test is not None:
        sub = pd.read_csv(sample_sub_path)
        sub.loc[:, target_cols] = 0
        x_tt = X_test.values[:, features]
    else:
        sub = None

    # Materialise the arrays once rather than several times per fold.
    X_values = X_train.values
    y_scored = Y_train_2.values
    y_nonscored = Y_nonscored.values

    for nums, seed in enumerate(range(num_seeds)):
        start_time_seed = time()
        tf.random.set_seed(seed)
        np.random.seed(seed)
        random.seed(seed) 
        mean_score = 0
        fold_ids = folds[nums]
        # sorted() makes the fold order explicit instead of relying on set order.
        for n, foldno in enumerate(sorted(set(fold_ids))):
            start_time_fold = time()
            tr = fold_ids != foldno
            te = fold_ids == foldno
            x_tr_ns, x_val = X_values[tr][:, features], X_values[te][:, features]
            y_tr, y_val = y_scored[tr], y_scored[te]
            y_tr_ns, y_val_ns = y_nonscored[tr], y_nonscored[te]
            if pseudo_labeling:
                # Pseudo-labelled test rows only extend the scored training set.
                x_tr = np.concatenate([x_tr_ns, pseudo_train[:, features]])
                y_tr = np.concatenate([y_tr, pseudo_targets])
            else:
                x_tr = x_tr_ns
            
            ckp_path = save_path + f'{model_name}_Seed_{seed}_Fold_{n}.hdf5'
            # Nonscored
            model = create_model(x_tr.shape[1], num_nonscored, **model_params)            
            model.fit(x_tr_ns, y_tr_ns, validation_data = (x_val, y_val_ns), epochs = 1000, batch_size = 128,
                      callbacks = make_callbacks(ckp_path), verbose = verbose)
            K.clear_session()
            
            # Scored: by_name loading transfers every layer whose name matches;
            # the output layers embed the label count in their names and so
            # start from fresh weights.
            model = create_model(x_tr.shape[1], num_scored, **model_params)            
            model.load_weights(ckp_path, by_name = True)
            history = model.fit(x_tr, y_tr, validation_data=(x_val, y_val), epochs = 1000, 
                                batch_size = 128, callbacks = make_callbacks(ckp_path), verbose = verbose)
            hist = pd.DataFrame(history.history)
            model.load_weights(ckp_path)
        
            val_predict = model.predict(x_val)
            fold_score = hist['val_mean_loss'].min()
#             fold_score = log_loss_metric(y_val, val_predict)
            mean_score += fold_score / num_splits
            oof.loc[te, target_cols] += val_predict / num_seeds
            if X_test is not None:
                test_predict = model.predict(x_tt)
                sub.loc[:, target_cols] += test_predict / (num_splits * num_seeds)
            print(f'[{str(datetime.timedelta(seconds = time() - start_time_fold))[0:7]}] {model_name} Seed {seed}, Fold {n}:', fold_score)
            
            del model
            gc.collect()
            K.clear_session()

        print(f'[{str(datetime.timedelta(seconds = time() - start_time_seed))[0:7]}] {model_name} Seed {seed} Mean Score:', mean_score)
        
    # Rows flagged cp_type == 1 are forced to all-zero predictions.
    oof.loc[X_train['cp_type'] == 1, target_cols] = 0
    overall_score = log_loss_metric(y_scored, oof[target_cols].values)
    print(f'[{str(datetime.timedelta(seconds = time() - start_time_all))[0:7]}] {model_name} OOF Score:', overall_score)
    if X_test is not None:
        sub.loc[X_test['cp_type'] == 1, target_cols] = 0
    return overall_score, oof, sub

In [None]:
# One fold assignment per start (seed), each with N_SPLITS folds.
N_STARTS, N_SPLITS = 3, 5

folds_split = create_folds(num_starts = N_STARTS, num_splits = N_SPLITS)
print(folds_split)

In [None]:
# One entry per model configuration to train.
model_params = [
    {
        'hidden_units': [128, 128, 512, 512, 128],
        'dropout_rates': [0.3782307809831188, 0.49668628489698075, 0.2152127463143538, 0.4146007588097611, 0.32126943893048127],
        'num_groups': [16, 8, 8, 8],
    },
]

for m, params in enumerate(model_params):
    print(params)
    Pseudo_Labeling = True
    VERBOSE = 0
    model_name = f'ResDT{m}'
    save_path = ''
    sample_sub_path = '../input/lish-moa/sample_submission.csv'
    # Note: `ss` is rebound here from the sample submission to the predicted one.
    oof_score, res, ss = train_model(
        train, train_targets, train_targets_nonscored, top_feats, folds_split,
        model_name, save_path, N_STARTS, N_SPLITS, params,
        test, sample_sub_path, Pseudo_Labeling, VERBOSE,
    )
    # Persist OOF / test predictions as arrays and the submission as CSV.
    np.save(f'{model_name}_oof.npy', res[cols].values)
    np.save(f'{model_name}_sub.npy', ss[cols].values)
    ss.to_csv(f'submission_GroupCV_PBestPre_{model_name}.csv', index = False)

# Ensemble

In [None]:
def train_ensemble(X_train, Y_train_2, folds, model_name, save_path, num_seeds, 
                   num_splits, model_params, X_test = None, sample_sub_path = None, verbose = 0):
    """Multi-seed cross-validated training of a stacking model on array inputs.

    Parameters
    ----------
    X_train : numpy.ndarray
        Training matrix; it is indexed with boolean row masks and all of its
        columns are used.
    Y_train_2 : pandas.DataFrame
        Scored targets, row-aligned with `X_train`.
    folds : array-like, shape (num_seeds, n_rows)
        Fold id of every training row, one row of ids per seed.
    model_name, save_path : str
        Used to build the checkpoint file names.
    num_seeds, num_splits : int
        Number of seeds and of folds; used as averaging denominators.
    model_params : dict
        Extra keyword arguments for `create_model`.
    X_test : numpy.ndarray, optional
        Test matrix; when given, a submission frame is produced.
    sample_sub_path : str, optional
        CSV used as the template for the submission frame.
    verbose : int
        Verbosity passed to `fit` and to the callbacks.

    Returns
    -------
    tuple
        (overall OOF log loss, OOF prediction frame, submission frame or None).

    Notes
    -----
    Because the inputs are plain arrays, the final zeroing of rows with
    cp_type == 1 reads the module-level `train` and `test` frames; both must
    be row-aligned with `X_train` / `X_test`.
    """
    start_time_all = time()
    target_cols = Y_train_2.columns
    # Output width follows the target frame instead of a hard-coded 206.
    num_labels = len(target_cols)

    # Float accumulator: predictions are added into it, and writing floats
    # into an integer frame copied from the targets depends on implicit
    # upcasting that newer pandas versions deprecate.
    oof = pd.DataFrame(0.0, index = Y_train_2.index, columns = target_cols)
    if X_test is not None:
        sub = pd.read_csv(sample_sub_path)
        sub.loc[:, target_cols] = 0
    else:
        sub = None

    y_scored = Y_train_2.values

    for nums, seed in enumerate(range(num_seeds)):
        start_time_seed = time()
        tf.random.set_seed(seed)
        np.random.seed(seed)
        random.seed(seed) 
        mean_score = 0
        fold_ids = folds[nums]
        # sorted() makes the fold order explicit instead of relying on set order.
        for n, foldno in enumerate(sorted(set(fold_ids))):
            start_time_fold = time()
            tr = fold_ids != foldno
            te = fold_ids == foldno
            x_tr, x_val = X_train[tr], X_train[te]
            y_tr, y_val = y_scored[tr], y_scored[te]

            ckp_path = save_path + f'{model_name}_Seed_{seed}_Fold_{n}.hdf5'
            model = create_model(x_tr.shape[1], num_labels, **model_params)            
            rlr = ReduceLROnPlateau(monitor = 'val_mean_loss', factor = 0.1, patience = 3, 
                                    verbose = verbose, min_delta = 1e-4, mode = 'min')
            ckp = ModelCheckpoint(ckp_path, monitor = 'val_mean_loss', verbose = verbose, 
                                  save_best_only = True, save_weights_only = True, mode = 'min')
            es = EarlyStopping(monitor = 'val_mean_loss', min_delta = 1e-4, patience = 10, mode = 'min', 
                               baseline = None, restore_best_weights = True, verbose = verbose)
            history = model.fit(x_tr, y_tr, validation_data=(x_val, y_val), epochs = 1000, 
                                batch_size = 128, callbacks=[rlr, ckp, es], verbose = verbose)
            hist = pd.DataFrame(history.history)
            # Predict with the best checkpointed weights.
            model.load_weights(ckp_path)
            
            val_predict = model.predict(x_val)
            fold_score = hist['val_mean_loss'].min()
            # fold_score = log_loss_metric(y_val, val_predict)
            mean_score += fold_score / num_splits
            oof.loc[te, target_cols] += val_predict / num_seeds
            if X_test is not None:
                test_predict = model.predict(X_test)
                sub.loc[:, target_cols] += test_predict / (num_splits * num_seeds)
            print(f'[{str(datetime.timedelta(seconds = time() - start_time_fold))[0:7]}] {model_name} Seed {seed}, Fold {n}:', fold_score)
            
            del model
            gc.collect()
            K.clear_session()

        print(f'[{str(datetime.timedelta(seconds = time() - start_time_seed))[0:7]}] {model_name} Seed {seed} Mean Score:', mean_score)
        
    # NOTE: uses the global `train` frame (X_train is an array without cp_type).
    oof.loc[train['cp_type'] == 1, target_cols] = 0
    overall_score = log_loss_metric(y_scored, oof[target_cols].values)
    print(f'[{str(datetime.timedelta(seconds = time() - start_time_all))[0:7]}] {model_name} OOF Score:', overall_score)
    if X_test is not None:
        # NOTE: uses the global `test` frame for the same reason.
        sub.loc[test['cp_type'] == 1, target_cols] = 0
    return overall_score, oof, sub

In [None]:
# # oof1 = np.load('../input/moa-mlp/Model0_oof.npy')
# # oof2 = np.load('../input/moa-mlp/Model1_oof.npy')
# # oof3 = np.load('../input/moa-mlp/Model2_oof.npy')

# # sub1 = np.load('../input/moa-mlp/Model0_sub.npy')
# # sub2 = np.load('../input/moa-mlp/Model1_sub.npy')
# # sub3 = np.load('../input/moa-mlp/Model2_sub.npy')

# oof1 = np.load('./Model0_oof.npy')
# oof2 = np.load('./Model1_oof.npy')
# oof3 = np.load('./Model2_oof.npy')

# sub1 = np.load('./Model0_sub.npy')
# sub2 = np.load('./Model1_sub.npy')
# sub3 = np.load('./Model2_sub.npy')

# train_new = np.concatenate([oof1, oof2, oof3], axis = 1)
# test_new = np.concatenate([sub1, sub2, sub3], axis = 1)

# print('OOF 1:', log_loss_metric(train_targets.values, oof1))
# print('OOF 2:', log_loss_metric(train_targets.values, oof2))
# print('OOF 3:', log_loss_metric(train_targets.values, oof3))
# print('Blend OOF:', log_loss_metric(train_targets.values, 0.33 * oof1 + 0.33 * oof2 + 0.34 * oof3))

In [None]:
# model_params = {'hidden_units': [1920, 768],  
#                 'dropout_rates': [0.36130273975713795, 0.38130486900003896, 0.44485672673556004], 
#                 }

# N_STARTS = 3
# N_SPLITS = 5
# VERBOSE = 0
# model_name = 'EModel_Stack'
# save_path = ''
# sample_sub_path = '../input/lish-moa/sample_submission.csv'
# oof_score, res, ss = train_ensemble(train_new, train_targets, folds_split, model_name, save_path, 
#                                     N_STARTS, N_SPLITS, model_params, test_new, sample_sub_path, VERBOSE)
# np.save(f'{model_name}_oof.npy', res[cols].values)
# np.save(f'{model_name}_sub.npy', ss[cols].values)
# ss.to_csv('submission_GroupCV_MLP.csv', index = False)

# Hyperopt

In [None]:
def train_onefold_model(X_train, Y_train_2, features, folds, model_name, save_path, num_seeds, 
                        num_splits, model_params, X_test = None, sample_sub_path = None, verbose = 0):
    """Train a single fold (seed 0, lowest fold id) and return its best validation loss.

    This is the cheap objective used for hyper-parameter search. Only the
    first seed and the first fold are trained, so no out-of-fold or test
    predictions are produced.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features.
    Y_train_2 : pandas.DataFrame
        Scored targets, row-aligned with `X_train`.
    features : array-like of int
        Positional column indices of `X_train` fed to the model.
    folds : array-like, shape (>= 1, n_rows)
        Fold ids per seed; only `folds[0]` is used.
    model_name, save_path : str
        Used to build the checkpoint file name.
    num_seeds : int
        Must be at least 1; only seed 0 is run.
    num_splits, X_test, sample_sub_path
        Accepted for signature parity with `train_model`; not used.
    model_params : dict
        Extra keyword arguments for `create_model`.
    verbose : int
        Verbosity passed to `fit` and to the callbacks.

    Returns
    -------
    float
        Minimum 'val_mean_loss' recorded during training of the fold.

    Raises
    ------
    ValueError
        If `num_seeds` is below 1 or `folds[0]` holds no fold ids (previously
        this surfaced as an UnboundLocalError on the return statement).
    """
    if num_seeds < 1:
        raise ValueError('num_seeds must be at least 1')
    fold_ids = folds[0]
    unique_folds = sorted(set(fold_ids))
    if not unique_folds:
        raise ValueError('folds[0] contains no fold ids')

    seed, n, foldno = 0, 0, unique_folds[0]
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed) 

    start_time_fold = time()
    tr = fold_ids != foldno
    te = fold_ids == foldno
    X_values = X_train.values
    x_tr, x_val = X_values[tr][:, features], X_values[te][:, features]
    y_tr, y_val = Y_train_2.values[tr], Y_train_2.values[te]

    ckp_path = save_path + f'{model_name}_Seed_{seed}_Fold_{n}.hdf5'
    # Output width follows the target frame instead of a hard-coded 206.
    model = create_model(x_tr.shape[1], Y_train_2.shape[1], **model_params)
    rlr = ReduceLROnPlateau(monitor = 'val_mean_loss', factor = 0.1, patience = 3, 
                            verbose = verbose, min_delta = 1e-4, mode = 'min')
    ckp = ModelCheckpoint(ckp_path, monitor = 'val_mean_loss', verbose = verbose, 
                          save_best_only = True, save_weights_only = True, mode = 'min')
    es = EarlyStopping(monitor = 'val_mean_loss', min_delta = 1e-4, patience = 10, mode = 'min', 
                       baseline = None, restore_best_weights = True, verbose = verbose)
    history = model.fit(x_tr, y_tr, validation_data=(x_val, y_val), epochs = 1000, 
                        batch_size = 128, callbacks=[rlr, ckp, es], verbose = verbose)

    # The score comes straight from the training history; the validation and
    # test predictions formerly computed here were never returned.
    fold_score = pd.DataFrame(history.history)['val_mean_loss'].min()
    print(f'[{str(datetime.timedelta(seconds = time() - start_time_fold))[0:7]}] {model_name} Seed {seed}, Fold {n}:', fold_score)

    del model
    gc.collect()
    K.clear_session()

    return fold_score

In [None]:
# def train_onefold_ensemble(X_train, Y_train_2, folds, model_name, save_path, num_seeds, 
#                            num_splits, model_params, X_test = None, sample_sub_path = None, verbose = 0):
#     start_time_all = time()
#     oof = Y_train_2.copy()
#     oof.loc[:, Y_train_2.columns] = 0
#     if X_test is not None:
#         sub = pd.read_csv(sample_sub_path)
#         sub.loc[:, Y_train_2.columns] = 0
#     else:
#         sub = None
#     for nums, seed in enumerate(range(num_seeds)):
#         start_time_seed = time()
#         tf.random.set_seed(seed)
#         np.random.seed(seed)
#         random.seed(seed) 
#         mean_score = 0
# #         skf = MultilabelStratifiedKFold(n_splits = num_splits, random_state = seed, shuffle = True)
#         for n, foldno in enumerate(set(folds[nums])):
#             start_time_fold = time()
#             tr = folds[nums] != foldno
#             te = folds[nums] == foldno
#             x_tr, x_val = X_train[tr], X_train[te]
#             y_tr, y_val = Y_train_2.values[tr], Y_train_2.values[te]

#             if X_test is not None:
#                 x_tt = X_test

#             ckp_path = save_path + f'{model_name}_Seed_{seed}_Fold_{n}.hdf5'
#             model = create_model(x_tr.shape[1], 206, **model_params)
#             rlr = ReduceLROnPlateau(monitor = 'val_mean_loss', factor = 0.1, patience = 3, 
#                                     verbose = verbose, min_delta = 1e-4, mode = 'min')
#             ckp = ModelCheckpoint(ckp_path, monitor = 'val_mean_loss', verbose = verbose, 
#                                   save_best_only = True, save_weights_only = True, mode = 'min')
#             es = EarlyStopping(monitor = 'val_mean_loss', min_delta = 1e-4, patience = 10, mode = 'min', 
#                                baseline = None, restore_best_weights = True, verbose = verbose)
#             history = model.fit(x_tr, y_tr, validation_data=(x_val, y_val), epochs = 1000, 
#                                 batch_size = 128, callbacks=[rlr, ckp, es], verbose = verbose)
#             hist = pd.DataFrame(history.history)
#             model.load_weights(ckp_path)
            
#             val_predict = model.predict(x_val)
#             fold_score = hist['val_mean_loss'].min()
#             # fold_score = log_loss_metric(y_val, val_predict)
#             mean_score += fold_score / num_splits
#             oof.loc[te, Y_train_2.columns] += val_predict / num_seeds
#             if X_test is not None:
#                 test_predict = model.predict(x_tt)
#                 sub.loc[:, Y_train_2.columns] += test_predict / (num_splits * num_seeds)
#             print(f'[{str(datetime.timedelta(seconds = time() - start_time_fold))[0:7]}] {model_name} Seed {seed}, Fold {n}:', fold_score)
            
#             del model
#             x = gc.collect()
#             K.clear_session()

#             break
#         break
#     return fold_score

In [None]:
def optimise(params):
    """Hyperopt objective: train one fold with the sampled parameters.

    Parameters
    ----------
    params : dict
        Sampled values. Keys containing 'hidden_unit', 'dropout_rate' or
        'num_groups' are gathered into the corresponding list; keys are
        expected to end in '_<position>' (e.g. 'hidden_unit_3').

    Returns
    -------
    float
        Best validation loss of the single trained fold (lower is better).
    """
    print(params)

    def layer_position(key):
        # Numeric suffix decides the layer position. Keys without one sort
        # last and, because sorted() is stable, keep their incoming order.
        suffix = key.rsplit('_', 1)[-1]
        return int(suffix) if suffix.isdigit() else float('inf')

    hidden_units = []
    dropout_rates = []
    num_groups = []
    # create_model indexes these lists by position, so order them explicitly
    # rather than trusting whatever key order the dict happens to have.
    for key in sorted(params.keys(), key = layer_position):
        if 'hidden_unit' in key:
            hidden_units.append(params[key])
        elif 'dropout_rate' in key:
            dropout_rates.append(params[key])
        elif 'num_groups' in key:
            num_groups.append(params[key])

    # Local settings for the search; they shadow the notebook-level constants.
    N_STARTS = 1
    N_SPLITS = 5
    model_name = 'HModel'
    save_path = ''
    model_params = {'hidden_units': hidden_units,  
                    'dropout_rates': dropout_rates, 
                    'num_groups': num_groups, 
                   }
    oof_score = train_onefold_model(train, train_targets, top_feats, folds_split, model_name, save_path, 
                                    N_STARTS, N_SPLITS, model_params)
    return oof_score

In [None]:
# def optimise_ensemble(params):
#     print(params)
# #     hidden_units = [params['hidden_unit_1'], params['hidden_unit_2']]
# #     dropout_rates = [params['dropout_rate_1'], params['dropout_rate_2'], params['dropout_rate_3']]
#     hidden_units = []
#     dropout_rates = []
#     for key in params.keys():
#         if 'hidden_unit' in key:
#             hidden_units.append(params[key])
#         elif 'dropout_rate' in key:
#             dropout_rates.append(params[key])

#     N_STARTS = 1
#     N_SPLITS = 5
#     model_name = 'HModel'
#     save_path = ''
#     model_params = {'hidden_units': hidden_units,  
#                     'dropout_rates': dropout_rates, 
#                     'n_highway_layers': params['n_highway_layers'], 
#                    }
#     oof_score = train_onefold_ensemble(train_new, train_targets, folds_split, model_name, save_path, 
#                                        N_STARTS, N_SPLITS, model_params)
#     return oof_score

In [None]:
# N_STARTS = 3
# N_SPLITS = 5

# folds_split = create_folds(N_STARTS, N_SPLITS)
# print(folds_split)

In [None]:
# param_space = {'hidden_unit_1': scope.int(hp.quniform('hidden_unit_1', 128, 512, 128)), 
#                'hidden_unit_2': scope.int(hp.quniform('hidden_unit_2', 128, 512, 128)), 
#                'hidden_unit_3': scope.int(hp.quniform('hidden_unit_3', 128, 512, 128)),
#                'hidden_unit_4': scope.int(hp.quniform('hidden_unit_4', 128, 512, 128)),
#                'hidden_unit_5': scope.int(hp.quniform('hidden_unit_5', 128, 512, 128)),
#                'dropout_rate_1': hp.uniform('dropout_rate_1', 0.1, 0.5),  
#                'dropout_rate_2': hp.uniform('dropout_rate_2', 0.1, 0.5),
#                'dropout_rate_3': hp.uniform('dropout_rate_3', 0.1, 0.5), 
#                'dropout_rate_4': hp.uniform('dropout_rate_4', 0.1, 0.5),
#                'dropout_rate_5': hp.uniform('dropout_rate_5', 0.1, 0.5),
#                'num_groups_1': hp.choice('num_groups_1', [4, 8, 16]), 
#                'num_groups_2': hp.choice('num_groups_2', [4, 8, 16]),
#                'num_groups_3': hp.choice('num_groups_3', [4, 8, 16]),
#                'num_groups_4': hp.choice('num_groups_4', [4, 8, 16]),
#               }

# trials = Trials()

# hopt = fmin(fn = optimise, 
#             space = param_space, 
#             algo = tpe.suggest, 
#             max_evals = 50, 
#             trials = trials, 
#            )

In [None]:
# dump(trials, 'trials_DT.pkl', compress = True)
# print(hopt)

In [None]:
# scores = []
# sets = []
# for item in trials.trials:
#     scores.append(item['result']['loss'])
#     sets.append(item['misc']['vals'])

# max_values = np.argsort(scores)[:10]
# for m in max_values:
#     print(scores[m])
#     print(sets[m])
#     print('-' * 50)