In [None]:
# Kaggle API key handed to Utils.get_basepath when the data has to be downloaded.
# Left empty here; a non-empty value also switches on the google.colab data_table
# extension in the imports cell. Do not commit a real key in this cell.
KAGGLE_KEY = ""

In [None]:
class Utils:
  """Environment helpers: dependency setup, data location, TF strategy and seeding."""

  @staticmethod
  def install(module, pypi_name=None, path=None):
    """Make `module` importable, installing it from PyPI only when necessary.

    Args:
      module: Import name of the package (e.g. 'iterstrat').
      pypi_name: Distribution name passed to pip; defaults to `module`.
      path: Optional directory holding a local copy of the package. When the
        directory exists it is put on `sys.path` and nothing is installed.

    Raises:
      subprocess.CalledProcessError: if the pip installation fails.
    """
    import importlib
    import importlib.util
    import subprocess

    if path is not None and os.path.isdir(path):
      # A local copy wins. Return even when the path was registered earlier so
      # that re-running the cell never falls through to a pip install.
      if path not in sys.path:
        sys.path.append(path)
      return

    if module in sys.modules:
      return
    try:
      # sys.modules only lists modules that were already imported; find_spec
      # also detects packages that are installed but not imported yet.
      available = importlib.util.find_spec(module) is not None
    except (ImportError, ValueError):
      available = False
    if available:
      return

    pypi_name = module if pypi_name is None else pypi_name
    # Use the kernel's own interpreter so the package lands in its environment.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', pypi_name])
    importlib.invalidate_caches()

  @staticmethod
  def get_basepath(project, test_file='train.csv', key=''):
    """Return the directory that holds the data files of `project`.

    Returns `../input/{project}/` when that directory exists. Otherwise the
    current directory is used, and the project is downloaded first if
    `test_file` is not already present there.
    """
    kaggle_base_path = f'../input/{project}/'
    if os.path.isdir(kaggle_base_path): # We are in kaggle
      return kaggle_base_path
    colab_base_path = './'
    if not os.path.isfile(f'{colab_base_path}{test_file}'):
      Utils.download_kaggle(project, key)
    return colab_base_path

  @staticmethod
  def download_kaggle(project, key, type='competitions'):
    """Download `project` with the kaggle CLI and unpack it into the cwd.

    Args:
      project: Competition (or dataset) identifier.
      key: Kaggle API key, exported as KAGGLE_KEY for the CLI.
      type: kaggle CLI sub-command; 'competitions' uses the `-c` flag, any
        other value uses `-d`.

    Raises:
      subprocess.CalledProcessError: if the kaggle CLI exits with an error.
    """
    import glob
    import subprocess
    import zipfile

    os.environ['KAGGLE_USERNAME'] = "sudhanshuraheja"
    os.environ['KAGGLE_KEY'] = key
    flag = '-c' if type == 'competitions' else '-d'
    subprocess.check_call(['kaggle', type, 'download', flag, project])
    # Unpack every downloaded archive, deleting each one once it is extracted.
    for archive in glob.glob('*.zip'):
      with zipfile.ZipFile(archive) as zf:
        zf.extractall()
      os.remove(archive)

  @staticmethod
  def get_strategy(enable_mixed_precision=False, enable_xla_accelerate=True):
    """Detect the hardware and return a matching tf.distribute strategy.

    Args:
      enable_mixed_precision: Set a mixed-precision Keras policy
        ('mixed_bfloat16' on TPU, 'mixed_float16' otherwise).
      enable_xla_accelerate: Turn on XLA JIT compilation.

    Returns:
      A TPUStrategy when a TPU cluster can be resolved, otherwise the default
      TensorFlow strategy.
    """
    # Detect hardware, return appropriate distribution strategy
    try:
      # TPU detection. No parameters necessary if TPU_NAME environment variable is set.
      # On Kaggle this is always the case.
      tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
      device = f'TPU {tpu.master()}'
    except ValueError:
      tpu = None

    if tpu:
      tf.config.experimental_connect_to_cluster(tpu)
      tf.tpu.experimental.initialize_tpu_system(tpu)
      strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
      # default distribution strategy in Tensorflow. Works on CPU and single GPU.
      strategy = tf.distribute.get_strategy()
      device = 'GPU' if len(tf.config.list_physical_devices('GPU')) > 0 else 'CPU'
    replicas = strategy.num_replicas_in_sync

    if enable_mixed_precision:
      from tensorflow.keras.mixed_precision import experimental as mixed_precision
      policy_name = 'mixed_bfloat16' if tpu else 'mixed_float16'
      mixed_precision.set_policy(mixed_precision.Policy(policy_name))
      tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

    if enable_xla_accelerate:
      tf.config.optimizer.set_jit(True)

    print(f'Running on {device} with {replicas} replicas, mixed_precision: {enable_mixed_precision}, accelerate: {enable_xla_accelerate}')

    return strategy

  @staticmethod
  def seed_everything(seed=0):
    """Seed Python's `random`, PYTHONHASHSEED, NumPy and TensorFlow with `seed`."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


In [None]:
# Standard library
import os
import gc
import sys
import json
import math
import time
import random
import warnings
warnings.filterwarnings("ignore")

# Data sciency stuff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
sns.set()
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', 60)
pd.set_option('display.memory_usage', True)
pd.set_option('show_dimensions', True)

# Tensorflow
import tensorflow as tf
import tensorflow_addons as tfa

# Sklearn
from sklearn.model_selection import KFold
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

# iterstrat
# Use the copy under ../input when that directory exists; otherwise Utils.install
# falls back to installing 'iterative-stratification' with pip.
Utils.install(
    'iterstrat', 
    pypi_name='iterative-stratification', 
    path='../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold, RepeatedMultilabelStratifiedKFold

# NOTE(review): a non-empty key appears to be used as the "running on Colab" signal — confirm.
if KAGGLE_KEY != "":
    %load_ext google.colab.data_table

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs with the default seed (0), then pick
# the distribution strategy that every model is later built under (see loadModel).
Utils.seed_everything()
strategy = Utils.get_strategy()

# Load Data

In [None]:
class Metrics:
  """Scoring helpers shared by training, reporting and ensembling."""

  @staticmethod
  def log_loss(y_true, y_pred):
    """Return Keras BinaryCrossentropy of `y_pred` against `y_true`, converted with `.numpy()`."""
    return tf.keras.losses.BinaryCrossentropy()(y_true, y_pred).numpy()

  @staticmethod
  def f1_weighted(y_true, y_pred, threshold=0.5):
    """Weighted F1 after binarising `y_pred` with a strict `> threshold` test."""
    return f1_score(y_true, (y_pred > threshold).astype('int'), average='weighted', zero_division=0)

  @staticmethod
  def classification_report(y_true, y_pred, cols, threshold=0.5):
    """Return the classification report as a dict, with `cols` as target names.

    The call below resolves to the module-level `classification_report`
    function, not to this static method: class attributes are not visible as
    bare names inside a method body.
    """
    return classification_report(
      y_true,
      (y_pred > threshold).astype('int'),
      output_dict=True,
      zero_division=0,
      target_names=cols,
    )

  @staticmethod
  def best_f1_weighted(y_true, y_pred, splits=10):
    """Scan `splits + 1` evenly spaced thresholds in [0, 1] for the best weighted F1.

    Returns:
      (best_score, best_threshold); both are 0 when no threshold scores above 0.
    """
    best_threshold = 0
    best_score = 0
    for threshold in np.linspace(0, 1, num=splits+1):
      score = Metrics.f1_weighted(y_true, y_pred, threshold=threshold)
      if score > best_score:
        best_score = score
        best_threshold = threshold
    return best_score, best_threshold

  @staticmethod
  def display_f1(classification_report):
    """Turn a classification-report dict into a per-label table with TP/FN/FP counts.

    Args:
      classification_report: dict as returned by `Metrics.classification_report`.

    Returns:
      DataFrame sorted by support (descending), limited to the first 25 rows,
      with the 'samples avg', 'macro avg' and 'weighted avg' rows removed.
    """
    scores = pd.DataFrame.from_dict(classification_report).T.sort_values(by='support', ascending=False)
    # Not every report contains all three summary rows, so tolerate missing ones
    # instead of failing with a KeyError.
    scores = scores.drop(['samples avg', 'macro avg', 'weighted avg'], axis=0, errors='ignore')
    scores['TP'] = round(scores['recall'] * scores['support'])
    scores['FN'] = scores['support'] - scores['TP']
    # TP / precision is the number of predicted positives; 0/0 gives NaN, zeroed below.
    scores['FP'] = (scores['TP'] / scores['precision']) - scores['TP']
    scores = scores.fillna(0)
    scores = scores.iloc[:25,:]
    return scores

  @staticmethod
  def display_column_log_loss(y_true_df, y_pred):
    """Per-column binary log loss next to the column's positive count.

    Args:
      y_true_df: DataFrame of true labels; its columns label the output rows.
      y_pred: 2-D array of predictions with one column per label.

    Returns:
      DataFrame with columns 'support' (sum of the true column), 'loss' and
      'delta' (loss minus the mean loss over all columns), sorted by support
      in descending order.
    """
    y_true = y_true_df.values
    # Clip so that log() never sees exactly 0 or 1.
    y_pred = np.clip(y_pred.astype('float32'), 1e-7, 1 - 1e-7)
    total_loss = 0
    scores = []
    for i in range(y_pred.shape[1]):
      col_loss = - np.mean(y_true[:, i] * np.log(y_pred[:, i]) + (1 - y_true[:, i]) * np.log(1 - y_pred[:, i]))
      scores.append([y_true[:,i].sum(), col_loss])
      total_loss += col_loss
    total_loss = total_loss / y_pred.shape[1]
    df = pd.DataFrame(np.array(scores), index=y_true_df.columns, columns=['support', 'loss'])
    df['delta'] = df['loss'] - total_loss
    df = df.sort_values(by='support', ascending=False)
    return df


In [None]:
# Locate the data directory (Kaggle input folder, or the cwd after a download).
BASE_PATH = Utils.get_basepath('lish-moa', test_file='train_features.csv', key=KAGGLE_KEY)

# Load the five CSV files in one pass, in the order they are unpacked below.
train_features, test_features, train_targets, train_targets_nonscored, submission = (
    pd.read_csv(BASE_PATH + csv_name)
    for csv_name in (
        'train_features.csv',
        'test_features.csv',
        'train_targets_scored.csv',
        'train_targets_nonscored.csv',
        'sample_submission.csv',
    )
)

# Keep the cp_type columns aside; they are used later to zero out ctl_vehicle rows.
train_cptypes, test_cptypes = train_features['cp_type'], test_features['cp_type']

# Visualise Data

In [None]:
# Show the first rows of the training features.
display(train_features.head())

# Pre Process

In [None]:
class Features:
  """Keeps the current feature count together with a log of every change."""

  def __init__(self):
    self.hist = []
    self.length = 0

  def update(self, length, why='Unknown reason'):
    """Store `length` as the new count; print and remember the change message."""
    previous = self.length
    delta = length - previous
    self.length = length
    message = f'Features updated: {previous}+{delta}={length} because:{why}'
    print(message)
    self.hist.append(message)

  def len(self):
    """Return the most recently recorded feature count."""
    return self.length

  def history(self):
    """Print every recorded change message, oldest first."""
    for entry in self.hist:
      print(entry)

# Notebook-wide tracker instance.
features = Features()

In [None]:
class Preprocess_:
  """Stateless column-wise preprocessing for the feature and target frames."""

  @staticmethod
  def fit(X): pass

  @staticmethod
  def transform(X):
    """Return a preprocessed copy of `X`; the input frame is left untouched.

    Per column, when present:
      * 'cp_type': 'ctl_vehicle' -> 0, 'trt_cp' -> 1
      * 'cp_dose': 'D1' -> -0.5, 'D2' -> 0.5
      * 'cp_time': value / 24 - 2
      * names containing 'g-' or 'c-': value / 10
      * 'sig_id': dropped
    """
    remove = ['sig_id']
    # Work on a copy. Without it the caller's frame was modified in place for
    # every column processed before the first drop (i.e. whenever 'sig_id' was
    # missing or not the first column).
    X = X.copy()
    for col in X.columns:
      if 'cp_type' == col: X['cp_type'] = X['cp_type'].replace({ 'ctl_vehicle': 0, 'trt_cp': 1 })
      if 'cp_dose' == col: X['cp_dose'] = X['cp_dose'].replace({ 'D1': -0.5, 'D2': 0.5 })
      if 'cp_time' == col: X['cp_time'] = (X['cp_time']/24)-2
      if 'g-' in col: X[col] = X[col]/10
      if 'c-' in col: X[col] = X[col]/10
      if col in remove: X = X.drop([col], axis=1)
    return X

In [None]:
# NOTE(review): none of these four switches is read anywhere in the visible part
# of the notebook — confirm whether they are still needed.
USE_PCA = True
ADD_FEATURES = True
REMOVE_INVARIANT = False
USE_IMPORTANT_FEATURES = True

# Preprocess
# Encode/scale the columns and drop sig_id (see Preprocess_.transform). Each frame is
# rebound to the transformed result, so running this cell twice rescales the features again.
train_features = Preprocess_.transform(train_features)
test_features = Preprocess_.transform(test_features)
train_targets = Preprocess_.transform(train_targets)
# Record the feature count; loadModel reads features.len() as the model input width.
features.update(train_features.shape[1], 'using all features')

In [None]:
# Cast all three frames to float32. Each frame is rebound before the next one is
# converted, so only one extra copy exists at a time.
train_features = train_features.astype('float32')
train_targets = train_targets.astype('float32')
test_features = test_features.astype('float32')

# Create Models

In [None]:
def CNNModel(layers, lr=3e-4, inputs=786, outputs=206, lookahead=False, initial_dropout=0):
    """Build and compile a small 2-D CNN over a flat feature vector.

    The flat input of width `inputs` is reshaped to the (rows, cols, 1) grid
    returned by `best_split(inputs)` and passed through one
    Conv2D -> MaxPooling2D (-> Dropout) stage per entry of `layers`, followed
    by a sigmoid Dense layer with `outputs` units.

    Args:
        layers: Sequence of (filters, kernel_size, dropout_rate) entries.
        lr: Learning rate or schedule object handed to Adam.
        inputs: Width of the flat input vector.
        outputs: Number of sigmoid output units.
        lookahead: Wrap Adam in tfa's Lookahead (sync_period=6) when True.
        initial_dropout: Dropout rate applied right after the reshape.

    Returns:
        The compiled tf.keras.Sequential model.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(inputs,)))
    model.add(tf.keras.layers.Reshape(best_split(inputs)))
    model.add(tf.keras.layers.Dropout(initial_dropout))

    for layer in layers:
        # No input_shape here: the shape comes from the Input/Reshape layers
        # above. The former hard-coded (5, 157, 1) did not match that reshape.
        model.add(tf.keras.layers.Conv2D(layer[0], layer[1], padding='same', activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D((2, 2), padding='same'))
        if layer[2] > 0:
            model.add(tf.keras.layers.Dropout(layer[2]))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(outputs, activation='sigmoid'))

    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    if lookahead:
        # Wrap the optimizer built above instead of constructing a second Adam.
        optim = tfa.optimizers.Lookahead(optim, sync_period=6)

    model.compile(
        optimizer=optim,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['AUC', 'binary_crossentropy', tf.keras.metrics.FalseNegatives()],
    )
    return model

def best_split(number):
    """Return the most square-like (rows, cols, 1) shape with rows * cols == number.

    `rows` is the largest divisor of `number` that does not exceed its square
    root, so rows <= cols. Perfect squares give (r, r, 1) and primes give
    (1, number, 1).

    Args:
        number: Positive integer to factor.

    Returns:
        Tuple (rows, cols, 1), or (0, 0, 1) when `number` is smaller than 1.
    """
    if number < 1:
        return (0, 0, 1)
    rows = 1
    candidate = 1
    # Scanning up to sqrt(number) inclusive also covers 1, 2 and 4, which the
    # previous range(1, ceil(number / 2)) scan got wrong.
    while candidate * candidate <= number:
        if number % candidate == 0:
            rows = candidate
        candidate += 1
    return (rows, number // rows, 1)


# Create Engine

In [None]:
# Permutation importance through eli5 is optional. While the flag is off the package
# is neither installed nor imported, and Eli5.get_importance / Eli5.save return early.
USE_ELI5 = False

if USE_ELI5:
  # eli5
  Utils.install('eli5')
  import eli5
  from eli5.permutation_importance import get_score_importances

class Eli5:
  """Accumulates eli5 permutation importances; inactive unless USE_ELI5 is set."""

  def __init__(self, shape, seeds, batch):
    self.seeds = seeds
    self.batch = batch
    self.importance = np.zeros(shape)

  def score_fn(self):
    """Return an (X, y) -> log-loss scorer that predicts with `self.model`."""
    return lambda X, y: Metrics.log_loss(y, self.model.predict(X, batch_size=self.batch))

  def get_importance(self, model, X, y, n_iter=2, random_state=0):
    """Add the mean permutation importance of `model` on (X, y) to the running total."""
    # Called for each fold of a seed
    if USE_ELI5:
      self.model = model
      began = time.time()
      print(f'Starting to get importance')
      base_score, local_importance = get_score_importances(
          score_func=self.score_fn(),
          X=X,
          y=y,
          n_iter=n_iter,
          random_state=random_state,
      )
      time_feature_importance = (time.time() - began)/60
      print(f'Calculated importance in {time_feature_importance:.1f}m')
      self.importance += np.mean(local_importance, axis=0)

  def save(self, model_name, columns):
    """Divide the total by the seed count, print it, and write the negative-importance indices to JSON."""
    if USE_ELI5:
      self.importance = self.importance / (self.seeds)
      ranked = pd.DataFrame([self.importance], columns=columns).transpose()
      ranked = ranked.sort_values(by=0)
      print(ranked)

      # Indices of the features whose accumulated importance is below zero.
      negative_idx = np.argwhere(self.importance < 0).flatten().tolist()
      with open(f'{model_name}_top_features.json', 'w') as outfile:
        json.dump(negative_idx, outfile)


In [None]:
class LearningRates:
  """Factories that return learning-rate schedule objects with this notebook's defaults."""

  @staticmethod
  def ExponentialDecay(init=1e-3, decay_steps=10, decay_rate=0.5, staircase=False):
    schedules = tf.keras.optimizers.schedules
    options = dict(staircase=staircase, name='exp_decay')
    return schedules.ExponentialDecay(init, decay_steps, decay_rate, **options)

  @staticmethod
  def InverseTimeDecay(init=1e-3, decay_steps=10, decay_rate=0.5, staircase=False):
    schedules = tf.keras.optimizers.schedules
    options = dict(staircase=staircase, name='inv_time_decay')
    return schedules.InverseTimeDecay(init, decay_steps, decay_rate, **options)

  @staticmethod
  def PiecewiseConstantDecay(boundaries=[20,40,60,80], values=[1e-3, 1e-4, 1e-5, 1e-6, 1e-7]):
    # `values` holds one more entry than `boundaries`: one rate per interval.
    schedules = tf.keras.optimizers.schedules
    return schedules.PiecewiseConstantDecay(boundaries, values, name='piece_const_decay')

  @staticmethod
  def PolynomialDecay(init=1e-3, decay_steps=100, end_lr=0.0001, power=0.5, cycle=False):
    schedules = tf.keras.optimizers.schedules
    options = dict(end_learning_rate=end_lr, power=power, cycle=cycle, name='poly_decay')
    return schedules.PolynomialDecay(init, decay_steps, **options)

  @staticmethod
  def CosineDecay(init=1e-3, decay_steps=100, alpha=0):
    # https://arxiv.org/abs/1608.03983
    experimental = tf.keras.experimental
    return experimental.CosineDecay(init, decay_steps, alpha=alpha, name='cos_decay')

  @staticmethod
  def CosineDecayRestarts(init=1e-3, decay_steps=20, t_mul=2.0, m_mul=1.0, alpha=0.0):
    experimental = tf.keras.experimental
    options = dict(t_mul=t_mul, m_mul=m_mul, alpha=alpha, name='cos_decay_restarts')
    return experimental.CosineDecayRestarts(init, decay_steps, **options)

  @staticmethod
  def LinearCosineDecay(init=1e-3, decay_steps=100, periods=3, alpha=0.0, beta=0.001):
    experimental = tf.keras.experimental
    options = dict(num_periods=periods, alpha=alpha, beta=beta, name='linear_cos_decay')
    return experimental.LinearCosineDecay(init, decay_steps, **options)

  @staticmethod
  def NoisyLinearCosineDecay(init=1e-3, decay_steps=100, init_var=0.4, var_decay=0.6, periods=3, alpha=0.0, beta=0.001):
    experimental = tf.keras.experimental
    options = dict(
      initial_variance=init_var,
      variance_decay=var_decay,
      num_periods=periods,
      alpha=alpha,
      beta=beta,
      name='noisy_linear_cos_decay',
    )
    return experimental.NoisyLinearCosineDecay(init, decay_steps, **options)

class LearningRateCallbacks:
  """Factories for Keras callbacks that change the learning rate during training."""

  @staticmethod
  def ReduceOnPlateau(metrics, factor=0.25, patience=3, verbose=0, mode='auto', min_delta=0.0001, cooldown=1, min_lr=0):
    """Return a ReduceLROnPlateau callback that monitors `metrics`."""
    options = dict(
      monitor=metrics,
      factor=factor,
      patience=patience,
      verbose=verbose,
      mode=mode,
      min_delta=min_delta,
      cooldown=cooldown,
      min_lr=min_lr,
    )
    return tf.keras.callbacks.ReduceLROnPlateau(**options)

  @staticmethod
  def Scheduler(schedule, verbose=0):
    """Wrap `schedule` in a LearningRateScheduler callback."""
    # schedule takes epoch(int), lr(float) as inputs, returns lr
    return tf.keras.callbacks.LearningRateScheduler(schedule, verbose=verbose)

  @staticmethod
  def RampUpExponentialDecay(init=0.001, max=0.005, min=0.000001, ramp=5, sustain=2, decay=0.7, verbose=0, return_func=False):
    """Linear ramp from `init` to `max`, a hold at `max`, then exponential decay towards `min`.

    Returns the bare epoch -> lr function when `return_func` is true, otherwise
    that function wrapped by `LearningRateCallbacks.Scheduler`.
    """
    def lr_function(epoch, lr=0):
      # Ramp phase: the first `ramp` epochs.
      if epoch < ramp:
        return (max - init) / ramp * epoch + init
      # Hold phase: the following `sustain` epochs.
      if epoch < ramp + sustain:
        return max
      # Decay phase.
      return (max - min) * decay**(epoch - ramp - sustain) + min

    if not return_func:
      return LearningRateCallbacks.Scheduler(lr_function, verbose=verbose)
    return lr_function

def plot_all():
  """Plot every schedule from LearningRates plus RampUpExponentialDecay over 100 epochs.

  The curves are drawn into a 4x3 subplot grid, one subplot per schedule,
  each labelled on its x axis. Every schedule object is built once and then
  evaluated at each epoch index.
  """
  epochs = list(range(100))
  rows, cols = 4, 3
  curves = [
    ('ExponentialDecay', LearningRates.ExponentialDecay()),
    ('ExponentialDecay[Staircase]', LearningRates.ExponentialDecay(staircase=True)),
    ('InverseTimeDecay', LearningRates.InverseTimeDecay()),
    ('InverseTimeDecay[Staircase]', LearningRates.InverseTimeDecay(staircase=True)),
    ('PiecewiseConstantDecay', LearningRates.PiecewiseConstantDecay()),
    ('PolynomialDecay', LearningRates.PolynomialDecay()),
    ('CosineDecay', LearningRates.CosineDecay()),
    ('CosineDecayRestarts', LearningRates.CosineDecayRestarts()),
    ('LinearCosineDecay', LearningRates.LinearCosineDecay()),
    ('NoisyLinearCosineDecay', LearningRates.NoisyLinearCosineDecay()),
    ('RampUpExponentialDecay', LearningRateCallbacks.RampUpExponentialDecay(return_func=True)),
  ]

  plt.figure(figsize=(20,15))
  for position, (label, schedule) in enumerate(curves, start=1):
    plt.subplot(rows, cols, position)
    plt.plot(epochs, [schedule(epoch) for epoch in epochs])
    plt.xlabel(label)
  plt.show()

plot_all()

In [None]:
class Engine:
    """Multi-seed, K-fold training loop that produces validation and test predictions.

    Relies on notebook-level names resolved at call time: `loadModel`, `Eli5`,
    `Metrics`, `MultilabelStratifiedKFold`, `train_cptypes` and `test_cptypes`.
    """

    def __init__(self, X, y, X_test, submission):
        """Store the training features/targets, the test features and the submission template."""
        self.X = X
        self.y = y
        self.X_test = X_test
        self.submission = submission

    def train(self, name, model_details, folds=10, epochs=200, batch_size=1024, seeds=[0,1,2], verbose=False, early_stopping=6):
        """Train one model configuration over every seed and fold.

        For each seed the data is split with MultilabelStratifiedKFold; a fresh
        model is fitted per fold with early stopping on
        'val_binary_crossentropy'. Validation predictions are summed into
        `result`, test predictions into `submit`. Both are averaged, rows whose
        cp_type is 'ctl_vehicle' are zeroed, and the frames are written to
        '{name}_result.csv' and '{name}_submission.csv'.

        Args:
            name: Label used in log lines and output file names.
            model_details: Entry of MODELS; passed to `loadModel`, may carry an
                'lr_callback'.
            folds: Number of CV folds per seed.
            epochs: Maximum number of epochs per fit.
            batch_size: Batch size for fit and predict.
            seeds: Random states for the fold splitter, one CV pass each.
            verbose: Forwarded to Keras as 1/0.
            early_stopping: Patience of the EarlyStopping callback.

        Returns:
            The dict from `self.report`, extended with 'y_pred' (the averaged
            validation predictions) and 'time' (minutes).
        """
        self.folds = folds
        self.epochs = epochs
        self.batch = batch_size

        started = time.time()
        plt.figure(figsize=(21, 7))
        verboseInt = 1 if verbose else 0

        # Accumulators: validation predictions (shaped like y) and test predictions.
        result = self.y.copy()
        result.loc[:, self.y.columns] = 0
        submit = self.submission.copy()
        submit.loc[:, self.y.columns] = 0

        # Built once only to print the model summary.
        loadModel(model_details, summary=True)

        # One accumulator for the whole run. Creating it inside the fold loop
        # reset the importance on every fold, so save() only saw the last fold.
        eli = Eli5(self.X.shape[1], len(seeds), self.batch)

        losses = []
        print(f'Model:{name}')
        for scount, seed in enumerate(seeds):
            kf = MultilabelStratifiedKFold(n_splits=self.folds, random_state=seed, shuffle=True)
            for fold, (train_idx, val_idx) in enumerate(kf.split(self.X, self.y)):

                model = loadModel(model_details)

                callbacks = [
                  tf.keras.callbacks.EarlyStopping(monitor='val_binary_crossentropy', patience=early_stopping, verbose=verboseInt, mode='min', restore_best_weights=True,),
                  # tf.keras.callbacks.ModelCheckpoint(filepath=f'{name}_{fold}.h5', monitor='val_binary_crossentropy', verbose=verboseInt, save_best_only=True, save_weights_only=False, mode='min',),
                ]
                if 'lr_callback' in model_details: callbacks = callbacks + [model_details['lr_callback']]

                history = model.fit(
                    x=self.X.iloc[train_idx,:].values,
                    y=self.y.iloc[train_idx].values,
                    validation_data=(self.X.iloc[val_idx,:].values, self.y.iloc[val_idx].values),
                    batch_size=self.batch, epochs=self.epochs, verbose=verboseInt,
                    callbacks=callbacks,
                )
                plt.subplot(1, len(seeds)+2, scount+1) # an extra one for learning_rate
                plt.plot(history.epoch, history.history['val_binary_crossentropy'], color='blue')
                plt.plot(history.epoch, history.history['binary_crossentropy'], linestyle='--', color='green')
                plt.xlabel('Epochs')
                plt.ylabel('BCE')

                y_pred = model.predict(
                    self.X.iloc[val_idx,:].values,
                    batch_size=self.batch
                )

                loss = Metrics.log_loss(self.y.iloc[val_idx].values, y_pred)
                losses.append(loss)

                # val_idx holds positions; .loc matches them only with a default RangeIndex.
                result.loc[val_idx, self.y.columns] += y_pred
                submit.loc[:, self.y.columns] += model.predict(self.X_test.values, batch_size=self.batch)

                time_taken = time.time() - started
                print(f'Model:{name};Fold:{fold}_{scount};LL:{loss:.8f};CV:{np.array(losses).mean():.8f};Time:{time_taken:.0f}s')

                eli.get_importance(model, self.X.iloc[val_idx,:].values, self.y.iloc[val_idx].values)

            # Running score after this seed: average over the seeds seen so far,
            # reported without (LL) and with (LLwC) the control rows zeroed.
            temp_result = result.copy()
            temp_result.loc[:, self.y.columns] /= (scount+1)
            cv = Metrics.log_loss(self.y.values, temp_result.values)
            temp_result.loc[train_cptypes == 'ctl_vehicle', self.y.columns] = 0
            ll = Metrics.log_loss(self.y.values, temp_result.values)
            print(f'Model:{name};LLwC:{ll:.8f};LL:{cv:.8f}\n')

        # Learning-rate curve of the last fit: prefer what Keras logged; otherwise
        # evaluate the schedule per epoch index, or repeat a constant rate.
        logged_lr = history.history.get('lr', history.history.get('learning_rate'))
        if logged_lr is not None:
            lr_data = logged_lr
        else:
            lr_param = model_details['params']['lr']
            if callable(lr_param):
                lr_data = [lr_param(i).numpy() for i in history.epoch]
            else:
                lr_data = [lr_param] * len(history.epoch)
        plt.subplot(1, len(seeds)+2, scount+2) # an extra one for learning_rate
        plt.plot(history.epoch, lr_data, color='red')
        plt.xlabel('Epochs')
        plt.ylabel('Learning Rate')
        plt.show()

        eli.save(name, self.X.columns)

        # Each test row was predicted once per fold and seed.
        submit.loc[:, self.y.columns] /= (self.folds * len(seeds))
        submit.loc[test_cptypes == 'ctl_vehicle', self.y.columns] = 0
        submit.to_csv(f'{name}_submission.csv', index=False)

        # Each training row was a validation row once per seed.
        result.loc[:, self.y.columns] /= len(seeds)
        result.loc[train_cptypes == 'ctl_vehicle', self.y.columns] = 0
        result.to_csv(f'{name}_result.csv', index=False)

        time_taken = (time.time() - started)/60
        report = self.report(result.values, np.array(losses))
        report['y_pred'] = result
        report['time'] = time_taken
        print(f'Model:{name};CV:{report["cv_mean"]:.8f};CVStd:{report["cv_std"]:.8f};LLwC:{report["log_loss"]:.8f};F1W:{report["f1_weighted"]:.8f};BF1W:{report["best_f1_weighted"]:.8f};BF1T:{report["best_threshold"]:.6f};Time:{time_taken:.1f}m\n')
        return report

    def report(self, y_pred, losses):
        """Summarise `y_pred` against the training targets.

        'best_f1_weighted' and 'best_threshold' are fixed placeholders (0); no
        threshold search is run here.
        """
        y_true = self.y.values
        return {
            'log_loss': Metrics.log_loss(y_true, y_pred),
            'f1_weighted': Metrics.f1_weighted(y_true, y_pred),
            'best_f1_weighted': 0,
            'best_threshold': 0,
            'cv_mean': losses.mean(),
            'cv_std': losses.std(),
            'losses': losses,
            'classification': Metrics.classification_report(y_true, y_pred, self.y.columns),
        }

    def show_weights_for_dense_layers(self, name):
        """Plot the sorted second weight array of every dense layer, per fold.

        Loads '{name}_{fold}.h5' for each fold; train() writes these files only
        if its ModelCheckpoint callback is enabled (currently commented out).
        """
        for fold in range(self.folds):
            filename = f'{name}_{fold}.h5'
            print(filename)
            model = tf.keras.models.load_model(filename)
            for layer in model.layers:
                config = layer.get_config()
                if 'dense' in config['name']:
                    plt.plot(sorted(layer.get_weights()[1]))
                    plt.xlabel(config['name'] + ' units ' + str(config['units']))
                    plt.show()

engine = Engine(
    X=train_features,
    y=train_targets,
    X_test=test_features,
    submission=submission,
)

In [None]:
class Ensemble:
    """Greedy forward blending of per-model prediction files.

    Starts from the model in the first row of `df` and repeatedly mixes in the
    remaining model/weight pair that lowers the log loss the most, stopping as
    soon as no candidate improves it. Predictions are read from
    '{model}_result.csv' and '{model}_submission.csv'.
    """

    def __init__(self, df, y_true, splits=100):
        """
        Args:
            df: Frame with 'model' and 'log_loss' columns; the first row seeds the blend.
            y_true: DataFrame of true targets.
            splits: Number of intervals for the weight grid on [0, 1].
        """
        # Rows are addressed by position (iloc) and removed by label (drop), so
        # force a 0..n-1 index to keep the two views consistent.
        self.df = df.reset_index(drop=True)
        self.y_true = y_true.values
        self.splits = splits

    def set_splits(self, splits):
        """Change the resolution of the weight grid."""
        self.splits = splits

    def improve(self):
        """Run the greedy blend.

        Consumes `self.df`: every model that is used gets removed from it.

        Returns:
            (ens_submit, ens_result): blended test and training prediction arrays.
        """
        name = self.df.iloc[0]['model']
        ens_result = self.read_result(name)
        ens_submit = self.read_submission(name)
        ens_loss = Metrics.log_loss(self.y_true, ens_result)
        orig_loss = ens_loss.copy()
        print(f'Starting loss with {name}: {ens_loss:.8f}')
        self.remove_row(0)

        while(self.df.shape[0] > 0):
            ## Find the best match
            best_id, best_name, best_weight, best_loss = self.find_best(ens_loss, ens_result)

            # No remaining model improves the blend.
            if best_id is None:
                break

            ## Calculate result ensemble
            result = self.read_result(best_name)
            ens_result = ((1 - best_weight) * ens_result) + (best_weight * result)
            ens_loss = Metrics.log_loss(self.y_true, ens_result)
            improvement_temp = (1 - (ens_loss / orig_loss))*100
            print(f'Ensemble loss updated to {ens_loss:.8f}, about {improvement_temp:.2f}%')

            ## Calculate submission ensemble
            submit = self.read_submission(best_name)
            ens_submit = ((1 - best_weight) * ens_submit) + (best_weight * submit)

            ## Remove best row
            self.remove_row(best_id)

        improvement = (1 - (ens_loss / orig_loss))*100
        print(f'Final loss: {ens_loss:.8f}, about {improvement:.2f}%')
        return ens_submit, ens_result

    def find_best(self, ens_loss, ens_result):
        """Search the remaining models and the weight grid for the largest loss reduction.

        Returns:
            (best_id, best_name, best_weight, best_loss); `best_id` is None when
            no candidate beats `ens_loss`.
        """
        best_id = None
        best_name = ''
        best_weight = 0
        best_loss = ens_loss

        for count in range(0, self.df.shape[0]):
            name = self.df.iloc[count]['model']
            loss = self.df.iloc[count]['log_loss']
            result = self.read_result(name)
            print(f'Checking {name} with loss {loss:.8f}', end='')

            loop_best_name = name
            loop_best_weight = 0
            # A candidate has to beat the best blend found so far, not just the
            # current ensemble.
            loop_best_loss = best_loss

            for w in np.linspace(0, 1, num=self.splits+1):
                temp_result = ((1 - w) * ens_result) + (w * result)
                temp_loss = Metrics.log_loss(self.y_true, temp_result)
                if temp_loss < loop_best_loss:
                    loop_best_weight = w
                    loop_best_loss = temp_loss
                    print('+', end='')
                else:
                    print('.', end='')

            if loop_best_loss < best_loss:
                best_id = count
                best_name = loop_best_name
                best_weight = loop_best_weight
                best_loss = loop_best_loss
                print(f'{best_loss:.8f} ({best_weight:.2f})', end='')
            print('')

        print(f'Best Match;Idx:{best_id};Model:{best_name};Wt:{best_weight};Loss:{best_loss:.8f}\n')
        return best_id, best_name, best_weight, best_loss

    def read_result(self, name):
        """Load '{name}_result.csv' as an array."""
        return pd.read_csv(f'{name}_result.csv').values

    def read_submission(self, name):
        """Load '{name}_submission.csv' as an array, without the 'sig_id' column."""
        return pd.read_csv(f'{name}_submission.csv').drop(['sig_id'], axis=1).values

    def remove_row(self, idx):
        """Drop row `idx` and renumber the remaining rows from 0."""
        self.df = self.df.drop([idx]).reset_index(drop=True)

# Train and Evaluate

In [None]:
# Constant learning rate for the model that relies on ReduceLROnPlateau.
lr = 1e-3

# Model registry: name -> {'type', 'params', optional 'lr_callback'}.
# 'params' is expanded into keyword arguments of the model builder by loadModel;
# each 'layers' tuple is read by CNNModel as (filters, kernel_size, dropout_rate).
MODELS = {
    # CNN
    'cnn_32_rlr': { 'type': 'CNN', 'params': {
       'layers': [
            (32, (3,3), 0.25),
        ],
        'lr': lr,
    },  'lr_callback': LearningRateCallbacks.ReduceOnPlateau('val_binary_crossentropy') },
    'cnn_16': { 'type': 'CNN', 'params': {
       'layers': [
            (16, (3,3), 0.25),
        ],
        'lr': LearningRates.CosineDecayRestarts(),
    }},
    'cnn_32_i': { 'type': 'CNN', 'params': {
       'layers': [
            (32, (3,3), 0.25),
        ],
        'lr': LearningRates.CosineDecayRestarts(), 'initial_dropout': 0.25,
    }},
    'cnn_32_16': { 'type': 'CNN', 'params': {
       'layers': [
            (32, (3,3), 0.25),
            (16, (3,3), 0.25),
        ],
        'lr': LearningRates.CosineDecayRestarts(),
    }},
}

def loadModel(detail, save=False, summary=False, name=None):
  """Build the model described by a MODELS entry inside the distribution strategy scope.

  Args:
    detail: Dict with 'type' and 'params'. 'inputs' and 'outputs' are written
      into detail['params'] (from features.len() and train_targets.shape[1])
      before the builder is called.
    save: Save the freshly built model to '{name}_base.h5'.
    summary: Print the model summary.
    name: File-name prefix used when `save` is true; defaults to the model type.

  Returns:
    The compiled model.

  Raises:
    ValueError: if detail['type'] is not a known model type.
  """
  with strategy.scope():
    detail['params']['inputs'] = features.len()
    detail['params']['outputs'] = train_targets.shape[1]
    model_type = detail['type']
    if model_type != 'CNN':
      # Fail here with a clear message rather than returning None and failing
      # later on model.fit.
      raise ValueError(f'Unknown model type: {model_type!r}')
    model = CNNModel(**detail['params'])
    if summary and hasattr(model, 'summary'): model.summary()
    if save and hasattr(model, 'save'):
      # The previous f'{m}_base.h5' referred to a name that is not defined in
      # this function.
      prefix = model_type if name is None else name
      model.save(f'{prefix}_base.h5')
    return model

In [None]:
# Run-wide defaults. A MODELS entry may override them with its own 'epochs', 'batch',
# 'folds', 'seeds' or 'early_stopping' key (see train()).
VERBOSE = False
FOLDS = 5
EPOCHS = 100
# Per-replica batch size; train() multiplies it by strategy.num_replicas_in_sync.
BATCH = 128
SEEDS = [3,4,5,6] #3,4,5,6 / 4,5
ENSEMBLE = True
ENSEMBLE_SPLITS = 100
EARLY_STOPPING = 6

TRAIN = True
# NOTE(review): USE_OPTUNA is not read anywhere in the visible notebook — confirm it is still needed.
USE_OPTUNA = not TRAIN

# Print the log of feature-count changes recorded so far.
features.history()

def train():
  """Train every entry of MODELS and return one summary row per model.

  Per-model 'epochs', 'batch', 'folds', 'seeds' and 'early_stopping' keys
  override the notebook-level defaults. The batch size is scaled by the number
  of replicas in the distribution strategy.

  Returns:
    DataFrame sorted by 'log_loss' (ascending) with a fresh 0..n-1 index.
  """
  columns = ['model', 'log_loss', 'time', 'f1_weighted', 'best_f1_weighted', 'best_threshold', 'cv_mean', 'cv_std', 'losses', 'classification', 'y_pred']
  # Collect plain rows and build the frame once at the end: DataFrame.append
  # was removed in pandas 2.0 and re-copied the whole frame on every call.
  rows = []
  # Test details for folds - https://docs.google.com/spreadsheets/d/1gGsfor70pD9vZDUHn504cAZP_jGlAeT7X95m1XVPMdY/edit#gid=0
  for m in MODELS:
    detail = MODELS[m]
    epochs = EPOCHS if 'epochs' not in detail else detail['epochs']
    batch = BATCH if 'batch' not in detail else detail['batch']
    folds = FOLDS if 'folds' not in detail else detail['folds']
    seeds = SEEDS if 'seeds' not in detail else detail['seeds']
    early_stopping = EARLY_STOPPING if 'early_stopping' not in detail else detail['early_stopping']
    report = engine.train(
        name=m,
        model_details=detail,
        folds=folds,
        epochs=epochs,
        batch_size=batch * strategy.num_replicas_in_sync,
        verbose=VERBOSE,
        seeds=seeds,
        early_stopping=early_stopping,
    )
    rows.append(
        [m, report["log_loss"], report["time"], report["f1_weighted"], report["best_f1_weighted"], report["best_threshold"], report["cv_mean"], report["cv_std"], report["losses"], report["classification"], report['y_pred']]
    )
    # Release per-model objects before the next model is trained.
    gc.collect()

  results = pd.DataFrame(rows, columns=columns)
  return results.sort_values(by=['log_loss']).reset_index(drop=True)

if TRAIN:
  results = train()

In [None]:
# Show the summary table without the bulky per-model objects.
# `results` is only defined when TRAIN is True.
results.drop(['losses', 'classification', 'y_pred'], axis=1)

# Post training

In [None]:
# Greedy blender over the models listed in `results`, scored against train_targets.
ensemble = Ensemble(
    results,
    train_targets,
    splits=ENSEMBLE_SPLITS,
)

def get_ensemble():
    """Blend all trained models and write the blended test predictions to 'submission.csv'.

    Returns:
        (submitted, ensemble_result): the submission frame and the blended
        training predictions returned by `ensemble.improve()`.
    """
    print('Getting ensemble')
    began = time.time()
    blended_test, blended_train = ensemble.improve()
    print(f'Time: {time.time() - began}')

    target_cols = train_targets.columns
    submitted = pd.DataFrame(blended_test, columns=target_cols)
    submitted.insert(0, column='sig_id', value=submission['sig_id'])
    # Rows whose cp_type is 'ctl_vehicle' get all-zero predictions.
    is_control = test_cptypes == 'ctl_vehicle'
    submitted.loc[is_control, target_cols] = 0
    submitted.to_csv('submission.csv', index=False)
    return submitted, blended_train

def get_best(best_model):
    """Copy the submission of `best_model` to 'submission.csv'.

    Returns:
        (submitted, None): the submission frame; there is no blended training
        prediction in this mode.
    """
    print('Getting best model')
    source_file = f'{best_model}_submission.csv'
    submitted = pd.read_csv(source_file)
    submitted.to_csv('submission.csv', index=False)
    return (submitted, None)

# train() sorts `results` by log_loss ascending, so row 0 is the best single model.
best_model = results.iloc[0]['model']
# get_best returns None as its second element, so `result` is None when ENSEMBLE is False.
submitted, result = get_ensemble() if ENSEMBLE else get_best(best_model)

In [None]:
# Per-target log loss of the final predictions; requires ENSEMBLE=True, because
# `result` is None when the single best model is used.
display(Metrics.display_column_log_loss(train_targets, result))

In [None]:
# Per-target precision/recall table with TP/FN/FP counts for the final predictions
# (same ENSEMBLE=True requirement: `result` is None otherwise).
display(Metrics.display_f1(Metrics.classification_report(train_targets.values, result, train_targets.columns)))

In [None]:
# First rows of the frame that was written to submission.csv.
submitted.head()