In [1]:
%%writefile inference.py
import sys
sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

Writing inference.py


In [2]:
%%writefile -a inference.py

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
import tensorflow_addons as tfa

from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, BatchNormalization
from tensorflow_addons.layers import WeightNormalization
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping


from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics import log_loss

# Log the Keras and TensorFlow versions (one per line) so a run can be traced to its environment.
print(keras.__version__, tf.__version__, sep='\n')

Appending to inference.py


# Load data

In [3]:
%%writefile -a inference.py

# Read the three competition tables in a fixed order: train features, train targets, test features.
train_features, train_targets, test_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/test_features.csv',
    )
)

Appending to inference.py


# Preprocess data

In [4]:
%%writefile -a inference.py

def preprocess_features(df):
    """Return an encoded copy of a feature table; the input frame is left untouched.

    `cp_dose` is mapped D1 -> 0, D2 -> 1 and `cp_time` is mapped 24 -> 0, 48 -> 1,
    72 -> 2. The `sig_id` and `cp_type` columns are removed from the result.
    """
    dose_codes = {'D1': 0, 'D2': 1}
    time_codes = {24: 0, 48: 1, 72: 2}

    # assign() builds a new frame, so the caller's DataFrame is never modified.
    encoded = df.assign(
        cp_dose=df['cp_dose'].map(dose_codes),
        cp_time=df['cp_time'].map(time_codes),
    )
    return encoded.drop(columns=['sig_id', 'cp_type'])
    
# Encode both feature tables with the same function, then remove the id column from the targets.
train_features, test_features = (
    preprocess_features(frame) for frame in (train_features, test_features)
)
train_targets = train_targets.drop(columns=['sig_id'])

Appending to inference.py



# Model


In [5]:
%%writefile -a inference.py

def create_training_graphs(training_history):
    """Plot training vs. validation curves for one training run.

    Args:
        training_history: object exposing a `history` dict of per-epoch values
            (as returned by `model.fit`). It must contain 'loss' and 'val_loss'.

    The loss panel is always drawn. The accuracy panel is drawn only when both
    'accuracy' and 'val_accuracy' were recorded: the model in this file is
    compiled without metrics, so indexing those keys unconditionally would
    raise KeyError.
    """
    recorded = training_history.history

    panels = [('loss', 'Loss')]
    if 'accuracy' in recorded and 'val_accuracy' in recorded:
        panels.append(('accuracy', 'Accuracy'))

    plt.figure(figsize=(16, 6))
    for position, (key, label) in enumerate(panels, start=1):
        # With both panels this is the original 1x2 layout; with loss only it is a single plot.
        plt.subplot(1, len(panels), position)
        plt.plot(recorded[key])
        plt.plot(recorded['val_' + key])
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend(['Training', 'Validation'])
        plt.title(label)

Appending to inference.py


In [6]:
%%writefile -a inference.py

def create_model(input_size, output_size=206):
    """Build and compile the feed-forward multi-label classifier.

    Architecture: BatchNorm/Dropout on the input, two weight-normalized ReLU
    dense layers (2048 and 1024 units), each followed by BatchNorm and Dropout,
    and a weight-normalized sigmoid output layer.

    Args:
        input_size: input shape passed to `Input` (the caller passes the
            number of feature columns).
        output_size: number of sigmoid output units; defaults to 206, the
            value that was previously hard-coded.

    Returns:
        A compiled `keras.Sequential` model using binary cross-entropy loss
        and the AdamW optimizer.
    """
    model = keras.Sequential([
        Input(input_size),
        BatchNormalization(),
        Dropout(0.2),
        WeightNormalization(Dense(2048, activation="relu")),
        BatchNormalization(),
        Dropout(0.5),
        WeightNormalization(Dense(1024, activation="relu")),
        BatchNormalization(),
        Dropout(0.5),
        WeightNormalization(Dense(output_size, activation="sigmoid"))
    ])

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    optimizer = tfa.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5, clipvalue=756)
    model.compile(loss=BinaryCrossentropy(label_smoothing=1e-15), optimizer=optimizer)
    return model

Appending to inference.py


# Train

In [7]:
%%writefile -a inference.py

def log_loss_metric(y_true, y_pred):
    """Return the mean of the per-column binary log losses.

    Args:
        y_true: DataFrame of ground-truth labels, one column per target.
        y_pred: DataFrame of predicted probabilities containing at least the
            columns of `y_true`.

    Returns:
        The average over `y_true`'s columns of `log_loss(true, predicted)`.

    The columns are taken from `y_true` itself rather than from the
    module-level `train_targets`, so the function depends only on its arguments.
    """
    per_target_losses = [
        # labels=[0, 1] keeps log_loss well-defined when a column contains a single class.
        log_loss(y_true[target], y_pred[target].astype(float), labels=[0, 1])
        for target in y_true.columns
    ]
    return np.mean(per_target_losses)

Appending to inference.py


In [8]:
%%writefile -a inference.py

# Stop a fit after 10 epochs without val_loss improvement and restore the best weights seen.
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10, verbose=1, mode="auto", baseline=None, restore_best_weights=True)
# Multiply the learning rate by 0.1 after 3 epochs without a val_loss improvement of at least 1e-4.
# `min_delta` is the supported name for this threshold; `epsilon` is its deprecated alias.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_delta=1e-4, mode='min')

Appending to inference.py


In [9]:
%%writefile -a inference.py

# Multi-seed, multi-fold training.
# For every seed the training rows are split into SPLITS stratified folds and one
# model is fitted per fold. Out-of-fold predictions are averaged over seeds, and
# test predictions are averaged over every (seed, fold) model.
SEEDS = 3
SPLITS = 5
MAX_EPOCHS = 500
BATCH_SIZE = 64

histories = []

target_columns = train_targets.columns

# Read the submission template before training so a missing file fails fast.
test_predictions = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Accumulate in float arrays. Adding float predictions into a copy of the target
# table relies on implicit dtype upcasting if the targets are integer-typed.
val_accumulator = np.zeros((len(train_targets), len(target_columns)), dtype=float)
test_accumulator = np.zeros((len(test_features), len(target_columns)), dtype=float)

progress_bar = tqdm(range(SEEDS))
for seed in progress_bar:
    # Seed TensorFlow too, so each seed controls initialisation, dropout and
    # shuffling, not just the fold split.
    tf.random.set_seed(seed)

    mskf = MultilabelStratifiedKFold(n_splits=SPLITS, random_state=seed, shuffle=True)
    for train_idx, val_idx in mskf.split(X=train_features, y=train_targets):
        # split() yields positional indices, so select rows with iloc rather than label-based loc.
        X_train, X_val = train_features.iloc[train_idx], train_features.iloc[val_idx]
        y_train, y_val = train_targets.iloc[train_idx], train_targets.iloc[val_idx]

        model = create_model(len(train_features.columns))
        history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=MAX_EPOCHS, validation_data=(X_val, y_val), callbacks=[early_stopping, reduce_lr_loss])
        histories.append(history)

        # Each row is in exactly one validation fold per seed, hence the 1/SEEDS weight.
        val_accumulator[val_idx] += model.predict(X_val) / SEEDS
        test_accumulator += model.predict(test_features) / (SPLITS * SEEDS)

# Expose the results under the original names and layouts.
val_predictions = pd.DataFrame(val_accumulator, index=train_targets.index, columns=target_columns)
test_predictions[target_columns] = test_accumulator

Appending to inference.py


In [10]:
%%writefile -a inference.py

# preprocess_features removed cp_type, so reload the raw feature tables to recover it.
train_features1, test_features1 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/test_features.csv',
    )
)

oof_score = log_loss_metric(train_targets, val_predictions)
print(f'NN OOF before postprocessing: {oof_score:.6f}')

# Force every target prediction to 0 on rows whose cp_type is 'ctl_vehicle'.
train_is_control = train_features1['cp_type'] == 'ctl_vehicle'
test_is_control = test_features1['cp_type'] == 'ctl_vehicle'
val_predictions.loc[train_is_control, train_targets.columns] = 0
test_predictions.loc[test_is_control, train_targets.columns] = 0

oof_score = log_loss_metric(train_targets, val_predictions)
print(f'NN OOF after postprocessing: {oof_score:.6f}')

Appending to inference.py


# Create submission

In [11]:
%%writefile -a inference.py
# Write the out-of-fold and test predictions to CSV without the row index.
for predictions, csv_name in (
    (val_predictions, 'val-submission.csv'),
    (test_predictions, 'submission.csv'),
):
    predictions.to_csv(csv_name, index=False)

Appending to inference.py


In [12]:
! python inference.py

2020-11-28 23:18:36.037951: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.2
 The versions of TensorFlow you are currently using is 2.3.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons
2.4.0
2.3.1
2020-11-28 23:18:49.427801: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-11-28 23:18:49.781377: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-11-28 23:18:49.782152: I 