In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory once and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook can reach.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed shared by all generators.
    """
    import random  # local import: only this helper needs the stdlib generator

    # NOTE: hash randomization is decided when the interpreter starts, so this
    # assignment only influences child processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


seed_everything(seed=42)

In [None]:
# Directory holding the competition CSV files. Change this single constant to
# run the notebook against a copy of the data stored somewhere else.
DATA_DIR = '/kaggle/input/lish-moa'

train_features = pd.read_csv(os.path.join(DATA_DIR, 'train_features.csv'))
test_features = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))
train_targets_scored = pd.read_csv(os.path.join(DATA_DIR, 'train_targets_scored.csv'))
train_targets_nonscored = pd.read_csv(os.path.join(DATA_DIR, 'train_targets_nonscored.csv'))
sample_submission = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))

In [None]:
# Report the (rows, columns) shape of each raw feature table.
for label, frame in (('Train shape:', train_features), ('Test shape:', test_features)):
    print(label, frame.shape)

In [None]:
# prepare train_features
# Work on a copy so the raw train_features frame read from CSV is left untouched.
new_train_features = train_features.copy()
# Bare last expression: the notebook renders the copied frame.
new_train_features

In [None]:
# Encode the training features from the raw frame in one non-mutating chain,
# so this cell gives the same result no matter how many times it is run:
#   - drop the sig_id and cp_type columns
#   - cp_dose: D1 -> 0, D2 -> 1
#   - cp_time: 24 -> 0, 48 -> 1, 72 -> 2
new_train_features = (
    train_features
    .drop(columns=['sig_id', 'cp_type'])
    .assign(
        cp_dose=lambda frame: frame['cp_dose'].map({'D1': 0, 'D2': 1}),
        cp_time=lambda frame: frame['cp_time'] // 24 - 1,
    )
)
new_train_features

In [None]:
# Build the target matrix: an independent copy of the scored targets
# with the sig_id column removed.
new_train_targets_scored = train_targets_scored.copy().drop(columns=['sig_id'])
new_train_targets_scored

In [None]:
# Hold out 30% of the rows for validation; the fixed random_state keeps the
# split repeatable between runs.
X, y = new_train_features, new_train_targets_scored
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Display the shapes of the four resulting pieces.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
# Number of consecutive epochs without a lower validation loss that are
# tolerated before training is stopped.
EARLY_STOPPING_PATIENCE = 3

# Stop training once val_loss has not decreased for EARLY_STOPPING_PATIENCE
# epochs, and restore the weights from the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=EARLY_STOPPING_PATIENCE,
                               mode='min',
                               restore_best_weights=True)

In [None]:
def create_model(num_columns, num_outputs=206, hidden_units=(2048, 1024, 512)):
    """Build and compile the feed-forward multi-label classifier.

    Layer order: input -> BatchNormalization -> Dropout(0.2); then, for every
    entry of ``hidden_units``, a weight-normalised ReLU Dense layer followed
    by BatchNormalization and Dropout(0.5); finally a weight-normalised
    sigmoid Dense layer with ``num_outputs`` units.

    Args:
        num_columns: Number of input features per sample.
        num_outputs: Number of sigmoid output units (defaults to 206).
        hidden_units: Widths of the hidden Dense layers, in order.

    Returns:
        A ``tf.keras.Sequential`` model compiled with Adam wrapped in
        Lookahead (``sync_period=10``) and the binary cross-entropy loss.
    """
    layers = [
        # Explicit shape tuple rather than a bare int for the input width.
        tf.keras.layers.Input(shape=(num_columns,)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.2),
    ]
    # Each hidden block is Dense(ReLU) -> BatchNorm -> Dropout(0.5).
    for units in hidden_units:
        layers.extend([
            tfa.layers.WeightNormalization(tf.keras.layers.Dense(units, activation="relu")),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(0.5),
        ])
    # One sigmoid unit per target column.
    layers.append(
        tfa.layers.WeightNormalization(tf.keras.layers.Dense(num_outputs, activation="sigmoid"))
    )

    model = tf.keras.Sequential(layers)
    model.compile(optimizer=tfa.optimizers.Lookahead(tf.optimizers.Adam(), sync_period=10),
                  loss='binary_crossentropy')
    return model

In [None]:
# Size the network from the training feature width, then fit it while
# validating on the held-out split. The early-stopping callback may end
# training before the 35-epoch limit.
n_features = X_train.shape[1]
model = create_model(n_features)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=35,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# Print the summary of the fitted model.
model.summary()

In [None]:
# Plot the training and validation loss recorded by model.fit for each epoch.

loss = history.history['loss']
val_loss = history.history['val_loss']

# One x position per recorded epoch (zero-based).
epochs = range(len(loss))

# Explicit Figure/Axes interface, with axis labels so the figure stands alone.
fig, ax = plt.subplots()

ax.plot(epochs, loss, 'c-', label='Training Loss')
ax.plot(epochs, val_loss, 'y-', label='Validation Loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [None]:
# Encode the test features from the raw frame in one chain:
#   - drop the sig_id and cp_type columns
#   - cp_dose: D1 -> 0, D2 -> 1
#   - cp_time: 24 -> 0, 48 -> 1, 72 -> 2
new_test_features = (
    test_features
    .drop(columns=['sig_id', 'cp_type'])
    .assign(
        cp_dose=lambda frame: frame['cp_dose'].map({'D1': 0, 'D2': 1}),
        cp_time=lambda frame: frame['cp_time'] // 24 - 1,
    )
)
new_test_features

In [None]:
# predict values for test_features
# The fitted model is applied to the encoded test features.
test_predict = model.predict(new_test_features)
# Bare last expression: the notebook renders the prediction result.
test_predict

In [None]:
# Preview the first rows of the sample submission file.
sample_submission.head()

In [None]:
# Start from a copy of the sample submission so the loaded frame is not modified.
sub = sample_submission.copy()
# Take the sig_id column out of the frame, leaving only the prediction columns.
sig_ids = sub.pop('sig_id')
# Overwrite every remaining cell with the predicted values.
sub[:] = test_predict
# Put sig_id back as the first column.
sub.insert(0, "sig_id", sig_ids, allow_duplicates=True)
sub

In [None]:
# write sub to submission.csv file
# index=False keeps the DataFrame index out of the written CSV.
sub.to_csv('submission.csv', index = False)