In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers

import matplotlib.pyplot as plt

from sklearn.metrics import log_loss
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Load the training inputs and their scored targets from the Kaggle input directory.
train_features, train_targets = map(
    pd.read_csv,
    (
        '/kaggle/input/lish-moa/train_features.csv',
        '/kaggle/input/lish-moa/train_targets_scored.csv',
    ),
)

In [None]:
# One-hot encode the two categorical columns; drop_first keeps a single
# indicator column per feature, appended after the original columns.
train_features_enc = pd.get_dummies(train_features, columns=['cp_type', 'cp_dose'], drop_first=True)

# Skip the first column of each frame (presumably the `sig_id` identifier — it
# is the column the submission code fills from `sig_id`).
# An explicit float dtype is requested because recent pandas versions emit
# boolean dummy columns, and a mixed float/int/bool frame would otherwise be
# converted to an `object` array that Keras cannot turn into a tensor.
X = train_features_enc.iloc[:, 1:].to_numpy(dtype=np.float64)
y = train_targets.iloc[:, 1:].to_numpy()

In [None]:
def l1_model(input_shape, no_classes, lr):
    """Build and compile a network with one hidden block.

    The hidden block is Dense(128, sigmoid) -> BatchNormalization ->
    Dropout(0.2), followed by a sigmoid output layer.

    Args:
        input_shape: Shape passed to ``tf.keras.Input`` (excluding the batch axis).
        no_classes: Number of units in the sigmoid output layer.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``tf.keras.Model`` (binary cross-entropy loss and metric).
    """
    net_input = tf.keras.Input(shape=input_shape)

    hidden = layers.Dense(128, activation='sigmoid')(net_input)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.2)(hidden)

    probabilities = layers.Dense(no_classes, activation='sigmoid')(hidden)

    net = tf.keras.Model(net_input, probabilities)
    net.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['binary_crossentropy'],
    )
    return net

def l3_model(input_shape, no_classes, lr):
    """Build and compile a network with three stacked hidden blocks.

    Each hidden block is Dense(128, sigmoid) -> BatchNormalization ->
    Dropout(0.2); the blocks are chained sequentially and followed by a
    sigmoid output layer.

    Args:
        input_shape: Shape passed to ``tf.keras.Input`` (excluding the batch axis).
        no_classes: Number of units in the sigmoid output layer.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``tf.keras.Model`` (binary cross-entropy loss and metric).
    """
    net_input = tf.keras.Input(shape=input_shape)

    hidden = net_input
    for _ in range(3):
        hidden = layers.Dense(128, activation='sigmoid')(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Dropout(0.2)(hidden)

    probabilities = layers.Dense(no_classes, activation='sigmoid')(hidden)

    net = tf.keras.Model(net_input, probabilities)
    net.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['binary_crossentropy'],
    )
    return net

def l3_res_model(input_shape, no_classes, lr):
    """Build and compile a three-block network whose block outputs are summed.

    Three Dense(128, sigmoid) -> BatchNormalization -> Dropout(0.2) blocks are
    chained sequentially. The dropout outputs of all three blocks are added
    element-wise and the sum feeds the sigmoid output layer.

    Args:
        input_shape: Shape passed to ``tf.keras.Input`` (excluding the batch axis).
        no_classes: Number of units in the sigmoid output layer.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``tf.keras.Model`` (binary cross-entropy loss and metric).
    """
    net_input = tf.keras.Input(shape=input_shape)

    block_outputs = []
    hidden = net_input
    for _ in range(3):
        hidden = layers.Dense(128, activation='sigmoid')(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Dropout(0.2)(hidden)
        # Keep every block's output so they can all be merged below.
        block_outputs.append(hidden)

    merged = tf.keras.layers.add(block_outputs)
    probabilities = layers.Dense(no_classes, activation='sigmoid')(merged)

    net = tf.keras.Model(net_input, probabilities)
    net.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['binary_crossentropy'],
    )
    return net

In [None]:
# Training hyper-parameters shared by all three networks.
SEED = 42
LEARNING_RATE = 0.0005
BATCH_SIZE = 512
EPOCHS = 50

# Seed NumPy and TensorFlow before any layer is created so weight
# initialisation, dropout and shuffling are repeatable across runs.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Column -2 is the cp_type indicator appended by get_dummies (cp_dose is last).
# Rows where it is 0 are dropped from training — presumably these are the
# control-vehicle samples, as the mask name suggests.
control_vehicle_mask = X[:,-2] == 0
X = X[~control_vehicle_mask,:]
y = y[~control_vehicle_mask]

# Size the networks from the data rather than hard-coding 875 inputs / 206
# outputs, so a change in the feature encoding cannot silently mismatch.
n_features, n_targets = X.shape[1], y.shape[1]
nnclf_1 = l1_model((n_features,), n_targets, LEARNING_RATE)
nnclf_2 = l3_model((n_features,), n_targets, LEARNING_RATE)
nnclf_3 = l3_res_model((n_features,), n_targets, LEARNING_RATE)

# No validation_data is passed: the original validated on the training set
# itself with verbose=0 and discarded the history, which only cost time and
# produced no held-out signal.
for clf in (nnclf_1, nnclf_2, nnclf_3):
    clf.fit(X, y, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=0)

# In-sample predictions: any loss computed from these is a training loss,
# not an estimate of generalisation.
preds_1 = nnclf_1.predict(X)
preds_2 = nnclf_2.predict(X)
preds_3 = nnclf_3.predict(X)

In [None]:
# Stack the three prediction matrices once along a new leading (model) axis.
stacked_preds = np.stack([preds_1, preds_2, preds_3])

# Unweighted ensemble: element-wise mean over the model axis.
avg_preds = stacked_preds.mean(axis=0)
# For each entry, the index (0, 1 or 2) of the model with the highest
# prediction — note this is a model index, not a probability.
argmax_preds = stacked_preds.argmax(axis=0)

In [None]:
# Weighted ensemble with weights 1 : 2 : 10 (normalised by their sum, 13).
weighted_preds = (preds_1 + 2 * preds_2 + 10 * preds_3) / 13

In [None]:
# Log loss of the weighted ensemble, with labels and predictions flattened
# so every (sample, target) pair counts as one binary prediction.
loss = log_loss(y.ravel(), weighted_preds.ravel())
print(f'Loss: {loss}')

In [None]:
# Log loss of the unweighted (mean) ensemble, with labels and predictions
# flattened so every (sample, target) pair counts as one binary prediction.
loss = log_loss(y.ravel(), avg_preds.ravel())
print(f'Loss: {loss}')

In [None]:
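# Disabled: `max_preds` is not defined in any visible cell (only `argmax_preds`,
# which holds model indices rather than probabilities), so this cannot run as-is.
# loss = log_loss(np.ravel(y), np.ravel(max_preds))
# print('Loss: '+str(loss))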

In [None]:
# Load the test inputs and encode them exactly like the training features.
test_features = pd.read_csv('/kaggle/input/lish-moa/test_features.csv')
test_features_enc = pd.get_dummies(test_features, columns=['cp_type', 'cp_dose'], drop_first=True)

# get_dummies derives its columns from the categories present in *this* frame,
# so fail early with a clear message if the test encoding does not line up
# with the training encoding the models were fitted on.
assert list(test_features_enc.columns) == list(train_features_enc.columns), (
    'test feature columns differ from the training feature columns'
)

# Skip the first column (presumably `sig_id`) and request a float dtype
# explicitly: recent pandas versions emit boolean dummy columns, which would
# otherwise turn the mixed frame into an `object` array Keras cannot consume.
X_test = test_features_enc.iloc[:, 1:].to_numpy(dtype=np.float64)

In [None]:
# Score the test set with each of the three fitted networks.
test_predictions_1 = nnclf_1.predict(X_test)
test_predictions_2 = nnclf_2.predict(X_test)
test_predictions_3 = nnclf_3.predict(X_test)

# Stack once along a new leading (model) axis and reduce over it.
stacked_test_preds = np.stack([test_predictions_1, test_predictions_2, test_predictions_3])
avg_test_preds = stacked_test_preds.mean(axis=0)
# Index (0, 1 or 2) of the model with the highest prediction per entry.
argmax_test_preds = stacked_test_preds.argmax(axis=0)

# Weighted ensemble with weights 1 : 2 : 10 (normalised by their sum, 13).
weighted_test_preds = (test_predictions_1 + 2 * test_predictions_2 + 10 * test_predictions_3) / 13

In [None]:
# Build the submission directly from the prediction matrix so the target
# columns keep a float dtype. Filling an empty, object-typed frame through
# `.iloc` leaves object columns, which `to_csv(float_format=...)` does not format.
target_columns = train_targets.columns[1:]
submission_pd = pd.DataFrame(weighted_test_preds, columns=target_columns)
submission_pd.insert(0, 'sig_id', test_features['sig_id'].to_numpy())

# NOTE(review): rows with the control-vehicle cp_type indicator were excluded
# from training, so predictions for such test rows are extrapolations. Earlier
# commented-out code here zeroed those rows — confirm whether that is wanted,
# e.g. submission_pd.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0

# fillna returns a new frame; assign it back so the fill actually takes effect.
submission_pd = submission_pd.fillna(0)
submission_pd.head()

In [None]:
# Write the submission without the index column; float columns are rendered
# with 20 decimal places.
submission_pd.to_csv(
    'submission.csv',
    float_format='%.20f',
    index=False,
)