# Load Data

In [None]:
import numpy as np
import pandas as pd

# Single seed for the notebook: it seeds NumPy's global RNG here and is the
# default `random_state` of the PCA objects created in process2.
seed = 72
np.random.seed(seed)

In [None]:
# Read the three competition CSVs in one pass; the generator is unpacked in
# the same order as the paths are listed.
train_features, train_targets_scored, test_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/test_features.csv',
    )
)

# Quick sanity check: both training frames should have the same row count.
train_features.shape, train_targets_scored.shape

# Feature Engineering and Preprocessing

In [None]:
# Keep only the training rows whose cp_type is not 'ctl_vehicle', then drop
# the identifier columns that are not model inputs. The test frame keeps
# cp_type because it is read again when the submission is written.
kp_cl = train_features.loc[train_features['cp_type'] != 'ctl_vehicle'].index

train_features = train_features.loc[kp_cl].drop(columns=['sig_id', 'cp_type'])
train_targets_scored = train_targets_scored.loc[kp_cl].drop(columns='sig_id')
test_features = test_features.drop(columns=['sig_id'])

In [None]:
#three processing functions
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.feature_selection import VarianceThreshold

# Column-name groups used throughout preprocessing: every feature column
# whose name contains 'g-', and every one whose name contains 'c-'.
cols = train_features.columns
cl_g, cl_c = (cols[cols.str.contains(tag)] for tag in ('g-', 'c-'))

def tra_cp_dose(val):
    """Encode a ``cp_dose`` label as a float: 'D1' -> 1.0, 'D2' -> 0.0.

    Any other value matches neither label and yields ``None``.
    """
    if val == 'D1':
        return 1.
    if val == 'D2':
        return 0.
    return None

def tra_cp_time(val):
    """Rescale a ``cp_time`` value by dividing it by 24 (e.g. 48 -> 2.0)."""
    divisor = 24.
    return val / divisor

def process1():
    """Return the raw feature frames with ``cp_dose`` and ``cp_time`` made numeric.

    Works on a copy of ``train_features`` and on ``test_features`` with its
    ``cp_type`` column removed, so neither module-level frame is modified.
    ``cp_dose`` is mapped through ``tra_cp_dose`` and ``cp_time`` through
    ``tra_cp_time``; all other columns are passed through untouched.

    Returns:
        ``(train, test)`` DataFrames.
    """
    train = train_features.copy()
    test = test_features.drop('cp_type', axis=1)

    # Same two conversions on both frames, training frame first.
    for frame in (train, test):
        frame['cp_dose'] = frame['cp_dose'].apply(tra_cp_dose)
        frame['cp_time'] = frame['cp_time'].apply(tra_cp_time)

    return train, test

def process2(n_c=60, n_g=480, seed=seed, scale=True):
    """Build PCA-based features for the train and test sets.

    The 'g-' and 'c-' column groups are each projected with their own PCA,
    and the per-row mean and standard deviation of each raw group are
    appended as 'g-mean', 'g-std', 'c-mean' and 'c-std'.

    Both PCA models (and the optional scaler) are fitted on the training
    frame only and then applied unchanged to the test frame, so the two
    outputs share one projection and one scaling.

    Args:
        n_c: number of principal components kept for the 'c-' columns.
        n_g: number of principal components kept for the 'g-' columns.
        seed: ``random_state`` handed to both PCA objects.
        scale: when true, standardise every output column with a
            ``StandardScaler`` fitted on the training output.

    Returns:
        ``(train, test)`` DataFrames indexed like ``train_features`` and
        ``test_features`` respectively, with identical columns.
    """
    pca_c = PCA(n_components=n_c, random_state=seed)
    pca_g = PCA(n_components=n_g, random_state=seed)

    g_names = [f'g-pca{i}' for i in range(n_g)]
    c_names = [f'c-pca{i}' for i in range(n_c)]

    def _assemble(frame, fit):
        """Project one frame and append its per-row summary statistics."""
        g_block = frame[cl_g]
        c_block = frame[cl_c]

        # Only the training frame may fit the projections; the test frame
        # must reuse them so its components mean the same thing.
        project_g = pca_g.fit_transform if fit else pca_g.transform
        project_c = pca_c.fit_transform if fit else pca_c.transform

        parts = [
            pd.DataFrame(project_g(g_block), columns=g_names, index=frame.index),
            pd.DataFrame(project_c(c_block), columns=c_names, index=frame.index),
        ]
        features = pd.concat(parts, axis=1)
        features['g-mean'] = np.mean(g_block, axis=1)
        features['g-std'] = np.std(g_block, axis=1)
        features['c-mean'] = np.mean(c_block, axis=1)
        features['c-std'] = np.std(c_block, axis=1)
        return features

    train = _assemble(train_features, fit=True)
    test = _assemble(test_features, fit=False)

    if scale:
        # Scaling statistics also come from the training output only.
        SS = StandardScaler()
        train = pd.DataFrame(SS.fit_transform(train), columns=train.columns, index=train.index)
        test = pd.DataFrame(SS.transform(test), columns=test.columns, index=test.index)

    return train, test

def process3(n_c=80, n_g=660, tr=.8):
    """Combine ``process1`` and unscaled ``process2`` features, then filter and rescale.

    The two feature sets are concatenated column-wise. Columns are then
    filtered by ``VarianceThreshold(tr)`` and passed through a default
    ``QuantileTransformer``; both are fitted on the training frame and
    applied to the test frame.

    Args:
        n_c: forwarded to ``process2`` as its first argument.
        n_g: forwarded to ``process2`` as its second argument.
        tr: threshold given to ``VarianceThreshold``.

    Returns:
        ``(train, test)`` DataFrames that keep the input row indices. They
        are built from bare arrays, so their columns are the default
        integer labels rather than feature names.
    """
    base_train, base_test = process1()
    pca_train, pca_test = process2(n_c, n_g, scale=False)

    train = pd.concat([base_train, pca_train], axis=1)
    test = pd.concat([base_test, pca_test], axis=1)

    QS = QuantileTransformer()
    VT = VarianceThreshold(tr)

    # Fit selector and transformer on the training frame ...
    train_selected = VT.fit_transform(train)
    train = pd.DataFrame(QS.fit_transform(train_selected), index=train.index)

    # ... and reuse both, already fitted, on the test frame.
    test_selected = VT.transform(test)
    test = pd.DataFrame(QS.transform(test_selected), index=test.index)

    return train, test
    

In [None]:
# Shuffle the training rows once: `per` is a random ordering of the training
# index, drawn from NumPy's global RNG (seeded at the top of the notebook).
per = np.random.permutation(train_features.index)
# Targets are reordered with the same permutation so each row stays paired
# with its features.
target = train_targets_scored.loc[per]

# Feature set 1: the raw columns with cp_dose / cp_time made numeric.
# Only the training frame is put into the shuffled order.
train1, test1 = process1()
train1 = train1.loc[per]

train1

In [None]:
# Feature set 2: PCA components plus per-row summary statistics, using the
# process2 defaults; training rows are put into the shuffled order.
train2, test2 = process2()
train2 = train2.loc[per]

train2

In [None]:
# Feature set 3: sets 1 and 2 combined, variance-filtered and
# quantile-transformed (process3 defaults); training rows are put into the
# shuffled order.
train3, test3 = process3()
train3 = train3.loc[per]

train3

# Build Model

In [None]:
#import libraries to build model
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.metrics import binary_crossentropy

In [None]:
# Define the logloss metric, early stopping, learning-rate decay and the
# maximum number of epochs.
# p_min / p_max bound predicted probabilities: they are used by `logloss`
# and again when the submission values are clipped.
p_min = 0.001
p_max = 0.999

def logloss(y_true, y_pred):
    """Binary cross-entropy computed on predictions clipped to [p_min, p_max].

    Args:
        y_true: ground-truth labels.
        y_pred: predicted probabilities.

    Returns:
        The result of ``binary_crossentropy`` on the clipped predictions.
    """
    clipped = tf.clip_by_value(y_pred, clip_value_min=p_min, clip_value_max=p_max)
    return binary_crossentropy(y_true, clipped)

# Upper bound on training epochs; `early_stop` may end a run sooner.
max_epoch = 50

# Both callbacks watch the clipped validation log loss, which should go down.
# `mode='min'` is spelled out so neither callback has to infer the direction
# from the name of a custom metric.
lr_decay = ReduceLROnPlateau(monitor='val_logloss', mode='min', patience=1,
                             verbose=1, min_lr=1e-8)
early_stop = EarlyStopping(monitor='val_logloss', mode='min', patience=5,
                           verbose=1, restore_best_weights=True)

In [None]:
#build three models
def build_model1(inp_shape1, inp_shape2, tar_shape):
    """Assemble the two-input network named 'model1'.

    Each input goes through its own pair of Dense/BatchNorm/activation
    stages. The two branches are concatenated and reduced back to
    ``inp_shape1`` units so the result can be added to the first input
    (layer 'add'). A final stack ends in ``tar_shape`` sigmoid units.

    Args:
        inp_shape1: width of the first input ('inp1'), also the width of
            the residual connection.
        inp_shape2: width of the second input ('inp2').
        tar_shape: number of output units.

    Returns:
        An uncompiled ``Model`` taking ``[inp1, inp2]``.
    """
    def dense_stage(tensor, units, activation=None):
        # Dense -> BatchNormalization -> activation.
        # LeakyReLU is used when no activation name is given.
        tensor = Dense(units)(tensor)
        tensor = BatchNormalization()(tensor)
        if activation is None:
            return LeakyReLU()(tensor)
        return Activation(activation)(tensor)

    inp1 = Input((inp_shape1,), name='inp1')
    inp2 = Input((inp_shape2,), name='inp2')

    # Branch for the first input (normalised before dropout).
    branch1 = BatchNormalization()(inp1)
    branch1 = Dropout(.2)(branch1)
    branch1 = dense_stage(branch1, 1024)
    branch1 = dense_stage(branch1, 512, 'relu')

    # Branch for the second input.
    branch2 = Dropout(.3)(inp2)
    branch2 = dense_stage(branch2, 1024)
    branch2 = dense_stage(branch2, 512)

    merged = Concatenate(name='concat')([branch1, branch2])

    # Shared trunk, ending at inp_shape1 units so it can be added to inp1.
    trunk = Dropout(.3)(merged)
    trunk = dense_stage(trunk, 2048, 'relu')
    trunk = dense_stage(trunk, 512, 'relu')
    trunk = Dropout(.4)(trunk)
    trunk = dense_stage(trunk, inp_shape1, 'relu')

    residual = Add(name='add')([trunk, inp1])

    # Output head.
    head = Dropout(.3)(residual)
    head = dense_stage(head, 1024)
    head = Dropout(.4)(head)
    head = dense_stage(head, 512)
    head = Dropout(.5)(head)
    head = Dense(tar_shape)(head)
    head = Activation('sigmoid')(head)

    return Model(inputs=[inp1, inp2], outputs=head, name='model1')


def build_model2(inp_shape, tar_shape):
    """Assemble the single-input feed-forward network named 'model2'.

    After an initial batch normalisation the input passes through four
    stages of Dropout(0.3) -> Dense -> BatchNormalization -> activation.
    The first three stages are 1024, 512 and 256 units wide with ELU; the
    last has ``tar_shape`` units and a sigmoid.

    Args:
        inp_shape: number of input features.
        tar_shape: number of output units.

    Returns:
        An uncompiled ``Model``.
    """
    inp = Input((inp_shape,), name='inp')
    stages = ((1024, 'elu'), (512, 'elu'), (256, 'elu'), (tar_shape, 'sigmoid'))

    hidden = BatchNormalization()(inp)
    for units, activation in stages:
        hidden = Dropout(.3)(hidden)
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation(activation)(hidden)

    return Model(inputs=inp, outputs=hidden, name='model2')

def build_model3(inp_shape, tar_shape):
    """Assemble the single-input feed-forward network named 'model3'.

    After an initial batch normalisation the input passes through four
    stages of Dropout -> Dense -> BatchNormalization -> activation. The
    stages use dropout rates 0.3, 0.5, 0.3, 0.5 and widths 2048, 256, 1024
    and ``tar_shape``. The first three use ELU and the last a sigmoid.

    Args:
        inp_shape: number of input features.
        tar_shape: number of output units.

    Returns:
        An uncompiled ``Model``.
    """
    inp = Input((inp_shape,), name='inp')
    # (dropout rate, dense width, activation) for each stage, in order.
    stages = (
        (.3, 2048, 'elu'),
        (.5, 256, 'elu'),
        (.3, 1024, 'elu'),
        (.5, tar_shape, 'sigmoid'),
    )

    hidden = BatchNormalization()(inp)
    for rate, units, activation in stages:
        hidden = Dropout(rate)(hidden)
        hidden = Dense(units)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation(activation)(hidden)

    return Model(inputs=inp, outputs=hidden, name='model3')

In [None]:
# Two-input model: feature set 1 is the first input, feature set 2 the second.
model1 = build_model1(
    inp_shape1=train1.shape[1],
    inp_shape2=train2.shape[1],
    tar_shape=target.shape[1],
)
model1.compile(
    optimizer=Adam(learning_rate=3 * 1e-3),
    loss='binary_crossentropy',
    metrics=[logloss],
)
model1.summary()

In [None]:
# shuffle=False keeps the row order fixed by the earlier permutation; the
# validation rows are taken by `validation_split` from that fixed order.
his1 = model1.fit(
    x=[train1, train2],
    y=target,
    batch_size=64,
    epochs=max_epoch,
    validation_split=.2,
    shuffle=False,
    callbacks=[lr_decay, early_stop],
    verbose=2,
)

In [None]:
# Single-input model on feature set 3.
model2 = build_model2(inp_shape=train3.shape[1], tar_shape=target.shape[1])
# NOTE(review): this learning rate is far above the 3e-3 used for model1 -
# confirm it is intentional.
model2.compile(
    optimizer=Adam(learning_rate=1e-1),
    loss='binary_crossentropy',
    metrics=[logloss],
)
model2.summary()

In [None]:
# Same training setup as model1, on feature set 3.
his2 = model2.fit(
    x=train3,
    y=target,
    batch_size=64,
    epochs=max_epoch,
    validation_split=.2,
    shuffle=False,
    callbacks=[lr_decay, early_stop],
    verbose=2,
)

In [None]:
# Second single-input architecture, also on feature set 3.
model3 = build_model3(inp_shape=train3.shape[1], tar_shape=target.shape[1])
# NOTE(review): this learning rate is far above the 3e-3 used for model1 -
# confirm it is intentional.
model3.compile(
    optimizer=Adam(learning_rate=.1),
    loss='binary_crossentropy',
    metrics=[logloss],
)
model3.summary()

In [None]:
# Same setup as the other fits, but with a batch size of 128.
his3 = model3.fit(
    x=train3,
    y=target,
    batch_size=128,
    epochs=max_epoch,
    validation_split=.2,
    shuffle=False,
    callbacks=[lr_decay, early_stop],
    verbose=2,
)

# Making Predictions

In [None]:
# Each model predicts on the test version of the feature set(s) it was
# trained on: model1 takes sets 1 and 2, model2 and model3 both take set 3.
pred1 = model1.predict([test1, test2])
pred2 = model2.predict(test3)
pred3 = model3.predict(test3)

# Ensemble: unweighted mean of the three models' predictions.
pred = (pred1 + pred2 + pred3) / 3.0
pred.shape

In [None]:
# Start from the sample submission and overwrite every column after the
# first, positionally, with the ensemble predictions clipped to [p_min, p_max].
out = pd.read_csv('/kaggle/input/lish-moa/sample_submission.csv')
out.iloc[:, 1:] = np.clip(pred, p_min, p_max)

# Rows whose test sample is a control vehicle get all-zero predictions.
# `.iloc` is positional, so the row mask is passed as a plain boolean array
# rather than an index-carrying Series.
out.iloc[(test_features['cp_type'] == 'ctl_vehicle').values, 1:] = 0
out

In [None]:
# Write the submission to 'submission.csv' (relative path), without the
# DataFrame index column.
out.to_csv('submission.csv', index=False)