## Loading files:

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
import keras
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from tensorflow.keras import Sequential,Model,backend
from tensorflow.keras import layers,regularizers
from tensorflow.keras import callbacks,optimizers,metrics,losses

from sklearn.preprocessing import StandardScaler 
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA

# Utility methods
def plotFeatures(columns, plt):
    """Draw histograms for the first twelve of the given feature columns.

    Each histogram is placed in its own cell of a 5x5 subplot grid on the
    current figure, uses 200 bins, and is titled with the column label.
    The values are read from the module-level ``train_features`` frame.

    Args:
        columns: Sequence of column labels; only ``columns[0:12]`` is drawn.
        plt: Plotting object exposing ``subplot``, ``hist`` and ``title``
            (callers in this file pass ``matplotlib.pyplot``).
    """
    for position, name in enumerate(columns[0:12], start=1):
        plt.subplot(5, 5, position)
        values = train_features.loc[:, name]
        plt.hist(values, bins=200, alpha=1)
        plt.title(name)
        
def getMissingPercent(series):
    """Return the share of null entries in ``series``, rounded to 2 decimals.

    Despite the name, the result is a fraction in [0, 1], not a percentage.

    Args:
        series: Object exposing ``isnull()`` (e.g. a pandas Series).

    Returns:
        Number of null entries divided by the total number of entries,
        rounded to two decimal places.
    """
    null_mask = series.isnull()
    return round(null_mask.sum() / null_mask.size, 2)

def getFeatures(X_train,X_test):
    """Append PCA and row-mean features to both matrices, then standardize.

    The module-level column selectors ``gfeatures`` and ``cfeatures`` pick
    the two column groups. For each group a PCA (20 components for the
    ``gfeatures`` group, 100 for the ``cfeatures`` group) is fitted on
    ``X_train`` only and applied to both matrices. The per-row mean of each
    group is added as well. Finally a ``StandardScaler`` fitted on the
    augmented training matrix is applied to both.

    Args:
        X_train: 2-D array used to fit the PCAs and the scaler.
        X_test: 2-D array with the same columns, only transformed.

    Returns:
        Tuple ``(X_train, X_test)`` of the augmented, scaled arrays. Column
        order is: original columns, g-group PCA, c-group PCA, c-group row
        mean, g-group row mean.
    """
    ngFeatures = 20
    ncFeatures = 100

    g_pca = PCA(n_components=ngFeatures)
    c_pca = PCA(n_components=ncFeatures)

    train_g, train_c = X_train[:, gfeatures], X_train[:, cfeatures]
    test_g, test_c = X_test[:, gfeatures], X_test[:, cfeatures]

    # Fit on the training rows only; the test rows are merely projected.
    train_g_pca = g_pca.fit_transform(train_g)
    train_c_pca = c_pca.fit_transform(train_c)
    test_g_pca = g_pca.transform(test_g)
    test_c_pca = c_pca.transform(test_c)

    # keepdims=True yields column vectors that can be stacked directly.
    train_parts = (
        X_train,
        train_g_pca,
        train_c_pca,
        train_c.mean(axis=1, keepdims=True),
        train_g.mean(axis=1, keepdims=True),
    )
    test_parts = (
        X_test,
        test_g_pca,
        test_c_pca,
        test_c.mean(axis=1, keepdims=True),
        test_g.mean(axis=1, keepdims=True),
    )

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(np.concatenate(train_parts, axis=1))
    test_scaled = scaler.transform(np.concatenate(test_parts, axis=1))

    return train_scaled, test_scaled

def lossFunction(y_true, y_pred):
    """Mean binary log loss on predictions clipped to ``[p_min, p_max]``.

    ``p_min`` and ``p_max`` are module-level constants. Clipping keeps both
    logarithms finite.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values, same shape as ``y_true``.

    Returns:
        Scalar tensor: the negated mean of
        ``y_true*log(p) + (1-y_true)*log(1-p)`` over all elements.
    """
    clipped = tf.clip_by_value(y_pred, p_min, p_max)
    positive_term = y_true * backend.log(clipped)
    negative_term = (1 - y_true) * backend.log(1 - clipped)
    return -backend.mean(positive_term + negative_term)

In [None]:
# Load the competition CSV files. The directory is defined once so the
# f-strings below actually interpolate something and the path can be
# changed in a single place.
DATA_DIR = '../input/lish-moa'

train_features = pd.read_csv(f'{DATA_DIR}/train_features.csv')
train_target = pd.read_csv(f'{DATA_DIR}/train_targets_scored.csv')

test_features = pd.read_csv(f'{DATA_DIR}/test_features.csv')
sample_sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

print("Sample Training set features")
# Trailing expression: rendered as the cell output in the notebook.
train_features.head(5)

In [None]:
# Preview the first five rows of the training targets (cell output).
print("Sample Training set target")
train_target.head(5)

In [None]:
# Summary statistics of the training features (rendered as cell output).
train_features.describe()

In [None]:
# Mean and standard deviation of every feature per (dose, type, time) group.
# The string identifier `sig_id` is dropped first: `mean`/`std` are undefined
# for it, and pandas >= 2.0 raises instead of silently skipping such columns.
train_features.drop(columns=['sig_id']).groupby( ['cp_dose','cp_type','cp_time'] ).agg( ['mean','std'] )

## Visualization of the Data:

In [None]:
# Sum each target column (the targets are one-hot encoded, so the sum is the
# number of positives) and plot the 25 largest totals.
fig = plt.figure(figsize=(14,6))
target_totals = train_target.iloc[:,1:].sum(axis=0).sort_values(ascending=False)
top_totals = target_totals[:25]
plt.bar(top_totals.index, top_totals.values)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Histograms of the features whose names start with 'c-'
# (plotFeatures draws the first twelve of them).
is_c_column = train_features.columns.str.startswith('c-')
cCols = train_features.columns[is_c_column]
plt.figure(figsize=(15, 15))
plotFeatures(cCols, plt)

# Same view for the features whose names start with 'g-'
is_g_column = train_features.columns.str.startswith('g-')
gCols = train_features.columns[is_g_column]
plt.figure(figsize=(15, 15))
plotFeatures(gCols, plt)

In [None]:
# Correlation matrix of the numeric features. String columns are excluded
# explicitly: pandas >= 2.0 no longer drops them silently and raises instead.
corr = train_features.select_dtypes(include='number').corr()
# Show only strongly correlated pairs; entries below 0.9 are masked to NaN
# and therefore left blank in the heatmap.
plt.figure(figsize=(12,8))
sns.heatmap(corr[corr>=.9])

## Model Training and Predictions:

In [None]:
# Print, for every training column, the fraction of missing values.
for column_name in train_features.columns:
    missing_share = getMissingPercent(train_features[column_name])
    print(column_name, missing_share)

In [None]:

# Drop the identifier and the treatment-descriptor columns from the training
# features. `cp_type` is left in the test frame at this point; the training
# cells below drop it themselves before building features.
train_features = train_features.drop(['sig_id','cp_type','cp_dose','cp_time'],axis=1)
test_features = test_features.drop(['sig_id','cp_dose','cp_time'],axis=1)

# The scored targets were already loaded as `train_target`; reuse that frame
# instead of reading the same CSV from disk a second time.
train_targets_scored = train_target.drop(columns=['sig_id'])
labels_train = train_targets_scored.values

# Considering numerical feature only
num_features= train_features.columns[train_features.dtypes!="object"]
num_features

In [None]:
# Boolean column masks for the two feature groups, used by getFeatures.
# `startswith` matches the prefix test used in the visualization cell and
# avoids `str.contains`, which treats its argument as an unanchored regex.
cfeatures = train_features.columns.str.startswith('c-')
gfeatures = train_features.columns.str.startswith('g-')

n_labels = train_targets_scored.shape[1]
n_train = train_features.shape[0]
n_test = test_features.shape[0]

# Clipping bounds applied to predictions inside lossFunction.
p_min = 5E-5
p_max = 0.95

In [None]:
def sequenceInitOne(input):
    """Apply the first block: BN -> Dropout(0.2) -> Dense(512) -> BN -> Dense(256).

    Args:
        input: Tensor to feed through a freshly built ``Sequential`` stack.

    Returns:
        Output tensor of the stack (last layer has 256 units).
    """
    # NOTE(review): the Dense layers are created without an `activation`
    # argument, i.e. they are linear — confirm this is intended.
    head = Sequential()
    head.add(layers.BatchNormalization())
    head.add(layers.Dropout(0.2))
    head.add(layers.Dense(512))
    head.add(layers.BatchNormalization())
    head.add(layers.Dense(256))
    return head(input)

def sequenceInitTwo(input):
    """Apply the second block to ``input``.

    Layer order: BN, Dropout(0.3), then Dense(512), Dense(512), Dense(256)
    each followed by BN, and a final Dense(256).

    Args:
        input: Tensor to feed through a freshly built ``Sequential`` stack.

    Returns:
        Output tensor of the stack (last layer has 256 units).
    """
    # NOTE(review): the Dense layers are created without an `activation`
    # argument, i.e. they are linear — confirm this is intended.
    stack = [layers.BatchNormalization(), layers.Dropout(0.3)]
    for units in (512, 512, 256):
        stack.append(layers.Dense(units))
        stack.append(layers.BatchNormalization())
    stack.append(layers.Dense(256))
    return Sequential(stack)(input)

def sequenceInitThree(input):
    """Apply the third block to ``input``.

    Layer order: BN, Dropout(0.3), then Dense(1024), Dense(512), Dense(512)
    each followed by BN, and a final Dense(256).

    Args:
        input: Tensor to feed through a freshly built ``Sequential`` stack.

    Returns:
        Output tensor of the stack (last layer has 256 units).
    """
    # NOTE(review): the Dense layers are created without an `activation`
    # argument, i.e. they are linear — confirm this is intended.
    hidden_widths = (1024, 512, 512)
    stack = [layers.BatchNormalization(), layers.Dropout(0.3)]
    for units in hidden_widths:
        stack.extend([layers.Dense(units), layers.BatchNormalization()])
    stack.append(layers.Dense(256))
    return Sequential(stack)(input)

def sequenceInitFour(input):
    """Apply the fourth block to ``input``.

    The layer layout is the same as in ``sequenceInitTwo``: BN, Dropout(0.3),
    Dense(512), BN, Dense(512), BN, Dense(256), BN, Dense(256). A new set of
    layers is created on every call.

    Args:
        input: Tensor to feed through a freshly built ``Sequential`` stack.

    Returns:
        Output tensor of the stack (last layer has 256 units).
    """
    # NOTE(review): the Dense layers are created without an `activation`
    # argument, i.e. they are linear — confirm this is intended.
    block = Sequential()
    block.add(layers.BatchNormalization())
    block.add(layers.Dropout(0.3))
    for units in (512, 512, 256):
        block.add(layers.Dense(units))
        block.add(layers.BatchNormalization())
    block.add(layers.Dense(256))
    return block(input)

def sequenceInitFive(input):
    """Apply the output block and return per-label probabilities.

    Layer order: BN, Dense(256), BN, Dense(n_labels), BN,
    Dense(n_labels, sigmoid). ``n_labels`` is read from module scope.

    The final layer uses a sigmoid so the outputs lie in (0, 1). The model
    is compiled with ``BinaryCrossentropy`` in its default
    ``from_logits=False`` mode, and ``lossFunction`` takes ``log(y_pred)``
    and ``log(1 - y_pred)``. Both interpret the outputs as probabilities,
    which an unbounded linear output does not provide.

    Args:
        input: Tensor to feed through a freshly built ``Sequential`` stack.

    Returns:
        Output tensor with ``n_labels`` values per row, each in (0, 1).
    """
    # NOTE(review): the two hidden Dense layers have no activation (linear),
    # as in the original — confirm this is intended.
    seq = Sequential([
        layers.BatchNormalization(),
        layers.Dense(256),
        layers.BatchNormalization(),
        layers.Dense(n_labels),
        layers.BatchNormalization(),
        layers.Dense(n_labels, activation='sigmoid')
        ])
    return seq(input)

def modelling (n_features, n_features_2, n_labels, opt, label_smoothing = 0.0005):
    """Build and compile the two-input model.

    ``Input1`` goes through ``sequenceInitOne``; the result is concatenated
    with ``Input2`` and passed through ``sequenceInitTwo``. Each following
    block (``sequenceInitThree``, ``sequenceInitFour``) receives the
    element-wise average of the two preceding block outputs, and the final
    average is fed to ``sequenceInitFive`` to produce the model output.

    Args:
        n_features: Width of the first input (``Input1``).
        n_features_2: Width of the second input (``Input2``).
        n_labels: Not used inside this function; ``sequenceInitFive`` reads
            the module-level ``n_labels`` instead. Kept for interface
            compatibility.
        opt: Optimizer (name or instance) passed to ``Model.compile``.
        label_smoothing: Label smoothing used by the binary cross-entropy
            loss.

    Returns:
        The compiled ``Model`` taking ``[Input1, Input2]``.
    """
    input_1 = layers.Input(shape = (n_features,), name = 'Input1')
    input_2 = layers.Input(shape = (n_features_2,), name = 'Input2')
    input_3 = sequenceInitOne(input_1)
    input_3_concat = layers.Concatenate()([input_2, input_3])

    input_4 = sequenceInitTwo(input_3_concat)
    input_4_avg = layers.Average()([input_3, input_4])

    input_5 = sequenceInitThree(input_4_avg)
    input_5_avg = layers.Average()([input_4, input_5])

    input_6 = sequenceInitFour(input_5_avg)
    input_6_avg = layers.Average()([input_5, input_6])

    output = sequenceInitFive(input_6_avg)

    model = Model(inputs = [input_1, input_2], outputs = output)
    # `metrics` is passed as a list: newer Keras versions reject a bare
    # callable here. The metric is still reported as 'lossFunction' /
    # 'val_lossFunction', which the callbacks in this file monitor.
    model.compile(optimizer=opt, loss=losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=[lossFunction])

    return model



In [None]:
# Quick comparison of optimizers on the full training set.
#
# The feature matrices do not depend on the optimizer, so they are built once
# instead of once per loop iteration. The test-set transformation that used
# to be computed here was never used in this cell and has been removed.
#
# NOTE(review): the "validation" data below is the training data itself, so
# the printed score and the callbacks reflect training fit, not
# generalization — confirm this is intended for this comparison.
X_train, X_test = getFeatures(train_features.values,train_features.values)

allcols_train = train_features[num_features].values
allcols_test = train_features[num_features].values

y_train = labels_train
y_test = labels_train
n_features = X_train.shape[1]
n_features_2 = allcols_train.shape[1]

for optimizer in ['adam','adagrad','SGD']:
    model = modelling(n_features, n_features_2, n_labels, opt=optimizer)
    reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_lossFunction',
                                            mode='min', factor=0.1, patience=2)
    early_stopping = callbacks.EarlyStopping(monitor='val_lossFunction',
                                             patience=10,
                                             mode='min',restore_best_weights=True)
    hist = model.fit([X_train,allcols_train],
                     y_train, batch_size=128,
                     epochs=3,verbose=1,
                     validation_data = ([X_test,allcols_test],y_test),
                     callbacks=[reduce_lr, early_stopping])

    # Index 1 of evaluate() is the compiled metric (lossFunction).
    print(model.evaluate([X_test,allcols_test],y_test)[1])

In [None]:
# Seed-averaged K-fold training. For every seed the training rows are split
# into `n_folds` folds; one model is trained per fold and its test-set
# prediction is added to `y_pred` with weight 1 / (n_folds * n_seeds).
n_seeds = 5
np.random.seed(4545)
n_folds = 5
seeds = np.random.randint(0,100,size=n_seeds)
y_pred = np.zeros((n_test,n_labels))

# The raw test matrices do not depend on the seed or the fold: build once.
test_values = test_features.drop('cp_type',axis=1).values
unknown_2 = test_features[num_features].values

for seed in seeds:
    kf = KFold(n_splits=n_folds,shuffle=True,random_state=seed)
    for fold, (train, test) in enumerate(kf.split(train_features)):
        train_values = train_features.iloc[train].values
        valid_values = train_features.iloc[test].values

        # getFeatures fits its PCAs and scaler on the first argument only and
        # transforms the second argument row by row. Passing the validation
        # and test rows stacked together therefore sends both through the
        # very same fitted transformers the model is trained on. Calling
        # getFeatures a second time for the test set (as before) refits
        # everything, which doubles the work and is not guaranteed to
        # reproduce the same projection if PCA picks a randomized solver.
        n_valid = len(test)
        X_train, X_holdout = getFeatures(
            train_values, np.concatenate((valid_values, test_values), axis=0))
        X_test = X_holdout[:n_valid]
        unknown = X_holdout[n_valid:]

        allcols_train = train_features.iloc[train][num_features].values
        allcols_test = train_features.iloc[test][num_features].values

        y_train = labels_train[train]
        y_test = labels_train[test]
        n_features = X_train.shape[1]
        n_features_2 = allcols_train.shape[1]

        model = modelling(n_features, n_features_2, n_labels, opt='adam')
        reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_lossFunction', mode='min', factor=0.1, patience=2)
        early_stopping = callbacks.EarlyStopping(monitor='val_lossFunction',patience=10, mode='min',restore_best_weights=True)
        hist = model.fit([X_train,allcols_train],y_train, batch_size=128, epochs=15,verbose=1,validation_data = ([X_test,allcols_test],y_test),
                         callbacks=[reduce_lr, early_stopping])

        y_pred += model.predict([unknown,unknown_2])/(n_folds*n_seeds)

        # Both curves are losses (lower is better), so they are labelled as
        # such rather than as "accuracy".
        plt.plot(hist.history['lossFunction'])
        plt.plot(hist.history['val_lossFunction'])
        plt.title(f'clipped log loss (seed {seed}, fold {fold})')
        plt.ylabel('log loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.show()

        plt.plot(hist.history['loss'])
        plt.plot(hist.history['val_loss'])
        plt.title(f'training loss (seed {seed}, fold {fold})')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.show()

In [None]:
# Display the averaged test-set predictions accumulated by the training loop.
y_pred

In [None]:
# Overwrite every column after the first one with the predictions.
# NOTE(review): this is positional — it assumes the submission's target
# columns are in the same order as the training target columns; confirm.
sample_sub.iloc[:,1:] = y_pred
sample_sub

In [None]:
# Write the submission file without the DataFrame index column.
sample_sub.to_csv("submission.csv",index=False)