In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import random
import os
from tensorflow.keras import Input, Model 
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, AlphaDropout, MaxPooling2D, AveragePooling2D, BatchNormalization, Concatenate, Flatten, Reshape, Add, Activation
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [None]:
# Read the four CSV files in a fixed order and bind each one to its own name.
train, test, train_target, sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/test_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/sample_submission.csv',
    )
)
# Last expression of the cell: show the first rows of the training features.
train.head()

In [None]:
def preprocess(df):
    """Encode the categorical treatment columns and drop the row identifier.

    Works on a copy, so the frame passed in is left untouched and the calling
    cell can be re-run against the raw data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the `sig_id`, `cp_type` and `cp_dose` columns.

    Returns
    -------
    pandas.DataFrame
        New frame without `sig_id`, in which `cp_type` is 0 for 'trt_cp' and
        1 for 'ctl_vehicle', and `cp_dose` is 0 for 'D1' and 1 for 'D2'.
        Values outside those mappings become NaN.

    Raises
    ------
    KeyError
        If `sig_id`, `cp_type` or `cp_dose` is missing from `df`.
    """
    # `assign` replaces each column outright, so the result takes the numeric
    # dtype of the mapped values. Writing through `df.loc[:, col] = ...`
    # instead sets values in place on pandas 2.x, which leaves Python ints
    # inside the original object-dtype column.
    return df.drop(columns='sig_id').assign(
        cp_type=lambda frame: frame['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1}),
        cp_dose=lambda frame: frame['cp_dose'].map({'D1': 0, 'D2': 1}),
    )

# Run the same preprocessing step over both feature tables, train first.
train, test = (preprocess(frame) for frame in (train, test))


In [None]:
# Targets are every column of `train_target` except the `sig_id` identifier.
y_train = train_target.drop(columns=["sig_id"])

In [None]:
# Report the (rows, columns) shape of each table, one line per table.
for label, frame in (
    ("training data size is ", train),
    ("target data size is   ", y_train),
    ("testing data size is ", test),
):
    print(label, frame.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for validation, with a fixed seed.
# The targets are derived from `train_target` rather than from the existing
# `y_train`, because this cell rebinds `y_train`. Splitting `y_train` itself
# would, on a second run of the cell, pair the already-reduced targets with
# the full `train` frame and fail on mismatched row counts.
X_train, X_test, y_train, y_test = train_test_split(
    train,
    train_target.drop(columns=["sig_id"]),
    test_size=0.3,
    random_state=42,
)

In [None]:
def build_model(n_features=875, n_targets=206):
    """Build the fully connected network with two additive skip connections.

    The network has a 1024-unit stage and a 512-unit stage, each made of three
    ReLU dense layers, followed by dropout and a sigmoid output layer.

    Parameters
    ----------
    n_features : int, optional
        Width of the input vector. Defaults to 875, the value previously
        hard-coded here.
    n_targets : int, optional
        Number of sigmoid output units. Defaults to 206, the value previously
        hard-coded here.

    Returns
    -------
    tensorflow.keras.Model
        The uncompiled model.
    """
    # `Input` expects a shape tuple; `(875)` without a trailing comma is just
    # the integer 875.
    inputs = Input(shape=(n_features,))

    # First stage: the third layer's output is added back onto the
    # batch-normalised output of the first layer.
    dense1 = Dense(1024, activation='relu')(inputs)
    dense1 = BatchNormalization()(dense1)
    dense2 = Dense(1024, activation='relu')(dense1)
    dense2 = BatchNormalization()(dense2)
    dense3 = Dense(1024, activation='relu')(dense2)
    dense3 = Add()([dense1, dense3])

    # Second stage: the sixth layer's output is added onto the
    # batch-normalised fifth layer, then the sum is batch-normalised.
    dense4 = Dense(512, activation='relu')(dense3)
    dense4 = BatchNormalization()(dense4)
    dense5 = Dense(512, activation='relu')(dense4)
    dense5 = BatchNormalization()(dense5)
    dense6 = Dense(512, activation='relu')(dense5)
    dense7 = Add()([dense5, dense6])
    dense7 = BatchNormalization()(dense7)

    # Head: dropout followed by one sigmoid unit per target column.
    outputs = Dropout(0.25)(dense7)
    outputs = Dense(n_targets, activation='sigmoid')(outputs)
    return Model(inputs=inputs, outputs=outputs)


# Size the network from the training split so it follows the data's width.
model = build_model(n_features=X_train.shape[1], n_targets=y_train.shape[1])

In [None]:
model.compile(
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and not accepted by newer Keras optimizers.
    optimizer=tf.keras.optimizers.Adam(learning_rate=2.75e-5),
    loss='binary_crossentropy',
    # Pin element-wise binary accuracy explicitly instead of relying on how
    # the "accuracy" string is resolved for a multi-output sigmoid layer.
    # The name is kept so the logged keys stay 'accuracy' / 'val_accuracy'.
    metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy"), "AUC"],
)

In [None]:
# Early stopping on validation loss: an improvement must be at least 1e-4,
# training stops after 10 epochs without one, and the best weights are restored.
Es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Halve the learning rate after 3 epochs without a validation-loss
# improvement, never going below 1e-5.
Rl = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

In [None]:
# Train for up to 25 epochs in batches of 30, validating on the held-out
# split and applying both callbacks.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=25,
    batch_size=30,
    callbacks=[Es, Rl],
)

In [None]:
def plot_accuracy_loss(history):
    """Draw training vs. validation curves for accuracy and for loss.

    Parameters
    ----------
    history
        Object exposing a `.history` mapping with the keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss'.
    """
    figure = plt.figure(figsize=(20, 15))
    curves = history.history

    # One row per panel:
    # (grid slot, train key, val key, train label, val label, title, y label)
    panels = (
        (1, 'accuracy', 'val_accuracy', "acc", "val_acc", "train_acc vs val_acc", "accuracy"),
        (2, 'loss', 'val_loss', "loss", "val_loss", "train_loss vs val_loss", "loss"),
    )

    for slot, train_key, val_key, train_label, val_label, title, y_label in panels:
        # Both panels occupy the top row of a 2x2 grid.
        axis = figure.add_subplot(2, 2, slot)
        axis.plot(curves[train_key], 'bo--', label=train_label)
        axis.plot(curves[val_key], 'ro--', label=val_label)
        axis.set_title(title)
        axis.set_ylabel(y_label)
        axis.set_xlabel("epochs")
        axis.legend()

    plt.show()
    
    
# Plot the curves recorded in `history` by the training run.
plot_accuracy_loss(history)

In [None]:
# Run the trained model on the preprocessed test features.
pred = model.predict(test)
# Last expression of the cell: display the shape of the prediction array.
pred.shape

In [None]:
# Reload the submission template so this cell always starts from a fresh frame.
sample = pd.read_csv('../input/lish-moa/sample_submission.csv')
# Overwrite every column after the first with the predictions, by position.
# NOTE(review): this assumes the template's columns are in the same order as
# the target columns the model was trained on — confirm.
sample.iloc[:,1:] = pred
# Last expression of the cell: preview the filled-in submission.
sample.head()

In [None]:
# Write the submission to 'submission.csv' in the current directory, without the index column.
sample.to_csv('submission.csv', index=False)