## Drug Mechanism of Action (MoA) prediction using TensorFlow

In [None]:
# Import the needed libraries 
import pandas as pd 
import numpy as np 
# Import Deep learning library 
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.preprocessing import OneHotEncoder, StandardScaler, scale
from sklearn.model_selection import train_test_split

In [None]:
# Read the three competition CSV files; the 'sig_id' column is dropped from
# each frame as part of loading.
train_features = pd.read_csv('../input/lish-moa/train_features.csv').drop(columns=['sig_id'])
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv').drop(columns=['sig_id'])
test_features = pd.read_csv('../input/lish-moa/test_features.csv').drop(columns=['sig_id'])

In [None]:
# Print a concise summary of the training features frame (dtypes, memory usage).
train_features.info()

In [None]:
# Preview the first rows of the training features.
train_features.head()

My strategy for analysis and modeling is as follows:


1. Turn categorical features into numerical ones (encoding)
2. Scale the data (standardization)
3. Split the data into training and validation sets and convert them to a format suitable for modeling with TensorFlow
4. Create the model with TensorFlow
5. Fit the model to the data
6. Tune the hyperparameters of the deep learning model
7. Make predictions and submit to kaggle.com

In [None]:
# 1. Turn categorical features into numerical through one hot encoding
cat_features = ['cp_type', 'cp_dose']
one_enc = OneHotEncoder(handle_unknown='ignore')
# Fit on the training frame; `one_enc` stays available so other frames with the
# same columns can be transformed with the identical category mapping.
encoded = one_enc.fit_transform(train_features[cat_features]).toarray()
# Label every indicator column "<feature>_<category>" instead of the default
# integers 0..n, so the frame does not end up with mixed int/str column labels.
# The encoder emits its output columns feature by feature, in `categories_` order.
enc_columns = [
    '{}_{}'.format(feature, category)
    for feature, categories in zip(cat_features, one_enc.categories_)
    for category in categories
]
# Reuse the frame's own index so the join aligns row-for-row even if the index
# is not a default RangeIndex.
enc_df = pd.DataFrame(encoded, columns=enc_columns, index=train_features.index)
# Replace the raw categorical columns with their indicator columns.
train_features = train_features.drop(cat_features, axis=1).join(enc_df)
train_features.head()

In [None]:
# 2. Scaling the data
# Standardise every column: subtract its mean and divide by its population
# standard deviation (ddof=0, matching np.std's default).
# The statistics are kept in `feature_means` / `feature_stds` so that any other
# data fed to the model can be scaled with exactly the same values.
# NOTE: they are computed on the full training frame, i.e. before the
# train/validation split performed in step 3, so validation rows contribute
# to them.
feature_means = train_features.mean()
feature_stds = train_features.std(ddof=0)
# A constant column has std == 0 and would become NaN after the division;
# dividing by 1 instead leaves such a column as all zeros.
feature_stds = feature_stds.replace(0, 1.0)
train_features = (train_features - feature_means) / feature_stds
train_features.head()

In [None]:
# 3. Split the data into training and validation sets & transform to proper format for tensorflow
SEED = 70
np.random.seed(SEED)
# Pass the seed to the splitter explicitly so the 90/10 split does not depend
# on whatever has consumed the global NumPy random state before this cell ran.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_features,
    train_targets_scored,
    test_size=0.1,
    random_state=SEED,
)
# Persist both partitions; np.savez appends the '.npz' extension to the name.
np.savez('train_data', inputs=x_train, targets=y_train)
np.savez('valid_data', inputs=x_valid, targets=y_valid)

In [None]:
# 4. Create model with tensorflow
# load the saved data
# The archives are opened in `with` blocks so the underlying files are closed
# once the arrays have been copied out. allow_pickle=True is kept as in the
# original; the files are the ones written by this notebook in step 3.
with np.load('train_data.npz', allow_pickle=True) as npz_train:
    train_inputs = npz_train['inputs'].astype(np.float32)
    train_targets = npz_train['targets'].astype(np.int32)
with np.load('valid_data.npz', allow_pickle=True) as npz_valid:
    valid_inputs = npz_valid['inputs'].astype(np.float32)
    valid_targets = npz_valid['targets'].astype(np.int32)

# creating and compiling the model
input_size, output_size = train_inputs.shape[1], train_targets.shape[1]
hidden_layer_size = 800
n_hidden_layers = 4
# Four identical ReLU hidden layers followed by one sigmoid unit per target column.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(n_hidden_layers)]
    + [tf.keras.layers.Dense(output_size, activation='sigmoid')]
)
# NOTE(review): this SGD optimizer is created but never passed to compile();
# training uses the 'adagrad' string below. Confirm which optimizer is intended.
opt = tf.keras.optimizers.SGD(learning_rate=0.02)
# Each output unit is an independent sigmoid probability, which pairs with
# binary cross-entropy. categorical_crossentropy treats a row as a single
# class distribution and contributes zero loss for rows whose targets are all 0.
# Assumes a row may have zero or several active targets - TODO confirm.
model.compile(optimizer='adagrad', loss='binary_crossentropy', metrics=['accuracy'])
batch_size, max_epochs = 128, 100
# Stop once the validation loss has not improved for 2 epochs (callback is
# created to prevent overfitting) and roll the model back to the best epoch's
# weights instead of keeping those from the final, worse epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)
# fitting the model
model.fit(
    train_inputs,
    train_targets,
    batch_size=batch_size,
    epochs=max_epochs,
    callbacks=[early_stopping],
    validation_data=(valid_inputs, valid_targets),
    verbose=1,
)