In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import tensorflow as tf

from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
import tensorflow.keras as k
from keras.models import Sequential
import keras
import os

import matplotlib.pyplot as plt



# Turn on JIT compilation in TensorFlow's optimizer configuration for the whole session.
tf.config.optimizer.set_jit(True)

In [None]:
def seed_everything(seed=0):
    """Seed the Python, NumPy and TensorFlow RNGs and install a single-threaded TF session.

    Args:
        seed: Value passed to every seeding call and written (as a string)
            to the ``PYTHONHASHSEED`` environment variable.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    for set_seed in (np.random.seed, tf.random.set_seed):
        set_seed(seed)

    # Register a compat-v1 session limited to one intra-op and one inter-op
    # thread as the Keras backend session.
    tf1 = tf.compat.v1
    single_thread_config = tf1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    tf1.keras.backend.set_session(
        tf1.Session(graph=tf1.get_default_graph(), config=single_thread_config)
    )

In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Print the full path of every file found below the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
data_root = "/kaggle/input/lish-moa/"

# Features and scored targets are all indexed by the signature id column.
train_X, train_Y, test_X = (
    pd.read_csv(data_root + csv_name, index_col='sig_id')
    for csv_name in (
        'train_features.csv',
        'train_targets_scored.csv',
        'test_features.csv',
    )
)

# The submission template keeps its default integer index.
sample_sub = pd.read_csv(data_root + 'sample_submission.csv')

In [None]:
# Show the first rows of the features and targets, then the column-wise sums of the targets.
display(train_X.head(), train_Y.head(), train_Y.sum())

In [None]:
# Integer-encode the categorical treatment columns.
# Each column gets its own encoder, fitted on the training frame only and then
# reused for the test frame, so a given category always maps to the same
# integer in both. (Refitting on the test frame, as before, silently produces
# a different mapping whenever the two frames do not contain exactly the same
# set of categories.) A category that appears only in the test frame now
# raises ValueError in transform() instead of being mis-encoded.
for columns in ['cp_type', 'cp_dose']:
    le = LabelEncoder()
    train_X[columns] = le.fit_transform(train_X[columns])
    test_X[columns] = le.transform(test_X[columns])

In [None]:


# Convert the frames to plain NumPy arrays for Keras.
X, Y = train_X.to_numpy(), train_Y.to_numpy()
X_test = test_X.to_numpy()

col = train_Y.columns      # labels of the target columns
num_columns = X.shape[1]   # number of input features per sample


In [None]:
# Baseline: a small fully connected network with one sigmoid output per target column.
model = Sequential([
    keras.layers.Dense(32, activation='relu', input_shape=(num_columns,)),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(206, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
history = model.fit(X, Y, batch_size=2000, epochs=50, verbose=1)

# Disabled: writing a submission from the baseline alone.
# preds = model.predict(X_test,verbose=0)
# sample_sub.iloc[:,1:] = preds
# sample_sub.to_csv("submission.csv",index=False)

In [None]:
def random_model(inputs):
    """Build a randomly shaped dense network on ``inputs`` and fit it on the global ``X``/``Y``.

    The network has between 1 and 5 hidden blocks. Each block is a Dense layer
    with 1-1000 units and an activation drawn from swish/selu/elu/relu,
    followed by Dropout with a rate drawn uniformly from [0, 0.5]. The head is
    a 206-unit sigmoid layer. The model is compiled with Adam and binary
    cross-entropy, then trained silently for 20-60 epochs with batch size 2000.

    Args:
        inputs: Keras input tensor the network is attached to.

    Returns:
        The fitted Keras model.
    """
    activation_choices = ("swish", "selu", "elu", "relu")

    hidden = inputs
    depth = random.randint(1, 5)
    for _ in range(depth):
        # Draw order (units, activation, dropout rate) matters for seeded runs.
        width = random.randint(1, 1000)
        activation = random.choice(activation_choices)
        hidden = tf.keras.layers.Dense(width, activation=activation)(hidden)
        drop_rate = random.uniform(0, 0.5)
        hidden = tf.keras.layers.Dropout(drop_rate)(hidden)

    predictions = tf.keras.layers.Dense(206, activation='sigmoid')(hidden)
    net = tf.keras.models.Model(inputs=inputs, outputs=predictions)

    net.compile(optimizer=tf.keras.optimizers.Adam(), loss="binary_crossentropy")
    net.fit(X, Y, epochs=random.randint(20, 60), batch_size=2000, verbose=0)

    return net

In [None]:
def ensemble(models, model_input):
    """Combine models that share one input into a single averaging model.

    Args:
        models: Non-empty iterable of Keras models built on ``model_input``.
            Only the first output of each model is used.
        model_input: The input tensor the member models were built from.

    Returns:
        A Keras model named ``'ensemble'`` whose output is the element-wise
        mean of the members' outputs, or the single member's output when only
        one model is supplied.

    Raises:
        ValueError: If ``models`` is empty.
    """
    outputs = [member.outputs[0] for member in models]
    if not outputs:
        raise ValueError("ensemble() requires at least one model")

    if len(outputs) == 1:
        # Averaging one tensor is a no-op, and some Keras versions reject a
        # merge layer called on a one-element list.
        averaged = outputs[0]
    else:
        # Use tf.keras like the model builders do, so that layers from the
        # standalone `keras` namespace are never mixed with tf.keras tensors.
        averaged = tf.keras.layers.Average()(outputs)

    return tf.keras.Model(model_input, averaged, name='ensemble')

In [None]:
# One shared input tensor so every member can be wired into a single ensemble graph.
inputs = tf.keras.layers.Input(shape=(num_columns,))

num_models = 3  # kept small so the notebook runs quickly; raise it for a stronger ensemble
all_models = []
for model_number in range(1, num_models + 1):
    print("training model: ", model_number)
    all_models.append(random_model(inputs))

model_x = ensemble(all_models, inputs)

# Fill every column after the first in the submission template with the averaged predictions.
preds = model_x.predict(X_test, verbose=0)
sample_sub.iloc[:, 1:] = preds

display(sample_sub.head())
sample_sub.to_csv("submission.csv", index=False)

    

In [None]:
def random_model_no_train(inputs):
    """Build and compile, but do not fit, a randomly shaped dense network.

    The network has 1-5 hidden blocks. Each block is a Dense layer with
    1-1000 units and an activation drawn from swish/selu/elu/relu, followed
    by Dropout with a rate drawn uniformly from [0, 0.5]. A 206-unit sigmoid
    layer produces the output. The model is compiled with Adam and binary
    cross-entropy.

    Args:
        inputs: Keras input tensor the network is attached to.

    Returns:
        The compiled, untrained Keras model.
    """
    layers = tf.keras.layers
    candidate_activations = ["swish", "selu", "elu", "relu"]

    tensor = inputs
    n_blocks = random.randint(1, 5)
    for _ in range(n_blocks):
        n_units = random.randint(1, 1000)
        act = random.choice(candidate_activations)
        tensor = layers.Dense(n_units, activation=act)(tensor)
        tensor = layers.Dropout(random.uniform(0, 0.5))(tensor)

    head = layers.Dense(206, activation='sigmoid')(tensor)
    untrained = tf.keras.models.Model(inputs=inputs, outputs=head)
    untrained.compile(optimizer=tf.keras.optimizers.Adam(), loss="binary_crossentropy")

    return untrained


# Build (without training) a larger ensemble purely to visualise its architecture.
num_graph = 20
all_graph = []

for i in range(num_graph):
    # These models are only constructed, never fitted, so report "building".
    print("building model: ", i+1)
    all_graph.append(random_model_no_train(inputs))


model_graph = ensemble(all_graph, inputs)
# Kept as the last expression of the cell so the notebook shows whatever plot_model returns.
tf.keras.utils.plot_model(model_graph, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


In [None]:

def random_model_last_epoch_loss(inputs):
    """Train a randomly shaped dense network and return its training-loss history.

    The architecture and training setup match the notebook's other random
    builders: 1-5 hidden blocks of Dense (1-1000 units, activation drawn from
    swish/selu/elu/relu) plus Dropout (rate uniform in [0, 0.5]), a 206-unit
    sigmoid head, Adam with binary cross-entropy, and a silent fit on the
    global ``X``/``Y`` for 20-60 epochs with batch size 2000.

    Args:
        inputs: Keras input tensor the network is attached to.

    Returns:
        ``history.history["loss"]`` from the fit call. Despite the function's
        name this is the whole loss record, not only the last epoch; callers
        wanting the final-epoch loss should take the last element.
    """
    dense, dropout = tf.keras.layers.Dense, tf.keras.layers.Dropout
    activation_pool = ["swish", "selu", "elu", "relu"]

    features = inputs
    block_count = random.randint(1, 5)
    for _ in range(block_count):
        unit_count = random.randint(1, 1000)
        chosen_activation = random.choice(activation_pool)
        features = dense(unit_count, activation=chosen_activation)(features)
        features = dropout(random.uniform(0, 0.5))(features)

    sigmoid_head = dense(206, activation='sigmoid')(features)
    network = tf.keras.models.Model(inputs=inputs, outputs=sigmoid_head)

    network.compile(optimizer=tf.keras.optimizers.Adam(), loss="binary_crossentropy")
    fit_record = network.fit(X, Y, epochs=random.randint(20, 60), batch_size=2000, verbose=0)

    return fit_record.history["loss"]



# For each ensemble size, train that many random models and record the mean of
# their final-epoch training losses.
results = []

model_range = range(2, 4)  # kept small so the whole notebook runs quickly; widen for a real experiment

for num_models in model_range:
    # Despite its name, all_models holds one loss-history list per trained model here.
    all_models = []

    for i in range(num_models):
        print("training model: ", i+1)
        all_models.append(random_model_last_epoch_loss(inputs))

    # Take the last entry of EACH model's loss history and average those.
    # The previous `all_models[:][-1]` copied the list and then selected the
    # last model's entire history, so it summed one model's losses over all
    # epochs instead of averaging final-epoch losses across models.
    results.append(sum(losses[-1] for losses in all_models) / len(all_models))


plt.plot(model_range, results)
plt.xlabel('number of models', fontsize=18)
plt.ylabel('averaged last epoch lost', fontsize=16)

    

