# Build and Fine-Tune a Neural Network

This notebook loads the cleaned data and trains a shallow neural network.

## Load data

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:

# Locations of the transformed training features and labels.
xtrain_path = "./data/transformed/xtrain.csv"
ytrain_path = "./data/transformed/ytrain.csv"

# The CSV files are read without a header row and converted straight to
# NumPy arrays.
X = pd.read_csv(xtrain_path, sep=',', header=None).to_numpy()
y = pd.read_csv(ytrain_path, sep=',', header=None).to_numpy()

# Row i of `X` is paired with row i of `y`, so both must have the same
# number of rows.
assert len(X) == len(y)

# Number of columns in the feature matrix.
n_features = X.shape[1]


In [None]:
# Wrap the arrays in a tf.data.Dataset; each element is one (X row, y row) pair.
full_dataset = tf.data.Dataset.from_tensor_slices((X, y))

In [None]:
# Sanity check: print only the first (features, label) element of the dataset.
for item in full_dataset.take(1):
    print(item)

## Train the model

In [None]:
# Batching and shuffling parameters for the input pipeline.
BATCH_SIZE = 10
SHUFFLE_BUFFER_SIZE = 100

# 80/20 train/validation split over the rows, in the order they were loaded.
full_ds_size = X.shape[0]
train_ds_size = int(0.80 * full_ds_size)

# Number of training batches per epoch. Computed from the integer training-set
# size so that `steps` is an int (it is passed to `fit` as steps_per_epoch),
# and clamped to at least one step for very small datasets.
steps = max(1, train_ds_size // BATCH_SIZE)

# The first `train_ds_size` elements are used for training ...
train_dataset = full_dataset.take(train_ds_size)
# ... and everything after them for validation. The skip must be applied to
# `full_dataset`: `train_dataset` holds exactly `train_ds_size` elements, so
# skipping that many from it would leave an empty validation set.
val_dataset = full_dataset.skip(train_ds_size)

# Only the training data is shuffled; both splits are batched.
train_dataset_batch = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
val_dataset = val_dataset.batch(BATCH_SIZE)

n_epochs = 50
checkpoint_filepath = './models/checkpoint'
# Keep only the weights of the epoch with the highest validation accuracy.
# NOTE: the path is built by plain string concatenation, so the weights are
# written to './models/checkpointbest.h5'.
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath + 'best.h5',
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

# Stop training once validation accuracy has not improved for 2 epochs.
model_early_stop = EarlyStopping(monitor='val_accuracy', mode='max', patience=2)

In [None]:
# Shallow binary classifier: one 32-unit ReLU hidden layer, 50% dropout,
# and a single sigmoid output unit.
hype_model = tf.keras.Sequential()
hype_model.add(layers.Dense(32, activation='relu', kernel_initializer='glorot_uniform'))
hype_model.add(layers.Dropout(rate=0.5))
hype_model.add(layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy loss, Adam with default settings, accuracy as the
# reported metric.
hype_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
    optimizer=tf.optimizers.Adam(),
)

In [None]:
# Train the model. Both datasets are repeated indefinitely, so the length of
# each training pass and each validation pass is bounded by `steps_per_epoch`
# and `validation_steps` respectively. The callbacks checkpoint the best
# weights and stop training early.
history = hype_model.fit(
    train_dataset_batch.repeat(),
    epochs=n_epochs,
    callbacks=[model_checkpoint_callback, model_early_stop],
    steps_per_epoch=steps,
    validation_steps=10,
    validation_data=val_dataset.repeat(),
)

In [None]:
# Print the layer-by-layer summary of the trained model.
hype_model.summary()

In [None]:
# Plot the per-epoch training accuracy and training loss on the same axes.
# NOTE: the title and y-axis label mention only accuracy, although the loss
# curve is drawn on the same axes.
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['loss'], label='loss')
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Show which metric names were recorded in the training history.
history.history.keys()

In [None]:
# Locations of the transformed test features and labels.
xtest_path = "./data/transformed/xtest.csv"
ytest_path = "./data/transformed/ytest.csv"

# Read the header-less CSV files and convert them to NumPy arrays:
# `a` holds the test features, `b` the test labels.
a = pd.read_csv(xtest_path, sep=',', header=None).to_numpy()
b = pd.read_csv(ytest_path, sep=',', header=None).to_numpy()


In [None]:
# Evaluate the trained model on the test features `a` and test labels `b`;
# the model was compiled with binary cross-entropy loss and an accuracy metric.
hype_model.evaluate(a, b)