# Title
notebook description.

## Preparation
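This section imports the libraries used throughout the notebook, adds the repository root to the import path so that project modules can be imported, and fixes the random seeds for reproducible results.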

In [1]:
import os
import sys

# Put the repository root (two directory levels above the working directory)
# at the front of sys.path so that project modules can be imported.
project_path = os.getcwd()
root_path = os.path.dirname(os.path.dirname(project_path))
assert root_path.endswith("Fontys-ADS"), "The root path does not end with Fontys-ADS: " + root_path
sys.path.insert(0, root_path)

import numpy as np
import tensorflow as tf

# Seed NumPy and TensorFlow with the same value for reproducible results.
SEED = 56
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Optional: let TensorFlow grow GPU memory on demand (uncomment to enable).
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Data collection
Explain how the data is gathered.

In [2]:
import pandas as pd

# Toy dataset: every column is an 8-value pattern repeated 100 times (800 rows).
df = pd.DataFrame({
    column: pattern * 100
    for column, pattern in {
        'target': [0, 1, 0, 1, 0, 1, 0, 1],
        'feature1': [12, 43, 11, 77, 12, 43, 11, 77],
        'feature2': [-4, 14, -4, 16, 12, 43, 11, 77],
    }.items()
})
df

Unnamed: 0,target,feature1,feature2
0,0,12,-4
1,1,43,14
2,0,11,-4
3,1,77,16
4,0,12,12
...,...,...,...
795,1,77,16
796,0,12,12
797,1,43,43
798,0,11,11


# Preparing the data
Explain how the data is prepared.

In [3]:
from datasets.base_dataset import DatasetBase

# the dataset class
class MyDataset(DatasetBase):
    """Dataset built from a DataFrame that has a 'target' column plus feature columns."""

    def __init__(self, df, batch_size, train_percentage, validation_percentage, test_percentage):
        """Build the tf.data dataset from `df`, then shuffle and split it.

        Args:
            df: DataFrame with a 'target' column; every other column is a feature.
            batch_size: stored on the instance as `self.batch_size`.
            train_percentage: forwarded to `split_data_to_train_val_test`.
            validation_percentage: forwarded to `split_data_to_train_val_test`.
            test_percentage: forwarded to `split_data_to_train_val_test`.
        """
        self.batch_size = batch_size

        # Everything except 'target' is a feature (float32); 'target' is the label (bool).
        is_feature_column = df.columns != 'target'
        feature_values = df.loc[:, is_feature_column].values
        label_values = df.loc[:, 'target'].values
        features = tf.cast(feature_values, tf.float32)
        labels = tf.cast(label_values, tf.bool)

        # One dataset element per DataFrame row: (feature vector, label).
        self.data = tf.data.Dataset.from_tensor_slices((features, labels))

        # Number of feature columns, i.e. all columns minus 'target'.
        self.feature_length = df.shape[1] - 1

        # `shuffle` and `split_data_to_train_val_test` are not defined in this class;
        # presumably they are inherited from DatasetBase -- verify against that module.
        self.shuffle(256)
        self.split_data_to_train_val_test(
            self.data,
            train_percentage,
            validation_percentage,
            test_percentage,
        )

In [4]:
# Batch size and the train / validation / test split fractions.
batch_size = 64
train_percentage, validation_percentage, test_percentage = 0.6, 0.2, 0.2

myDataset = MyDataset(
    df,
    batch_size=batch_size,
    train_percentage=train_percentage,
    validation_percentage=validation_percentage,
    test_percentage=test_percentage,
)

train: 240 validation: 80 test: 80


# Exploratory Data Analysis
Explore the data to gain insights into possible features.

# Modelling
Apply ML/DL models.

In [5]:
from models.base_model import ModelBase
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense

class MyModel(ModelBase):
    """Small fully-connected binary classifier.

    Architecture (built in `compile`):
    Input(feature_length) -> Dense(64, relu) -> Dense(1, sigmoid).
    """

    def __init__(self, feature_length, gpu_initialized=False, training=False, limit=5000):
        """Store the model name and input width.

        Args:
            feature_length: number of input features per sample.
            gpu_initialized: forwarded unchanged to ModelBase.__init__.
            training: forwarded unchanged to ModelBase.__init__.
            limit: forwarded unchanged to ModelBase.__init__.
        """
        super().__init__(gpu_initialized, training, limit)
        # the name for the model.
        self.name = 'MyModel'

        # sets the feature length for input.
        self.feature_length = feature_length

    def predict(self, X):
        """Return the first prediction for `X`.

        Args:
            X: either a single feature vector (rank 1) or an already batched input.

        Returns:
            The first element of the Keras prediction array.
        """
        # Keras predicts on batches. The original code built the batched array but
        # then passed the raw input, so a single sample was never actually wrapped.
        # Only rank-1 input is wrapped; batched input is passed through as before.
        if np.ndim(X) == 1:
            X = np.expand_dims(X, axis=0)

        # perform prediction and take the first prediction out of the predictions array.
        prediction = self.model.predict(X, verbose=1)[0]

        return prediction

    def fit(self, training, callbacks, epochs, validation, validation_steps, steps_per_epoch):
        """Train the underlying Keras model.

        All arguments are forwarded to `self.model.fit`; `validation` is passed
        as `validation_data` and logging is silenced (`verbose=0`).

        Returns:
            The object returned by `self.model.fit` (a Keras History), so callers
            that store the result of `fit` no longer receive None.
        """
        return self.model.fit(
            training,
            callbacks=callbacks,
            epochs=epochs,
            validation_data=validation,
            validation_steps=validation_steps,
            steps_per_epoch=steps_per_epoch, verbose=0)

    def compile(self, optimizer='adam', loss='mse', metrics=['mse'], loss_weights=[1.0], show_summary=False):
        """Build the network and compile it.

        Args:
            optimizer: optimizer name or instance passed to Keras `compile`.
            loss: loss name or callable passed to Keras `compile`.
            metrics: metrics passed to Keras `compile`.
            loss_weights: loss weights passed to Keras `compile`.
            show_summary: when True, print the Keras model summary.

        NOTE(review): `metrics` and `loss_weights` default to shared list objects.
        This method only forwards them; confirm Keras does not mutate them.
        """
        inputs = Input((self.feature_length,))

        dense1 = Dense(64, activation='relu', kernel_initializer='glorot_uniform')(inputs)
        outputs = Dense(1, activation='sigmoid', kernel_initializer='glorot_uniform')(dense1)

        # construct the model by stitching the inputs and outputs
        self.model = Model(inputs=inputs, outputs=outputs, name=self.name)

        # compile the model
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics, loss_weights=loss_weights)

        if show_summary:
            self.model.summary()

In [6]:
# Instantiate the classifier in training mode, sized to the dataset's feature count.
model = MyModel(
    myDataset.feature_length,
    gpu_initialized=True,
    training=True,
)

In [7]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

epochs = 512
INIT_LR = 1e-4
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# NOTE(review): `decay` is only accepted by older / legacy Keras optimizers --
# confirm against the installed TensorFlow version.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / epochs)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['mse', 'accuracy'], show_summary=True)

# current time, used to give every run its own log directory
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# create logging
# Path components are joined with os.path.join instead of hard-coded backslashes:
# '\{' is an invalid escape sequence in a string literal, and on non-Windows systems
# the backslashes would become part of a single directory name.
log_dir = os.path.join(project_path, 'logs', model.name, current_time)

# create all callbacks
callbacks = [
  EarlyStopping(patience=50, monitor='val_loss'),
  TensorBoard(log_dir=log_dir, profile_batch=0)
]

Model: "MyModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense (Dense)                (None, 64)                192       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [9]:
# fit the model using the training data
results = model.fit(
  training=myDataset.train_ds,
  callbacks=callbacks,
  epochs=epochs,
  validation=myDataset.val_ds,
  validation_steps=myDataset.val_size,
  steps_per_epoch=myDataset.train_size)

# save the weights of the model
# The path is built with os.path.join instead of a hard-coded backslash:
# '\{' is an invalid escape sequence in a string literal, and on non-Windows systems
# the backslash would end up inside the file name instead of separating directories.
weights_path = os.path.join(project_path, 'models', f'{model.name}_trained_model_weights')
model.save_weights(weights_path)

# Validation
Validate the model's performance on the held-out test data.

In [11]:
# Re-initialize the model in inference mode and restore the trained weights.
model.training = False
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['mse', 'accuracy'], show_summary=False)
model.load_weights(weights_path)

print('\n# Evaluate on test data')
result = model.evaluate(myDataset.actual_test_ds)
print('test loss, test acc:', result)
# Pair every metric name with its value so the result is self-describing.
res = dict(zip(model.get_metric_names(), result))
print(res)


# Evaluate on test data
test loss, test acc: [4.607292541340698e-09, 1.1063724e-18, 1.0]
{'loss': 4.607292541340698e-09, 'mse': 1.1063724e-18, 'accuracy': 1.0}
