In [1]:
import pandas as pd
import numpy as np
import random
import os
from sklearn.preprocessing import OneHotEncoder
from neural_network_wrapper import NeuralNetworkWrapper

from sklearn.metrics import accuracy_score
import optimizers

import json

In [2]:
# NOTE: kept local to this cell so it stays runnable on its own; these would
# ideally live in the notebook's top import cell.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fixed seed so the train/test partition is identical on every Restart & Run All.
SPLIT_SEED = 42

data = pd.read_csv("./projekt1/classification/data.simple.train.1000.csv")

# Feature matrix built from the two input columns.
X = data[['x', 'y']].to_numpy()

# Shift the labels down by one so they can be used as column indices
# (presumably the CSV stores 1-based class ids - confirm against the data).
# Computed on a fresh array so the loaded `data` frame is not mutated in place.
labels = data['cls'].to_numpy().astype(int) - 1

# One-hot encoding: one row per sample, one column per class index.
# `int` replaces the `np.int` alias, which no longer exists in NumPy >= 1.24.
y_ohc = np.zeros((labels.size, int(labels.max()) + 1))
y_ohc[np.arange(labels.size), labels] = 1
y = y_ohc

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

# Fit the scaler on the training part only and reuse its statistics for the
# test part, so no information from the test split leaks into the scaling.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [3]:
# --- Network architecture ---------------------------------------------------
input_dim = 2
neuron_numbers = [4, 4, 2]  # number of neurons in consecutive layers
activation_functions = ['relu', 'relu', 'sigmoid']

# --- Training setup ---------------------------------------------------------
loss_function = 'logistic_loss'
learning_rate = 0.01
optimizer = optimizers.Optimizer()
batch_size = 128
val_split = 0.1
num_epochs = 50
seed = 42  # used by the pipeline to seed `random` and NumPy

# --- Bookkeeping ------------------------------------------------------------
# The pipeline joins these two names into the JSON file name it writes.
dataset_name = "test"
experiment_name = "test"

# Single configuration object consumed by `experiments_pipeline`; the key
# order below is the order in which the entries appear in the saved summary.
experiment_dict = dict(
    input_dim=input_dim,
    neuron_numbers=neuron_numbers,
    activation_functions=activation_functions,
    loss_function=loss_function,
    learning_rate=learning_rate,
    optimizer=optimizer,
    batch_size=batch_size,
    validation_split=val_split,
    num_epochs=num_epochs,
    seed=seed,
    dataset_name=dataset_name,
    experiment_name=experiment_name,
)

In [4]:
def _to_jsonable(value):
    """Convert NumPy scalars/arrays to plain Python objects for ``json``."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def experiments_pipeline(X, y, experiment_dict, save_to_file=False):
    """Train one network described by ``experiment_dict`` and collect its metrics.

    The function seeds ``random`` and NumPy's global RNG with
    ``experiment_dict['seed']``, builds a ``NeuralNetworkWrapper`` from the
    configuration, trains it on ``X``/``y`` and returns a copy of the
    configuration extended with the per-epoch histories exposed by the
    wrapper.

    Parameters
    ----------
    X, y :
        Training inputs and targets; passed to ``NeuralNetworkWrapper.train``
        unchanged.
    experiment_dict : dict
        Must contain the keys ``input_dim``, ``neuron_numbers``,
        ``activation_functions``, ``loss_function``, ``learning_rate``,
        ``optimizer``, ``batch_size``, ``num_epochs``, ``validation_split``
        and ``seed``. ``experiment_name`` and ``dataset_name`` are required
        only when ``save_to_file`` is true. The dict itself is not modified
        (a shallow copy is used).
    save_to_file : bool, default False
        When true, the result is written as JSON to
        ``<experiment_name>_<dataset_name>.json``.

    Returns
    -------
    dict
        The configuration without the ``optimizer`` entry, plus
        ``loss_on_epoch``, ``loss_on_epoch_valid``, ``accuracy_on_epoch`` and
        ``accuracy_on_epoch_valid``.

    Raises
    ------
    FileExistsError
        If ``save_to_file`` is true and the target file already exists.
        Checked before training so no training run is wasted.
    """
    d = experiment_dict.copy()

    # Fail fast: refuse to overwrite an earlier result *before* spending time
    # on training.
    filename = None
    if save_to_file:
        filename = d['experiment_name'] + '_' + d['dataset_name'] + '.json'
        if os.path.exists(filename):
            raise FileExistsError(f"File {filename} already exists!")

    # Seed both RNGs so repeated runs of the same configuration are reproducible.
    random.seed(d['seed'])
    np.random.seed(d['seed'])

    NN = NeuralNetworkWrapper(d['input_dim'],
                              d['neuron_numbers'],
                              d['activation_functions'],
                              d['loss_function'],
                              d['learning_rate'],
                              d['optimizer'],
                              d['batch_size'])
    NN.train(X,
             y,
             d['num_epochs'],
             d['validation_split'],
             cache_accuracy=True)

    d['loss_on_epoch'] = NN.loss_on_epoch
    d['loss_on_epoch_valid'] = NN.loss_on_epoch_valid
    d['accuracy_on_epoch'] = NN.accuracy
    d['accuracy_on_epoch_valid'] = NN.accuracy_valid

    # TODO: how to evaluate optimizers? Object cannot be saved to a JSON file
    d.pop('optimizer', None)

    if save_to_file:
        # Serialize first: if the summary is not JSON-serializable the error is
        # raised before any file is created, so no truncated file is left
        # behind to block the next run.
        payload = json.dumps(d, default=_to_jsonable)
        try:
            # 'x' = exclusive creation; guards against a file that appeared
            # while the network was training.
            with open(filename, 'x') as file:
                file.write(payload)
        except FileExistsError:
            raise FileExistsError(f"File {filename} already exists!") from None
        print("File successfully saved!")

    return d

In [5]:
# Run the configured experiment and write its summary to a JSON file.
# The pipeline raises if that file already exists, so re-running this cell
# requires removing the file or changing experiment_name / dataset_name.
# NOTE(review): this trains on the raw, unscaled `X` and full `y`; the
# standardized `X_train` / `y_train` prepared earlier are not used - confirm
# that this is intended.
output = experiments_pipeline(X, y, experiment_dict, save_to_file=True)

Loss after 1 epochs: 1.507
Loss after 2 epochs: 1.473
Loss after 3 epochs: 1.447
Loss after 4 epochs: 1.426
Loss after 5 epochs: 1.410
Loss after 6 epochs: 1.396
Loss after 7 epochs: 1.385
Loss after 8 epochs: 1.375
Loss after 9 epochs: 1.366
Loss after 10 epochs: 1.357
Loss after 11 epochs: 1.349
Loss after 12 epochs: 1.341
Loss after 13 epochs: 1.333
Loss after 14 epochs: 1.325
Loss after 15 epochs: 1.316
Loss after 16 epochs: 1.307
Loss after 17 epochs: 1.298
Loss after 18 epochs: 1.288
Loss after 19 epochs: 1.277
Loss after 20 epochs: 1.266
Loss after 21 epochs: 1.253
Loss after 22 epochs: 1.240
Loss after 23 epochs: 1.225
Loss after 24 epochs: 1.209
Loss after 25 epochs: 1.192
Loss after 26 epochs: 1.177
Loss after 27 epochs: 1.161
Loss after 28 epochs: 1.145
Loss after 29 epochs: 1.129
Loss after 30 epochs: 1.113
Loss after 31 epochs: 1.097
Loss after 32 epochs: 1.081
Loss after 33 epochs: 1.066
Loss after 34 epochs: 1.050
Loss after 35 epochs: 1.035
Loss after 36 epochs: 1.019
L

In [6]:
# Display the summary dict returned by the pipeline (configuration entries
# followed by the per-epoch loss and accuracy lists).
output

{'input_dim': 2,
 'neuron_numbers': [4, 4, 2],
 'activation_functions': ['relu', 'relu', 'sigmoid'],
 'loss_function': 'logistic_loss',
 'learning_rate': 0.01,
 'batch_size': 128,
 'validation_split': 0.1,
 'num_epochs': 50,
 'seed': 42,
 'dataset_name': 'test',
 'experiment_name': 'test',
 'loss_on_epoch': [1.5065833293327107,
  1.4726817609542129,
  1.4466688743878382,
  1.4262859660460951,
  1.4099175182103483,
  1.3964153052246724,
  1.384943282860538,
  1.3748716549967892,
  1.3657767016084341,
  1.3572370296857323,
  1.349047434556958,
  1.340975677212307,
  1.3328891837377557,
  1.3246551690764985,
  1.3161129093363315,
  1.3072929302185265,
  1.297994876296953,
  1.2880070417111675,
  1.2772995178942628,
  1.2658313169336983,
  1.2533643221576585,
  1.2397626542550293,
  1.2248697432686255,
  1.2087324654286788,
  1.1921266386510228,
  1.1766031448803258,
  1.1607625904743508,
  1.1449592076167474,
  1.128762800021767,
  1.1125881616367557,
  1.0967634804123896,
  1.08126183167