In [1]:
import pandas as pd
import numpy as np
import random
import os
from sklearn.preprocessing import OneHotEncoder
from neural_network_wrapper import NeuralNetworkWrapper

from sklearn.metrics import accuracy_score
import optimizers

import json

In [2]:
# Load the training set; columns 'x' and 'y' are the features, 'cls' the label.
data = pd.read_csv("./projekt1/classification/data.simple.train.1000.csv")

X = np.array(data.loc[:, ['x', 'y']])

# Shift labels so they start at 0. Working on a separate integer array leaves
# `data` untouched (the previous `y -= 1` modified the DataFrame column in place).
labels = data['cls'].to_numpy().astype(int) - 1

# One-hot encoding: one row per sample, one column per class.
# `np.int` was removed in NumPy 1.24, so the builtin `int` is used above.
y_ohc = np.zeros((labels.size, int(labels.max()) + 1))
y_ohc[np.arange(labels.size), labels] = 1
y = y_ohc

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Fixed random_state so that Restart & Run All reproduces the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training part only, then apply it to the test part.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [3]:
# --- Settings of a single experiment -------------------------------------
input_dim = 2
neuron_numbers = [4, 4, 2]  # number of neurons in consecutive layers
activation_functions = ['relu', 'relu', 'sigmoid']
loss_function = 'logistic_loss'
learning_rate = 0.01
optimizer = optimizers.Optimizer()
batch_size = 128
val_split = 0.1
num_epochs = 50
seed = 42
dataset_name = "test"
experiment_name = "test"

# Bundle the settings under the keys that experiments_pipeline reads.
experiment_dict = dict(
    input_dim=input_dim,
    neuron_numbers=neuron_numbers,
    activation_functions=activation_functions,
    loss_function=loss_function,
    learning_rate=learning_rate,
    optimizer=optimizer,
    batch_size=batch_size,
    validation_split=val_split,
    num_epochs=num_epochs,
    seed=seed,
    dataset_name=dataset_name,
    experiment_name=experiment_name,
)

In [28]:
def _json_default(obj):
    """Convert NumPy arrays and scalars into objects ``json`` can encode."""
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def experiments_pipeline(X,
                         y,
                         experiment_dict,
                         num_reps=1,
                         save_to_file=False):
    """Train the configured network ``num_reps`` times and aggregate the histories.

    A fresh ``NeuralNetworkWrapper`` is built and trained for every repetition;
    repetition ``i`` seeds both ``random`` and ``numpy.random`` with
    ``experiment_dict['seed'] + i``.

    Parameters
    ----------
    X, y :
        Training inputs and targets, passed unchanged to ``NeuralNetworkWrapper.train``.
    experiment_dict : dict
        Experiment settings. Keys read here: ``input_dim``, ``neuron_numbers``,
        ``activation_functions``, ``loss_function``, ``learning_rate``,
        ``optimizer``, ``batch_size``, ``num_epochs``, ``validation_split``,
        ``seed`` and, when saving, ``experiment_name`` and ``dataset_name``.
        The dictionary itself is not modified.
    num_reps : int
        Number of independent training runs (must be at least 1).
    save_to_file : bool
        When true, the result is written as JSON to
        ``<experiment_name>_<dataset_name>.json``.

    Returns
    -------
    dict
        Copy of ``experiment_dict`` without the ``optimizer`` entry, extended with
        the per-repetition histories (``loss_on_epoch``, ``loss_on_epoch_valid``,
        ``accuracy_on_epoch``, ``accuracy_on_epoch_valid``) and with
        ``{train,valid}_{loss,accuracy}_{mean,std}`` NumPy arrays computed across
        repetitions.

    Raises
    ------
    ValueError
        If ``num_reps`` is smaller than 1.
    FileExistsError
        If ``save_to_file`` is true and the target file already exists.
    """
    if num_reps < 1:
        raise ValueError(f"num_reps must be at least 1, got {num_reps}")

    # Shallow copy: only new keys are added below, the caller's dict stays intact.
    d = experiment_dict.copy()

    d['loss_on_epoch'] = []
    d['loss_on_epoch_valid'] = []
    d['accuracy_on_epoch'] = []
    d['accuracy_on_epoch_valid'] = []

    for i in range(num_reps):
        print(f"Experiment {i+1}/{num_reps}")

        # Different but reproducible seed for every repetition.
        random.seed(d['seed'] + i)
        np.random.seed(d['seed'] + i)

        NN = NeuralNetworkWrapper(d['input_dim'],
                                  d['neuron_numbers'],
                                  d['activation_functions'],
                                  d['loss_function'],
                                  d['learning_rate'],
                                  d['optimizer'],
                                  d['batch_size'])
        NN.train(X,
                 y,
                 d['num_epochs'],
                 d['validation_split'],
                 cache_accuracy=True,
                 verbosity=False)

        d['loss_on_epoch'].append(NN.loss_on_epoch)
        d['loss_on_epoch_valid'].append(NN.loss_on_epoch_valid)
        d['accuracy_on_epoch'].append(NN.accuracy)
        d['accuracy_on_epoch_valid'].append(NN.accuracy_valid)

    # Aggregate the histories collected in *this* call. The statistics were
    # previously read from a global named `output`, i.e. from an earlier run.
    # Assumes every repetition yields an equally long 1-D history, so the
    # transposed array is (epochs, repetitions) -- TODO confirm against
    # NeuralNetworkWrapper.
    history_keys = {
        'train_loss': 'loss_on_epoch',
        'valid_loss': 'loss_on_epoch_valid',
        'train_accuracy': 'accuracy_on_epoch',
        'valid_accuracy': 'accuracy_on_epoch_valid',
    }
    stacked = {prefix: np.array(d[key]).T for prefix, key in history_keys.items()}
    for prefix, values in stacked.items():
        d[prefix + '_mean'] = np.mean(values, axis=1)
    for prefix, values in stacked.items():
        d[prefix + '_std'] = np.std(values, axis=1)

    #TODO: how to evaluate optimizers? Object cannot be saved to a JSON file
    d.pop('optimizer', None)

    if save_to_file:
        filename = d['experiment_name'] + '_' + d['dataset_name'] + '.json'

        # os.path.exists also works when experiment_name contains a directory part.
        if os.path.exists(filename):
            raise FileExistsError(f"File {filename} already exists!")

        with open(filename, 'w') as file:
            # NumPy arrays/scalars are not JSON serializable by default.
            json.dump(d, file, default=_json_default)
        print("File successfully saved!")

    return d

In [29]:
# Run two repetitions (seeded with seed and seed + 1) on the raw feature
# matrix X and the one-hot targets y.
# NOTE(review): the standardized X_train / y_train prepared earlier are not
# used here -- confirm that training on the unscaled, unsplit data is intended.
output = experiments_pipeline(X, y, experiment_dict, num_reps=2)

Experiment 1/2
Final loss: 0.781
Experiment 2/2
Final loss: 0.294


In [31]:
# Show the 'train_accuracy_std' entry of the dictionary returned by experiments_pipeline.
output['train_accuracy_std']

array([0.07444444, 0.08888889, 0.09055556, 0.09777778, 0.10388889,
       0.10777778, 0.11277778, 0.12      , 0.12222222, 0.12944444,
       0.13277778, 0.13722222, 0.13888889, 0.14333333, 0.145     ,
       0.14      , 0.12444444, 0.10944444, 0.09833333, 0.08888889,
       0.07722222, 0.07277778, 0.06444444, 0.05666667, 0.05      ,
       0.04666667, 0.04722222, 0.04277778, 0.04555556, 0.04222222,
       0.04222222, 0.04111111, 0.04333333, 0.04277778, 0.04166667,
       0.04333333, 0.04388889, 0.04277778, 0.04      , 0.03555556,
       0.035     , 0.03444444, 0.03333333, 0.03222222, 0.03333333,
       0.03111111, 0.03055556, 0.02944444, 0.02444444, 0.02444444])

In [32]:
import matplotlib.pyplot as plt