## **Monk Import**

In [None]:
import pandas as pd
from io import StringIO
import numpy as np

# Name of the MONK problem to load; also used to build the result paths.
problem = "monks-3"
filename = f"monk/{problem}"

# File-name suffixes of the training and test splits.
train = ".train"
test = ".test"

# Read both splits fully into memory. The context managers guarantee the
# files are closed even if reading raises.
with open(filename + train, 'r') as f:
    train_str = f.read()

with open(filename + test, 'r') as f:
    test_str = f.read()

def retrieveData(data_str):
    """Parse space-separated dataset text into shuffled feature/label arrays.

    Each record is read as a label column ("R"), six feature columns
    ("col2" .. "col7") and one trailing column ("data") that is discarded.

    Args:
        data_str: full text of the dataset, one record per line.

    Returns:
        A tuple ``(X, y, encoding_length)`` where ``X`` holds the six feature
        columns, ``y`` holds the "R" column as a single-column 2-D array, and
        ``encoding_length`` lists the maximum value seen in each feature
        column. Rows of ``X`` and ``y`` share one random permutation drawn
        from ``np.random``.
    """
    feature_names = ["col2", "col3", "col4", "col5", "col6", "col7"]
    header = ["R", *feature_names, "data"]

    frame = pd.read_csv(StringIO(data_str), sep=' ', header=None, names=header)
    # Keep everything except the trailing "data" column.
    frame = frame.iloc[:, :-1]

    # Largest value per feature, computed before shuffling.
    encoding_length = [max(frame[name].unique()) for name in feature_names]

    # Shuffle the rows; features and labels are sliced from the same frame so
    # they stay aligned.
    shuffled = frame.iloc[np.random.permutation(len(frame))]

    values = shuffled.to_numpy()
    X_train = values[:, 1:1 + len(feature_names)]
    y_train = shuffled[["R"]].to_numpy()
    return X_train, y_train, encoding_length

def oneHotEncoding(X_data, l):
    """One-hot encode rows of 1-based categorical feature values.

    Args:
        X_data: iterable of rows, each a sequence of category values.
        l: sequence giving, for every feature position, the width of that
            feature's one-hot group (its number of categories).

    Returns:
        A list with one flat list per row: the concatenation of the one-hot
        groups of all its features. A value ``v`` in ``1 .. l[i]`` sets
        position ``v - 1`` of group ``i``; any other value (for example 0)
        leaves that group all zeros.

    Raises:
        IndexError: if a row has more features than ``l`` has entries.
    """
    X_result = []
    for row in X_data:
        encoded = []
        for i, value in enumerate(row):
            width = l[i]
            group = [0] * width
            # Range membership compares by equality, so 2, 2.0 and a numpy
            # integer 2 all select the same slot, for any group width.
            if value in range(1, width + 1):
                group[int(value) - 1] = 1
            encoded += group
        X_result.append(encoded)
    return X_result

X_train, y_train, encoding_length = retrieveData(train_str)
X_train = oneHotEncoding(X_train, encoding_length)

# Encode the test set with the group widths derived from the training set so
# that both matrices always have the same number of input columns. The widths
# computed from the test split alone are kept only for inspection.
X_test, y_test, test_encoding_length = retrieveData(test_str)
X_test = oneHotEncoding(X_test, encoding_length)

# Notebook cell output: number of encoded input features per sample.
len(X_train[0])

## **Model Selection**

In [None]:
from activation_function import instantiate_act_func
from layer import Layer
from mlp import MLP
from losses import instantiate_loss
from grid_search import create_test
from weigth_init import instantiate_initializer
from utils import k_fold_cross_validation
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
# Paths of the JSON configuration files passed to create_test; the result is
# iterated as individual test configurations during model selection.
json_file_config = ["models/model1.json", "models/model2.json"]
tests = create_test(json_file_config)

In [None]:
def create_model_from_test(test):
    """Build and compile an MLP from one configuration mapping.

    Args:
        test: mapping with a 'layers' list (each entry providing 'units',
            'act_func' and 'inputs') plus the shared settings
            'weights_initializer', 'kernel_regularizer', 'bias_regularizer',
            'momentum', 'Nesterov', 'learning_rate', 'loss' and 'metrics'.

    Returns:
        The compiled MLP instance.
    """

    def build_layer(spec):
        # Per-layer values come from `spec`; everything else is shared across
        # layers and read from the enclosing configuration.
        return Layer(
            spec['units'],
            instantiate_act_func(spec['act_func']),
            spec['inputs'],
            weights_initializer=instantiate_initializer(test['weights_initializer']),
            kernel_regularizer=test['kernel_regularizer'],
            bias_regularizer=test['bias_regularizer'],
            momentum=test['momentum'],
            Nesterov=test['Nesterov'],
        )

    network = MLP([build_layer(spec) for spec in test['layers']])
    network.compile(
        test['learning_rate'],
        instantiate_loss(test['loss']),
        test['metrics'],
    )
    return network

In [None]:
def save_result(path, test, accuracy, errors, summary):
    """Write a run's configuration, summary and error curve to `path`.

    Two files are created, both named ``<ISO timestamp>-acc:<accuracy>``
    (accuracy rounded to two decimals): a ``.logs`` text file containing
    ``test`` and ``summary``, and a ``.png`` plot of ``errors``. The plot is
    also shown.

    Args:
        path: output directory; created if it does not exist yet.
        test: configuration of the run, written via ``str``.
        accuracy: numeric accuracy used in the file name.
        errors: sequence of error values to plot.
        summary: model summary, written via ``str``.
    """
    import os

    # Create the output directory up front so results of a finished training
    # run are not lost to a missing folder.
    if path:
        os.makedirs(path, exist_ok=True)

    iso_date = datetime.now().replace(microsecond=0).isoformat()
    # NOTE: the name contains ':' (ISO time and "acc:"), which is not a valid
    # file-name character on Windows; kept unchanged so existing result files
    # and tooling keep matching.
    filename = f"{path}/{iso_date}-acc:{round(accuracy, 2)!s}"

    with open(f"{filename}.logs", 'w') as log_file:
        log_file.write(f"{test!s}\n")
        log_file.write(f"{summary}\n")

    # Draw on a dedicated figure and close it afterwards so curves from
    # successive calls never accumulate on the same axes.
    figure = plt.figure()
    plt.plot(errors)
    plt.savefig(f'{filename}.png')
    plt.show()
    plt.close(figure)

In [None]:
# Directory passed to save_result for the logs and plots of model selection.
path_model_selection_result = f"results/model-selection/{problem}"

In [None]:
# Number of cross-validation folds.
k = 4
best_model = None
best_accuracy = 0
for test in tests:
    # The configured epoch budget is split evenly across the k folds.
    epochs = round(test['epochs'] / k)
    model = create_model_from_test(test)
    dataset = k_fold_cross_validation(X_train, y_train, k)
    print(test)
    errors = []
    fold_accuracies = []

    # NOTE(review): the same model instance keeps training across folds, so
    # later validation folds contain samples it has already been fitted on.
    # Presumably intentional given the epochs / k split -- confirm.
    for fold in dataset:
        errors += model.fit(fold['X_train'], fold['y_train'], epochs)
        _, fold_accuracy = model.evaluate(fold['X_val'], fold['y_val'])
        fold_accuracies.append(fold_accuracy)
        summary = model.summary()

    if not fold_accuracies:
        raise ValueError("k_fold_cross_validation produced no folds")

    # Rank configurations by the mean validation accuracy over all folds
    # rather than by whichever fold happened to be evaluated last.
    accuracy = sum(fold_accuracies) / len(fold_accuracies)

    if best_accuracy < accuracy:
        best_accuracy = accuracy
        best_model = test

    save_result(path_model_selection_result, test, accuracy, errors, summary)



## **Model Assessment - Hold-out Validation**

In [None]:
path_model_assessment_result = f"results/model-assessment/{problem}"

# Hard-coded configuration used for the final assessment. It replaces
# whatever `best_model` was assigned by the model-selection loop.
best_model = {
    'learning_rate': 0.08,
    'momentum': 0.8,
    'Nesterov': True,
    'kernel_regularizer': 0,
    'bias_regularizer': 0,
    'weights_initializer': 'random_init',
    'layers': [
        {'units': 4, 'inputs': 17, 'act_func': 'relu'},
        {'units': 1, 'inputs': 4, 'act_func': 'sigmoid'},
    ],
    'name': 'model3',
    'epochs': 400,
    'loss': 'mean_squared_error',
    'metrics': ['accuracy'],
}
model = create_model_from_test(best_model)

# Fit on the full training set, then evaluate once on the held-out test set.
errors = model.fit(X_train, y_train, best_model['epochs'])
error, accuracy = model.evaluate(X_test, y_test)
summary = model.summary()

save_result(
    path_model_assessment_result,
    best_model,
    accuracy,
    errors,
    summary,
)
