# MLP

In [19]:
import sys
sys.path.append("..")
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from data_preprocess import load_training_data, load_test_data, normalize_features
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import numpy as np
import time
import joblib

In [20]:
# Get the data
X_train, y_train = load_training_data('../data/train.csv')

# Normalize the features
X_train = normalize_features(X_train)

# Initialize variables
hidden_layer_sizes = [(128,64,32), (64,32,16), (32,16,8), (8, 16, 32), (16, 32, 64), (32, 64, 128), (8, 16, 32, 64, 128), (128, 64, 32, 16, 8)]
alpha_values = [0.1]
learning_rate_init_values = [0.01] 
max_iter_values = [500]
results = {}

# Initialize a DataFrame to store the results
results_df = pd.DataFrame(columns=['hidden_layer_size', 'alpha', 'learning_rate_init', 'max_iter', 'accuracy', 'f1_score'])

# Define 5-fold cross validation test harness
kfold = KFold(n_splits=5, shuffle=True)

# Perform 5-fold cross validation
for hidden_layer_size in hidden_layer_sizes:
    for alpha in alpha_values:
        for learning_rate_init in learning_rate_init_values:
            for max_iter in max_iter_values:
                start_time = time.time()

                # Define the model with early stopping
                model = MLPClassifier(hidden_layer_sizes=hidden_layer_size, activation='relu', solver='adam',
                                      max_iter=max_iter, alpha=alpha, learning_rate_init=learning_rate_init,
                                      early_stopping=True, n_iter_no_change=10)

                # Perform cross-validation manually to get the number of iterations
                cv_results = []
                f1_results = []
                for train_index, test_index in kfold.split(X_train):
                    X_train_fold, X_test_fold = X_train.iloc[train_index], X_train.iloc[test_index]
                    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]
                    model.fit(X_train_fold, y_train_fold)
                    score = model.score(X_test_fold, y_test_fold)
                    cv_results.append(score)
                    y_pred = model.predict(X_test_fold)
                    f1 = f1_score(y_test_fold, y_pred, average='weighted')
                    f1_results.append(f1)

                end_time = time.time()
                elapsed_time = end_time - start_time
                config = (hidden_layer_size, alpha, learning_rate_init, max_iter)
                print("Config: {}\nCross-validation mean accuracy: {:.2f}%\nElapsed time: {:.2f} seconds\nNumber of iterations: {}\n".format(
                    config, np.mean(cv_results)*100, elapsed_time, model.n_iter_))

                new_row = pd.DataFrame({
                    'hidden_layer_size': [hidden_layer_size],
                    'alpha': [alpha],
                    'learning_rate_init': [learning_rate_init],
                    'max_iter': [max_iter],
                    'accuracy': [np.mean(cv_results)],
                    'f1_score': [np.mean(f1_results)]
                })

                results_df = pd.concat([results_df, new_row], ignore_index=True)

# Save the results to a CSV file
results_df.to_csv('model_results.csv', index=False)

Config: ((128, 64, 32), 0.1, 0.01, 500)
Cross-validation mean accuracy: 58.71%
Elapsed time: 0.83 seconds
Number of iterations: 23



  results_df = pd.concat([results_df, new_row], ignore_index=True)


Config: ((64, 32, 16), 0.1, 0.01, 500)
Cross-validation mean accuracy: 58.35%
Elapsed time: 0.47 seconds
Number of iterations: 23

Config: ((32, 16, 8), 0.1, 0.01, 500)
Cross-validation mean accuracy: 56.92%
Elapsed time: 0.23 seconds
Number of iterations: 20

Config: ((8, 16, 32), 0.1, 0.01, 500)
Cross-validation mean accuracy: 59.07%
Elapsed time: 0.21 seconds
Number of iterations: 28

Config: ((16, 32, 64), 0.1, 0.01, 500)
Cross-validation mean accuracy: 57.28%
Elapsed time: 0.45 seconds
Number of iterations: 25

Config: ((32, 64, 128), 0.1, 0.01, 500)
Cross-validation mean accuracy: 58.00%
Elapsed time: 0.68 seconds
Number of iterations: 23

Config: ((8, 16, 32, 64, 128), 0.1, 0.01, 500)
Cross-validation mean accuracy: 58.27%
Elapsed time: 0.95 seconds
Number of iterations: 29

Config: ((128, 64, 32, 16, 8), 0.1, 0.01, 500)
Cross-validation mean accuracy: 58.90%
Elapsed time: 1.01 seconds
Number of iterations: 26



## Model's paramemters with the best accuracy

In [21]:
# Find the best parameters
best_params = results_df.loc[results_df['accuracy'].idxmax()]
print(f"Best parameters: {best_params}")

# Train the final model with the best parameters
final_model = MLPClassifier(hidden_layer_sizes=best_params['hidden_layer_size'], 
                            activation='relu', 
                            solver='adam', 
                            max_iter=best_params['max_iter'], 
                            alpha=best_params['alpha'], 
                            learning_rate_init=best_params['learning_rate_init'], 
                            early_stopping=True, 
                            n_iter_no_change=10)

final_model.fit(X_train, y_train)

# Save the final model
joblib.dump(final_model, 'model/ann.joblib')

Best parameters: hidden_layer_size     (8, 16, 32)
alpha                         0.1
learning_rate_init           0.01
max_iter                      500
accuracy                 0.590739
f1_score                 0.558756
Name: 3, dtype: object


['model/ann.joblib']

## Acuracy on the test set

In [22]:
# Load the final model
final_model = joblib.load('model/ann.joblib')

# Load the test data
X_test, y_test = load_test_data('../data/test.csv', 'quality')

# Normalize the test data
X_test_normalized = normalize_features(X_test)

# Predict the labels of the test set
y_pred = final_model.predict(X_test_normalized)

# Calculate the accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print(f"Accuracy: {accuracy}")

Accuracy: 0.625
