In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

Inspired by biological neural networks in animal brains, Artificial Neural Networks (ANN) are the result of decades computational model adaptations and discoveries to create a powerful computing system that mimics "learning" through example without specified rules. The invention of the perceptron model (1957, B.Wildrow and M.Hoff), a method involving learnable weights and thresholds, lead to key theoretical basis integral to ANNs. Because single layer perceptrons were unable to handle the "exclusive or" problem by effectively predicting the outputs of non-linearly seperable datasets. The introduction of backpropagation for training multiple-layered perceptrons allowed for networks to learn nonlinear decision boundaries by stacking multiple layers of neurons, or nodes.

ANNs are comprised of at least three layers of nodes: an input layer that recieves the raw features, at least one "hidden" layer that performs intermediate transformation, and an output layer that applies the activation function to produce a prediction.


Hidden nodes compute 

$\sum{\ x_iw_{ij}={w^T}_jx}$




In [7]:
#df=pd.read_csv('../../data/processed/winequality-red-normalized.csv')
#df_normalized = pd.read_csv('../../data/processed/winequality-red-normalized.csv')
#df_interactions = pd.read_csv('../../data/processed/winequality-red-interactions.csv')
#df_pca = pd.read_csv('../../data/processed/winequality-red-pca.csv')

df=pd.read_csv('winequality-red-normalized.csv')
df_normalized = pd.read_csv('winequality-red-normalized.csv')
df_interactions = pd.read_csv('winequality-red-normalized.csv')
df_pca = pd.read_csv('winequality-red-normalized.csv')

X_norm = df_normalized.drop('quality', axis=1)
y_norm = df_normalized['quality']

X_inter = df_interactions.drop('quality', axis=1)
y_inter = df_interactions['quality']

X_pca = df_pca.drop('quality', axis=1)
y_pca = df_pca['quality']

In [13]:
# # Common CV setup
# cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# param_grid = {
#     'mlp__hidden_layer_sizes': [(64,), (64, 32), (128, 64)],
#     'mlp__activation': ['relu', 'tanh'],
#     'mlp__alpha': [1e-5, 1e-4, 1e-3],
#     'mlp__learning_rate_init': [5e-4, 1e-3, 2e-3],
#     'mlp__solver': ['adam']
# }

# def run_pipeline(X, y, label):
#     # consistent split
#     X_train, X_test, y_train, y_test = train_test_split(
#         X, y, test_size=0.2, stratify=y, random_state=42
#     )

#     pipe = Pipeline([
#         ('mlp', MLPClassifier(max_iter=600,
#                               early_stopping=True,
#                               n_iter_no_change=20,
#                               random_state=42))
#     ])

#     grid = GridSearchCV(pipe, param_grid, scoring='f1_macro',
#                         cv=cv, n_jobs=-1, verbose=1, refit=True)
#     grid.fit(X_train, y_train)

#     print(f"{label} best params:", grid.best_params_)
#     print(f"{label} CV macro F1:", grid.best_score_)

#     y_pred = grid.predict(X_test)
#     print(f"{label} Test Accuracy:", accuracy_score(y_test, y_pred))
#     print(f"{label} Test Macro F1:", f1_score(y_test, y_pred, average='macro'))

# # Run for each dataset version
# run_pipeline(X_norm, y_norm, "Normalized")
# run_pipeline(X_inter, y_inter, "Interactions")
# run_pipeline(X_pca, y_pca, "PCA")

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Normalized best params: {'mlp__activation': 'tanh', 'mlp__alpha': 1e-05, 'mlp__hidden_layer_sizes': (64,), 'mlp__learning_rate_init': 0.0005, 'mlp__solver': 'adam'}
Normalized CV macro F1: 0.28058829536122276
Normalized Test Accuracy: 0.5625
Normalized Test Macro F1: 0.256403017490465
Fitting 5 folds for each of 54 candidates, totalling 270 fits
Interactions best params: {'mlp__activation': 'tanh', 'mlp__alpha': 1e-05, 'mlp__hidden_layer_sizes': (64,), 'mlp__learning_rate_init': 0.0005, 'mlp__solver': 'adam'}
Interactions CV macro F1: 0.28058829536122276
Interactions Test Accuracy: 0.5625
Interactions Test Macro F1: 0.256403017490465
Fitting 5 folds for each of 54 candidates, totalling 270 fits
PCA best params: {'mlp__activation': 'tanh', 'mlp__alpha': 1e-05, 'mlp__hidden_layer_sizes': (64,), 'mlp__learning_rate_init': 0.0005, 'mlp__solver': 'adam'}
PCA CV macro F1: 0.28058829536122276
PCA Test Accuracy: 0.5625
PCA Test Macr

things to do:
change the learning rates
describe the activation, choices of alpha

In [23]:
datasets = {
    "Normalized": (X_norm, y_norm),
    "Interactions": (X_inter, y_inter),
    "PCA": (X_pca, y_pca)
}
param_grid = {
    "hidden_layer_sizes": [(64,), (64, 32), (128, 64)],
    "activation": ["relu", "tanh"],
    "alpha": [1e-5, 1e-4, 1e-3],
    "learning_rate_init": [5e-4, 1e-3, 2e-3],
    "solver": ["adam"]
}
results = {}

for name, (X, y) in datasets.items():
    print(f"\n{'='*60}")
    print(f"Training ANN on {name} dataset")
    print(f"{'='*60}")
    
    # Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )
    
    # Grid search
    ann = MLPClassifier(max_iter=600, early_stopping=True, n_iter_no_change=20, random_state=42)
    grid = GridSearchCV(
        ann,
        param_grid,
        cv=5, 
        scoring="f1_macro", 
        n_jobs=-1, 
        verbose=1)
    grid.fit(X_train, y_train)
    
    best_ann = grid.best_estimator_
    y_pred = best_ann.predict(X_test)
    
    # Metrics
    acc = accuracy_score(y_test, y_pred)
    f1m = f1_score(y_test, y_pred, average="macro")
    cv_scores = cross_val_score(best_ann, X_train, y_train, cv=5, scoring="f1_macro")
    
    # Store results
    results[name] = {
        "best_params": grid.best_params_,
        "accuracy": acc,
        "f1_macro": f1m,
        "cv_mean": cv_scores.mean(),
        "cv_std": cv_scores.std(),
        "confusion_matrix": confusion_matrix(y_test, y_pred),
        "report": classification_report(y_test, y_pred, digits=4,zero_division=0)
    }
    
    # Print
    print(f"\nBest parameters: {grid.best_params_}")
    print(f"Test Accuracy: {acc:.4f}")
    print(f"Macro F1-Score: {f1m:.4f}")
    print(f"CV Macro F1: {cv_scores.mean():.4f} (+/- {cv_scores.std()*2:.4f})")
    print("\nConfusion Matrix:\n", results[name]["confusion_matrix"])
    print("\nClassification Report:\n", results[name]["report"])


Training ANN on Normalized dataset
Fitting 5 folds for each of 54 candidates, totalling 270 fits

Best parameters: {'activation': 'tanh', 'alpha': 1e-05, 'hidden_layer_sizes': (128, 64), 'learning_rate_init': 0.002, 'solver': 'adam'}
Test Accuracy: 0.5969
Macro F1-Score: 0.2925
CV Macro F1: 0.2972 (+/- 0.1187)

Confusion Matrix:
 [[ 0  0  1  1  0  0]
 [ 0  0 10  1  0  0]
 [ 0  0 94 41  1  0]
 [ 0  0 42 81  5  0]
 [ 0  0  1 23 16  0]
 [ 0  0  0  2  1  0]]

Classification Report:
               precision    recall  f1-score   support

           3     0.0000    0.0000    0.0000         2
           4     0.0000    0.0000    0.0000        11
           5     0.6351    0.6912    0.6620       136
           6     0.5436    0.6328    0.5848       128
           7     0.6957    0.4000    0.5079        40
           8     0.0000    0.0000    0.0000         3

    accuracy                         0.5969       320
   macro avg     0.3124    0.2873    0.2925       320
weighted avg     0.5743    

In [25]:
# Convert results dict to DataFrame
summary = pd.DataFrame.from_dict(results, orient='index')

# Select only the columns you want in the table
summary_table = summary[[
    'best_params',
    'accuracy',
    'f1_macro',
    'cv_mean',
    'cv_std'
]]

print("\n=== ANN Results Summary ===")
print(summary_table)


=== ANN Results Summary ===
                                                    best_params  accuracy  \
Normalized    {'activation': 'tanh', 'alpha': 1e-05, 'hidden...  0.596875   
Interactions  {'activation': 'tanh', 'alpha': 1e-05, 'hidden...  0.596875   
PCA           {'activation': 'tanh', 'alpha': 1e-05, 'hidden...  0.596875   

              f1_macro   cv_mean    cv_std  
Normalized    0.292458  0.297194  0.059325  
Interactions  0.292458  0.297194  0.059325  
PCA           0.292458  0.297194  0.059325  
