In [73]:
import numpy as np
import pandas as pd
import numpy.typing as npt
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import json
import pickle

In [74]:
# Run configuration shared by the cells below.
# The annotations use the built-in types: the values are plain Python literals,
# and the `np.float_` alias was removed in NumPy 2.0, so evaluating it in a
# module-level annotation raises AttributeError on current NumPy.
TEST_SIZE: float = 0.3  # passed as test_size to train_test_split
RANDOM_STATE: int = 42  # passed as random_state to train_test_split

In [75]:
# Load the dataset. header=None tells pandas not to read a header row, so the
# columns receive integer labels.
data: pd.DataFrame = pd.read_csv(
    filepath_or_buffer="data/input/data_tp1",
    header=None,
)
# Preview the first rows as this cell's displayed output.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [76]:
# helper function that trains and evaluates one MLP configuration

def MNIST_mpl(hidden_layer_sizes: int, gradient_type: str, learning_rate: float) -> None:
    """Train an MLP on the module-level ``data`` frame and save its metrics.

    The first column of ``data`` is used as the label vector and every column
    whose label is not ``0`` as an input feature. The rows are split with
    ``TEST_SIZE`` / ``RANDOM_STATE``, a logistic-activation ``MLPClassifier``
    is fitted on the training part, and the metrics computed on the held-out
    part are written to ``data/results/run_info.json``. The file is opened in
    ``"w"`` mode, so each call overwrites the previous run's results.

    Args:
        hidden_layer_sizes: Forwarded to ``MLPClassifier(hidden_layer_sizes=...)``.
        gradient_type: Recorded in the results file only.
            NOTE(review): this value is not yet mapped to any solver or
            batch-size setting; the intended meaning of values such as "s"
            is not visible here -- confirm and wire it in.
        learning_rate: Forwarded to ``MLPClassifier(learning_rate_init=...)``.

    Returns:
        None. The results are only written to disk.
    """
    # Use the caller's hyper-parameters (they were previously ignored in favour
    # of hard-coded 25 / 0.01) and seed the classifier so that the seed stored
    # in the results file actually reproduces the run.
    clf = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation="logistic",
        learning_rate_init=learning_rate,
        random_state=RANDOM_STATE,
    )

    input_data: npt.NDArray[np.int_] = data.iloc[:, data.columns != 0].to_numpy()
    labels: npt.NDArray[np.int_] = data.iloc[:, 0].to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        input_data, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # accuracy_score returns a scalar; cast it so json.dump gets a plain float.
    accuracy: float = float(metrics.accuracy_score(y_test, y_pred))
    # average=None yields one F1 value per class.
    f1_per_class = metrics.f1_score(y_test, y_pred, average=None)
    report = metrics.classification_report(y_test, y_pred, output_dict=True)

    run_info = {
        # The scalar accuracy; the full report now lives under its own key
        # instead of being stored under 'accuracy'.
        'accuracy': accuracy,
        'f1_score': f1_per_class.tolist(),
        'classification_report': report,
        # Hyper-parameters are stored so a results file identifies its run.
        # np.asarray(...).tolist() turns ints, NumPy ints and tuples alike into
        # JSON-serialisable values.
        'hidden_layer_sizes': np.asarray(hidden_layer_sizes).tolist(),
        'gradient_type': gradient_type,
        'learning_rate': float(learning_rate),
        'random_state_seed': RANDOM_STATE,
        "test_size": TEST_SIZE
    }

    with open("data/results/run_info.json", "w") as file:
        json.dump(run_info, file, indent=4)

In [77]:
# Run the helper once with hidden_layer_sizes=25, gradient_type="s" and
# learning_rate=0.01.
MNIST_mpl(hidden_layer_sizes=25, gradient_type="s", learning_rate=0.01)