In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    accuracy_score,
    cohen_kappa_score,
    mean_absolute_error,
    mean_squared_error,
    classification_report,
    confusion_matrix,
)

# --- Load data ---------------------------------------------------------------
# Both CSVs must contain a 'strength' column: it is used as the class label and
# every remaining column is treated as a feature.
train_df = pd.read_csv('training-password-data.csv')
test_df  = pd.read_csv('testing-password-data.csv')

# Labels are cast to str so they are handled as nominal classes rather than
# as numbers.
X_train = train_df.drop(columns=['strength'])
y_train = train_df['strength'].astype(str)
X_test  = test_df.drop(columns=['strength'])
y_test  = test_df['strength'].astype(str)

# --- Encode features ---------------------------------------------------------
# One-hot encode non-numeric columns.  The test frame is re-indexed onto the
# training columns: dummy columns that only occur in the test data are dropped
# and training columns missing from the test data are filled with 0, so both
# matrices have the same layout.
X_train = pd.get_dummies(X_train)
X_test  = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

# --- Scale features ----------------------------------------------------------
# The scaler is fitted on the training data only and then applied to the test
# data, so test values outside the training range can fall outside [0, 1].
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# --- Network size ------------------------------------------------------------
# One hidden layer whose width is the integer mean of the number of input
# features and the number of classes.
n_inputs  = X_train_scaled.shape[1]
n_outputs = y_train.nunique()
hidden_units = (n_inputs + n_outputs) // 2

# --- Model -------------------------------------------------------------------
# NOTE(review): these settings (sigmoid units, SGD with lr 0.3 / momentum 0.2,
# 500 epochs, batch size 100, (inputs + classes) / 2 hidden units) presumably
# mirror Weka's MultilayerPerceptron defaults -- confirm.
mlp = MLPClassifier(
    hidden_layer_sizes=(hidden_units,),
    activation='logistic',
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.3,
    momentum=0.2,
    batch_size=100,
    max_iter=500,
    tol=0.0,               # minimum-improvement threshold set to zero
    early_stopping=False,  # no internal validation split
    alpha=0.0,             # no L2 penalty
    shuffle=True,
    random_state=0,        # fixed seed for weight init and shuffling
)

mlp.fit(X_train_scaled, y_train)

y_pred = mlp.predict(X_test_scaled)

# --- Integer codes for the labels --------------------------------------------
# The encoder is fitted on the union of training and test labels.  Fitting on
# y_train alone makes le.transform(y_test) raise ValueError as soon as the test
# set contains a class that never occurs in training; predictions can only be
# training classes, so the union covers y_pred as well.  When the test labels
# are a subset of the training labels, le.classes_ is the same as before.
le = LabelEncoder().fit(pd.concat([y_train, y_test], ignore_index=True))
y_true_int = le.transform(y_test)
y_pred_int = le.transform(y_pred)



In [7]:
# --- Headline counts ---------------------------------------------------------
total      = len(y_test)
correct    = (y_pred == y_test).sum()
incorrect  = total - correct
pct_corr   = correct   / total * 100
pct_incorr = incorrect / total * 100

# --- Error metrics on integer-coded labels -----------------------------------
# le.transform maps every label to its position in le.classes_, so the
# error metrics below treat that position as an ordinal score.
# NOTE(review): the labels are strings, so the positions follow string sort
# order -- confirm that this matches the intended class ordering.
y_true_int = le.transform(y_test)
y_pred_int = le.transform(y_pred)

kappa = cohen_kappa_score(y_test, y_pred)
mae   = mean_absolute_error(y_true_int, y_pred_int)

mse   = mean_squared_error(y_true_int, y_pred_int)
rmse  = np.sqrt(mse)

# Relative errors compare the model against a baseline that always predicts
# the mean of the true codes.  They are ratios, not percentages.
mean_true = np.mean(y_true_int)
baseline_abs_dev  = np.mean(np.abs(y_true_int - mean_true))
baseline_rms_dev  = np.sqrt(np.mean((y_true_int - mean_true)**2))

# If the test set holds a single class the baseline error is zero and the
# ratio is undefined; report NaN instead of dividing by zero.
rae  = mae  / baseline_abs_dev if baseline_abs_dev > 0 else float('nan')
rrse = rmse / baseline_rms_dev if baseline_rms_dev > 0 else float('nan')

print("=== Summary ===")
print(f"Correctly Classified Instances   {correct}   {pct_corr:.4f} %")
print(f"Incorrectly Classified Instances {incorrect}   {pct_incorr:.4f} %")
print(f"Kappa statistic                  {kappa:.3f}")
print(f"Mean absolute error              {mae:.4f}")
print(f"Root mean squared error          {rmse:.4f}")
print(f"Relative absolute error          {rae:.4f}")
print(f"Root relative squared error      {rrse:.4f}")
print(f"Total Number of Instances        {total}")

# The report and the confusion matrix are both pinned to le.classes_ so they
# always list the same classes in the same order, including classes that
# appear in neither y_test nor y_pred.
print("\n=== Detailed Accuracy By Class ===")
print(classification_report(y_test, y_pred, labels=le.classes_, digits=4))

print("=== Confusion Matrix ===")
cm = confusion_matrix(y_test, y_pred, labels=le.classes_)
print(pd.DataFrame(cm, index=[f"actual={c}" for c in le.classes_],
                      columns=[f"pred={c}" for c in le.classes_]))


=== Summary ===
Correctly Classified Instances   531315   99.9708 %
Incorrectly Classified Instances 155   0.0292 %
Kappa statistic                  0.999
Mean absolute error              0.0003
Root mean squared error          0.0171
Relative absolute error          0.0011
Root relative squared error      0.0339
Total Number of Instances        531470

=== Detailed Accuracy By Class ===
              precision    recall  f1-score   support

           0     0.9993    0.9990    0.9992     71711
           1     0.9998    0.9998    0.9998    396208
           2     0.9995    1.0000    0.9997     63551

    accuracy                         0.9997    531470
   macro avg     0.9995    0.9996    0.9996    531470
weighted avg     0.9997    0.9997    0.9997    531470

=== Confusion Matrix ===
          pred=0  pred=1  pred=2
actual=0   71641      70       0
actual=1      50  396126      32
actual=2       0       3   63548
