In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import cohen_kappa_score, mean_absolute_error, mean_squared_error
from sklearn.metrics import classification_report, confusion_matrix

# Load the pre-split training and test sets (paths are relative to the
# notebook's working directory).
train_df = pd.read_csv('training-password-data.csv')
test_df  = pd.read_csv('testing-password-data.csv')

# 'strength' is the target. It is cast to str so the labels are handled as
# class names rather than numbers by the classifier and the metrics.
X_train = train_df.drop(columns=['strength'])
y_train = train_df['strength'].astype(str)
X_test  = test_df.drop(columns=['strength'])
y_test  = test_df['strength'].astype(str)

# One-hot encode non-numeric feature columns. The test frame is re-indexed to
# the training columns: dummy columns seen only in the test set are dropped
# and columns missing from it are filled with 0, so both matrices line up.
X_train = pd.get_dummies(X_train)
X_test  = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

# Standardise features using statistics learned from the training set only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# Label -> integer code mapping, fitted on the training labels; the
# evaluation cell uses it for the error metrics and the confusion matrix.
le = LabelEncoder().fit(y_train)

# C is the inverse regularisation strength, so C=1e8 makes the L2 penalty
# negligible. `multi_class` is intentionally not passed: it is deprecated in
# scikit-learn >= 1.5, and with the lbfgs solver and three or more classes the
# default already fits a multinomial (softmax) model.
logreg = LogisticRegression(
    penalty='l2',
    C=1e8,
    solver='lbfgs',
    max_iter=1000,
    random_state=0
)
logreg.fit(X_train_scaled, y_train)

# Predicted class labels (strings) for the held-out test set.
y_pred = logreg.predict(X_test_scaled)




In [3]:
# ---- Instance counts and accuracy percentages ----
total     = len(y_test)
correct   = (y_test == y_pred).sum()
incorrect = total - correct
pct_corr, pct_incorr = (count / total * 100 for count in (correct, incorrect))

# ---- Agreement and error metrics ----
# Kappa is computed on the raw string labels. MAE / RMSE are computed on the
# LabelEncoder integer codes, i.e. they measure distance between class codes.
y_true_int, y_pred_int = le.transform(y_test), le.transform(y_pred)

kappa = cohen_kappa_score(y_test, y_pred)
mae   = mean_absolute_error(y_true_int, y_pred_int)
rmse  = np.sqrt(mean_squared_error(y_true_int, y_pred_int))

# Relative errors: the model's error divided by the error of a baseline that
# always predicts the mean true code. Both are plain ratios, not percentages.
mean_true    = np.mean(y_true_int)
baseline_dev = y_true_int - mean_true
rae  = mae  / np.mean(np.abs(baseline_dev))
rrse = rmse / np.sqrt(np.mean(baseline_dev ** 2))

# ---- Report ----
summary_rows = [
    "=== Summary ===",
    f"Correctly Classified Instances   {correct}   {pct_corr:.4f} %",
    f"Incorrectly Classified Instances {incorrect}   {pct_incorr:.4f} %",
    f"Kappa statistic                  {kappa:.3f}",
    f"Mean absolute error              {mae:.4f}",
    f"Root mean squared error          {rmse:.4f}",
    f"Relative absolute error          {rae:.4f}",
    f"Root relative squared error      {rrse:.4f}",
    f"Total Number of Instances        {total}",
]
print("\n".join(summary_rows))

print("\n=== Detailed Accuracy By Class ===")
print(classification_report(y_test, y_pred, digits=4))

# Rows are actual classes, columns are predicted classes, both ordered as in
# le.classes_.
print("=== Confusion Matrix ===")
cm = confusion_matrix(y_test, y_pred, labels=le.classes_)
cm_table = pd.DataFrame(
    cm,
    index=[f"actual={label}" for label in le.classes_],
    columns=[f"pred={label}" for label in le.classes_],
)
print(cm_table)


=== Summary ===
Correctly Classified Instances   531310   99.9699 %
Incorrectly Classified Instances 160   0.0301 %
Kappa statistic                  0.999
Mean absolute error              0.0003
Root mean squared error          0.0174
Relative absolute error          0.0011
Root relative squared error      0.0344
Total Number of Instances        531470

=== Detailed Accuracy By Class ===
              precision    recall  f1-score   support

           0     0.9993    0.9990    0.9991     71711
           1     0.9998    0.9998    0.9998    396208
           2     0.9995    1.0000    0.9997     63551

    accuracy                         0.9997    531470
   macro avg     0.9995    0.9996    0.9996    531470
weighted avg     0.9997    0.9997    0.9997    531470

=== Confusion Matrix ===
          pred=0  pred=1  pred=2
actual=0   71637      74       0
actual=1      51  396125      32
actual=2       0       3   63548
