# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from types import SimpleNamespace

def load_dry_bean_data(path="data/Dry_Bean.csv"):
    """Load the Dry Bean CSV and bundle it as a scikit-learn style dataset.

    Column names are stripped of surrounding whitespace before use. The
    ``Class`` column is taken as the target and label-encoded; every other
    column is treated as a feature.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``SimpleNamespace`` exposing:
            data: feature matrix as a ``float32`` NumPy array.
            target: label-encoded ``Class`` values.
            feature_names: list of the feature column names.
            target_names: the fitted encoder's ``classes_`` array.
            label_encoder: the fitted ``LabelEncoder`` itself.
    """
    frame = pd.read_csv(path)
    # Header cells may carry stray whitespace; normalise before lookup.
    frame.columns = frame.columns.str.strip()

    # Separate the feature columns from the target column.
    features = frame.drop(columns="Class")
    labels = frame["Class"]

    # Map the class labels to integer codes.
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(labels)

    bundle = {
        "data": features.to_numpy(dtype=np.float32),
        "target": encoded_labels,
        "feature_names": features.columns.tolist(),
        "target_names": label_encoder.classes_,
        "label_encoder": label_encoder,
    }
    return SimpleNamespace(**bundle)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset, then pull out the feature matrix and the encoded labels
data = load_dry_bean_data()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the split is stratified on the labels
# and seeded so it is repeatable
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=5,
)

# Feature scaling: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(train_X)
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)


# Each estimator below is constructed and fitted on the scaled training split
# in a single expression (``fit`` returns the fitted estimator).

# Multiclass Logistic Regression
from sklearn.linear_model import LogisticRegression
logistic_regression_model = LogisticRegression(
    solver="lbfgs",
    max_iter=2000,
).fit(train_X, train_y)

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
decision_tree_model = DecisionTreeClassifier(random_state=5).fit(train_X, train_y)

# KNN
from sklearn.neighbors import KNeighborsClassifier
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_X, train_y)

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
naive_bayes_model = GaussianNB().fit(train_X, train_y)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
random_forest_model = RandomForestClassifier(
    n_estimators=100,
    random_state=5,
).fit(train_X, train_y)

# XGBoost (class count is taken from the loaded dataset's target names)
from xgboost import XGBClassifier
xgboost_model = XGBClassifier(
    objective="multi:softprob",
    num_class=len(data.target_names),
    eval_metric="mlogloss",
).fit(train_X, train_y)


from sklearn.metrics import (
    accuracy_score, roc_auc_score, precision_score, 
    recall_score, f1_score, matthews_corrcoef
)

# Fitted estimators keyed by the display name used in the results table
models = {
    "Logistic Regression": logistic_regression_model,
    "Decision Tree": decision_tree_model,
    "KNN": knn_model,
    "Naive Bayes": naive_bayes_model,
    "Random Forest": random_forest_model,
    "XGBoost": xgboost_model
}

# One row of metrics per model, collected in the order of ``models``
results = []

for name, model in models.items():
    # Hard class predictions and per-class probabilities on the test split
    y_pred = model.predict(test_X)
    y_prob = model.predict_proba(test_X)

    # Build the row step by step so the column order matches the table layout
    metrics = {"ML Model name": name}
    metrics["Accuracy"] = accuracy_score(test_y, y_pred)
    # AUC is computed from probabilities: one-vs-rest, macro-averaged
    metrics["AUC"] = roc_auc_score(
        test_y, y_prob, multi_class="ovr", average="macro"
    )
    # Precision, recall and F1 share the same macro-averaged call signature
    for label, scorer in (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    ):
        metrics[label] = scorer(test_y, y_pred, average="macro")
    metrics["MCC"] = matthews_corrcoef(test_y, y_pred)

    results.append(metrics)

# Tabulate and display the comparison
data_frame_results = pd.DataFrame(results)
print(data_frame_results)
