In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import glob

In [2]:
# Load datasets
train_data = pd.read_csv("../train_drop_records.csv")
test_data = pd.read_csv("../test_drop_records.csv")

# Define your target column
target_col = 'Addiction_Class'

# Split into features and target
y_train = train_data[target_col]
y_test = test_data[target_col]

# # To collect results
# results = []

# Find all feature list files
feature_files = glob.glob("../feature_selection/*_features.csv")  # e.g., chi2_top5_features.csv, mi_top5_features.csv

In [3]:
def evaluate_and_store(y_true, y_pred, method_name, model_name):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    # Append result as dictionary
    results.append({
        "Feature_Selection_Method": method_name,
        "Model": model_name,
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1_Score": f1
    })

In [4]:
def train_and_evaluate(method_name, selected_features):
    # Select features
    X_train = train_data[selected_features]
    X_test = test_data[selected_features]

    print(f"\n================== {method_name.upper()} ==================")

    # Logistic Regression
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    log_model = LogisticRegression(max_iter=1000, class_weight='balanced')  # Balanced in case of imbalance
    log_model.fit(X_train_scaled, y_train)
    y_pred_log = log_model.predict(X_test_scaled)
    evaluate_and_store(y_test, y_pred_log, method_name, "Logistic Regression")
    print("Logistic Regression Report:")
    print(classification_report(y_test, y_pred_log, zero_division=0))

    # XGBoost
    xgb_model = XGBClassifier(eval_metric='logloss')
    xgb_model.fit(X_train, y_train)
    y_pred_xgb = xgb_model.predict(X_test)
    evaluate_and_store(y_test, y_pred_xgb, method_name, "XGBoost")
    print("XGBoost Report:")
    print(classification_report(y_test, y_pred_xgb, zero_division=0))

In [5]:
results = []  # <-- Reset the results list at the very beginning

# Loop through each feature selection method
for file in feature_files:
    # method_name = file.split("\\")[1].split("_")[0]
    method_name = file.split("_")[1].split("\\")[1]
    feature_df = pd.read_csv(file)
    selected_features = feature_df['Feature'].tolist()
    train_and_evaluate(method_name, selected_features)

# Save all results into a CSV
results_df = pd.DataFrame(results)
results_df.to_csv("model_evaluation_results.csv", index=False)

print("✅ All model results saved to 'model_evaluation_results.csv'!")


Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.79      0.72      0.76      5987
           1       0.20      0.27      0.23      1529

    accuracy                           0.63      7516
   macro avg       0.50      0.50      0.49      7516
weighted avg       0.67      0.63      0.65      7516

XGBoost Report:
              precision    recall  f1-score   support

           0       0.80      1.00      0.89      5987
           1       0.00      0.00      0.00      1529

    accuracy                           0.80      7516
   macro avg       0.40      0.50      0.44      7516
weighted avg       0.63      0.80      0.71      7516


Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.80      0.78      0.79      5987
           1       0.21      0.23      0.22      1529

    accuracy                           0.67      7516
   macro avg       0.51      0.51      0.50      7516
