<a href="https://colab.research.google.com/github/satvemula/ApexLogistics/blob/main/Supply_Chain_Training_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
# 1. Installation Cell
# Install requested advanced models: LightGBM, XGBoost, CatBoost
# ==============================================================================
!pip install mlflow
!pip install scikit-learn
!pip install lightgbm xgboost catboost



In [None]:
# ==============================================================================
# 2. IMPORTS (Final Update)
# Ensure all necessary libraries for preprocessing and modeling are imported.
# ==============================================================================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.exceptions import NotFittedError
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA # Kept for completeness, though not used in the final flow

# Requested Scikit-learn Models (Classification)
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# NEW: Missing Models
from sklearn.svm import SVC # Support Vector Classifier
from sklearn.naive_bayes import GaussianNB # Gaussian Naive Bayes
from sklearn.neighbors import KNeighborsClassifier # k-Nearest Neighbors

# Boosting Models
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# MLflow
import mlflow
import mlflow.sklearn

In [None]:
# ==============================================================================
# 2. CONFIGURE DATABRICKS MLFLOW TRACKING (FIXED)
# ==============================================================================
import os
import mlflow
# FIX: Import userdata to access secrets in Colab
from google.colab import userdata

print("="*70)
print("CONFIGURING DATABRICKS MLFLOW")
print("="*70)

# Define experiment path explicitly
EXPERIMENT_NAME = "/Users/svemulak@asu.edu/Attempt_6_Supply-Chain-Classification-Experiment-1"

try:
    # Attempt to load secrets
    DATABRICKS_HOST = userdata.get('DATABRICKS_HOST')
    DATABRICKS_TOKEN = userdata.get('DATABRICKS_TOKEN')

    os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST
    os.environ["DATABRICKS_TOKEN"] = DATABRICKS_TOKEN

    mlflow.set_tracking_uri("databricks")
    mlflow.set_experiment(EXPERIMENT_NAME)

    print(f"‚úì Using Databricks experiment: {EXPERIMENT_NAME}")
    print(f"‚úì Databricks workspace: {DATABRICKS_HOST}")

except NameError:
    print("‚ö† Error: 'userdata' not found. Ensure you are running in Colab and have imported it.")
    print("  Continuing with local MLflow tracking...")
    mlflow.set_tracking_uri("file:./mlruns")
except Exception as e:
    print(f"‚ö† Warning: Could not connect to Databricks: {e}")
    print("  Continuing with local MLflow tracking...")
    mlflow.set_tracking_uri("file:./mlruns")

CONFIGURING DATABRICKS MLFLOW


2025/12/04 22:36:21 INFO mlflow.tracking.fluent: Experiment with name '/Users/svemulak@asu.edu/Attempt_6_Supply-Chain-Classification-Experiment-1' does not exist. Creating a new experiment.


‚úì Using Databricks experiment: /Users/svemulak@asu.edu/Attempt_6_Supply-Chain-Classification-Experiment-1
‚úì Databricks workspace: https://dbc-a0c89f71-7936.cloud.databricks.com


In [None]:
# ==============================================================================
# 3. LOAD CSV FROM MY COMPUTER (KEPT UNCHANGED)
# ==============================================================================
print("\n" + "="*70)
print("LOADING DATASET")
print("="*70)

from google.colab import files
import io
uploaded = files.upload()
df = pd.read_csv(io.BytesIO(uploaded[list(uploaded.keys())[0]]))

print(f"\n‚úì Dataset loaded successfully!")
print(f"  Shape: {df.shape}")
print(f"  Columns: {list(df.columns)}")


LOADING DATASET


Saving c2k_data_comma.csv to c2k_data_comma (6).csv

‚úì Dataset loaded successfully!
  Shape: (3943, 98)
  Columns: ['nr', 'i1_legid', 'i1_rcs_p', 'i1_rcs_e', 'i1_dep_1_p', 'i1_dep_1_e', 'i1_dep_1_place', 'i1_rcf_1_p', 'i1_rcf_1_e', 'i1_rcf_1_place', 'i1_dep_2_p', 'i1_dep_2_e', 'i1_dep_2_place', 'i1_rcf_2_p', 'i1_rcf_2_e', 'i1_rcf_2_place', 'i1_dep_3_p', 'i1_dep_3_e', 'i1_dep_3_place', 'i1_rcf_3_p', 'i1_rcf_3_e', 'i1_rcf_3_place', 'i1_dlv_p', 'i1_dlv_e', 'i1_hops', 'i2_legid', 'i2_rcs_p', 'i2_rcs_e', 'i2_dep_1_p', 'i2_dep_1_e', 'i2_dep_1_place', 'i2_rcf_1_p', 'i2_rcf_1_e', 'i2_rcf_1_place', 'i2_dep_2_p', 'i2_dep_2_e', 'i2_dep_2_place', 'i2_rcf_2_p', 'i2_rcf_2_e', 'i2_rcf_2_place', 'i2_dep_3_p', 'i2_dep_3_e', 'i2_dep_3_place', 'i2_rcf_3_p', 'i2_rcf_3_e', 'i2_rcf_3_place', 'i2_dlv_p', 'i2_dlv_e', 'i2_hops', 'i3_legid', 'i3_rcs_p', 'i3_rcs_e', 'i3_dep_1_p', 'i3_dep_1_e', 'i3_dep_1_place', 'i3_rcf_1_p', 'i3_rcf_1_e', 'i3_rcf_1_place', 'i3_dep_2_p', 'i3_dep_2_e', 'i3_dep_2_place', 'i3_rcf_

In [None]:
# ==============================================================================
# 5. PREPROCESSING (Final Corrected Code Block)
# FIX APPLIED: Feature name cleanup added to resolve CatBoost/XGBoost errors.
# ==============================================================================
print("\n" + "="*70)
print("DATA PREPROCESSING")
print("="*70)

print(f"\nOriginal dataset size: {df.shape[0]} rows")

# 1. Replace '?' and Drop 100% Missing Columns
df = df.replace('?', np.nan)
print("‚úì Replaced '?' with NaN.")

df_cols_before = df.shape[1]
df = df.dropna(axis=1, how='all')
print(f"‚úì Dropped {df_cols_before - df.shape[1]} columns that were 100% NaN.")

# 2. Target Definition and Integrity Check
TARGET_COLUMN = 'legs'

if TARGET_COLUMN not in df.columns:
    print(f"\n--- FATAL ERROR: Target column '{TARGET_COLUMN}' not found in the DataFrame. ---")
else:
    # --- TARGET INTEGRITY FIX ---
    initial_rows = len(df)
    df = df.dropna(subset=[TARGET_COLUMN])
    print(f"‚úì Dropped {initial_rows - len(df)} rows with missing target '{TARGET_COLUMN}'.")
    # --- END OF TARGET INTEGRITY FIX ---

    # 3. Handle Categorical Columns and Encoding
    label_encoders = {}
    categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
    if TARGET_COLUMN in categorical_cols:
        categorical_cols.remove(TARGET_COLUMN)

    print(f"\nEncoding {len(categorical_cols)} feature columns...")
    for col in categorical_cols:
        df[col] = df[col].fillna('_IMPUTE_CATEGORY_')
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le
        print(f"  ‚úì Encoded: {col}")

    # Encode the Target Column (y) separately
    print(f"\nEncoding target column: {TARGET_COLUMN}")
    target_encoder = LabelEncoder()
    df[TARGET_COLUMN] = df[TARGET_COLUMN].astype(int)
    df[TARGET_COLUMN] = target_encoder.fit_transform(df[TARGET_COLUMN])
    label_encoders[TARGET_COLUMN] = target_encoder
    print(f"  ‚úì Encoded: {TARGET_COLUMN}")

    # 4. Final Split into X and y
    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]

    # --- REVISED DATA LEAKAGE FIX ---
    LEAKY_COLUMNS_PATTERNS = ['i1_hops', 'i2_hops', 'o_hops', 'nr', 'i2_dlv_p', 'i2_dlv_e']
    i3_cols = [col for col in X.columns if col.startswith('i3_')]
    LEAKY_COLUMNS_TO_DROP = set(LEAKY_COLUMNS_PATTERNS + i3_cols)

    X = X.drop(columns=[col for col in LEAKY_COLUMNS_TO_DROP if col in X.columns], errors='ignore')
    print(f"\nRemoved potential leaky columns: {LEAKY_COLUMNS_TO_DROP}")
    print(f"New X shape after removing leak: {X.shape}")
    # --- END OF DATA LEAKAGE FIX ---

    # 5. Impute Remaining NaNs (For numerical features)
    print("\nImputing remaining NaNs using median strategy...")
    imputer = SimpleImputer(strategy='median')
    X_imputed = imputer.fit_transform(X)
    X = pd.DataFrame(X_imputed, columns=X.columns, index=X.index)
    print("  ‚úì Imputation completed.")

    # 6. Feature Scaling (Crucial for Logistic Regression convergence)
    print("\nScaling numerical features...")
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    X = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)
    print("  ‚úì Scaling completed.")
    # --- END OF SCALING ---

    # ==========================================================================
    # 6.5 FEATURE ENGINEERING (Targeted Interaction Terms + Selection)
    # ==========================================================================
    print("\nCreating 2nd-degree Polynomial/Interaction Features (Targeted)...")

    # 1. Generate ALL interaction features (temporarily creates ~4000 features)
    poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=True)
    X_poly = poly.fit_transform(X)
    poly_feature_names = poly.get_feature_names_out(X.columns)

    # Create a DataFrame of just the new interaction terms
    X_interaction_only = pd.DataFrame(X_poly, columns=poly_feature_names, index=X.index)

    # 2. Select only the top 200 interaction features based on absolute correlation with the target
    correlation = X_interaction_only.apply(lambda col: np.abs(col.corr(y)))
    top_interaction_features = correlation.nlargest(200).index

    # 3. Update X to include only the original features plus the top 200 selected interaction features
    X = pd.concat([X, X_interaction_only[top_interaction_features]], axis=1)

    print(f"  ‚úì Added top 200 interaction features.")

    # ==========================================================================
    # 6.6 FINAL FEATURE CLEANUP (Fixes Boosting Model Errors)
    # ==========================================================================
    print("\nCleaning up feature names for model compatibility...")

    # 1. Remove duplicate columns (required for CatBoost/LightGBM fix)
    X = X.loc[:,~X.columns.duplicated()]

    # 2. Rename columns to simple strings (required for XGBoost fix)
    X.columns = [f'f_{i}' for i in range(X.shape[1])]

    print(f"  ‚úì Cleanup completed. Final unique feature count: {X.shape[1]}")
    # ==========================================================================

    print(f"\nTarget variable distribution (Encoded):")
    print(y.value_counts())

    # 7. Train/Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print(f"\n‚úì Train/test split completed:")
    print(f"  X_train shape: {X_train.shape}")
    print(f"  X_test shape: {X_test.shape}")
    print(f"  y_train shape: {y_train.shape}")
    print(f"  y_test shape: {y_test.shape}")


DATA PREPROCESSING

Original dataset size: 3943 rows
‚úì Replaced '?' with NaN.
‚úì Dropped 0 columns that were 100% NaN.
‚úì Dropped 1 rows with missing target 'legs'.

Encoding 72 feature columns...
  ‚úì Encoded: i1_dep_2_p
  ‚úì Encoded: i1_dep_2_e
  ‚úì Encoded: i1_dep_2_place
  ‚úì Encoded: i1_rcf_2_p
  ‚úì Encoded: i1_rcf_2_e
  ‚úì Encoded: i1_rcf_2_place
  ‚úì Encoded: i1_dep_3_p
  ‚úì Encoded: i1_dep_3_e
  ‚úì Encoded: i1_dep_3_place
  ‚úì Encoded: i1_rcf_3_p
  ‚úì Encoded: i1_rcf_3_e
  ‚úì Encoded: i1_rcf_3_place
  ‚úì Encoded: i2_legid
  ‚úì Encoded: i2_rcs_p
  ‚úì Encoded: i2_rcs_e
  ‚úì Encoded: i2_dep_1_p
  ‚úì Encoded: i2_dep_1_e
  ‚úì Encoded: i2_dep_1_place
  ‚úì Encoded: i2_rcf_1_p
  ‚úì Encoded: i2_rcf_1_e
  ‚úì Encoded: i2_rcf_1_place
  ‚úì Encoded: i2_dep_2_p
  ‚úì Encoded: i2_dep_2_e
  ‚úì Encoded: i2_dep_2_place
  ‚úì Encoded: i2_rcf_2_p
  ‚úì Encoded: i2_rcf_2_e
  ‚úì Encoded: i2_rcf_2_place
  ‚úì Encoded: i2_dep_3_p
  ‚úì Encoded: i2_dep_3_e
  ‚úì Encoded: i2_

In [None]:
# ==============================================================================
# 3. MODEL DEFINITION (Expanded Search Space)
# ==============================================================================

# Define a dictionary of models to train
models = {
    # LOGISTIC REGRESSION (Expanded)
    'Logistic_Regression_default': LogisticRegression(random_state=42, max_iter=1000),
    'Logistic_Regression_l1_liblinear': LogisticRegression(random_state=42, penalty='l1', solver='liblinear', max_iter=1000),
    'Logistic_Regression_C10': LogisticRegression(random_state=42, C=10, max_iter=1000),
    'Logistic_Regression_C01': LogisticRegression(random_state=42, C=0.1, max_iter=1000), # NEW C value
    'Logistic_Regression_l2_lbfgs': LogisticRegression(random_state=42, penalty='l2', solver='lbfgs', max_iter=1000), # NEW penalty/solver combo
    'Logistic_Regression_C10_l1': LogisticRegression(random_state=42, C=10, penalty='l1', solver='liblinear', max_iter=1000), # NEW combination

    # DECISION TREES (Expanded)
    'Decision_Tree_default': DecisionTreeClassifier(random_state=42),
    'Decision_Tree_depth10': DecisionTreeClassifier(random_state=42, max_depth=10),
    'Decision_Tree_min_samples_10': DecisionTreeClassifier(random_state=42, min_samples_leaf=10),
    'Decision_Tree_depth5': DecisionTreeClassifier(random_state=42, max_depth=5), # NEW depth
    'Decision_Tree_min_samples_5': DecisionTreeClassifier(random_state=42, min_samples_leaf=5), # NEW min_samples_leaf
    'Decision_Tree_entropy': DecisionTreeClassifier(random_state=42, criterion='entropy'), # NEW criterion

    # RANDOM FOREST (Expanded)
    'Random_Forest_default': RandomForestClassifier(random_state=42, n_estimators=100),
    'Random_Forest_150trees': RandomForestClassifier(random_state=42, n_estimators=150),
    'Random_Forest_depth10': RandomForestClassifier(random_state=42, max_depth=10),
    'Random_Forest_200trees': RandomForestClassifier(random_state=42, n_estimators=200), # NEW n_estimators
    'Random_Forest_depth5': RandomForestClassifier(random_state=42, max_depth=5), # NEW max_depth
    'Random_Forest_min_samples_5': RandomForestClassifier(random_state=42, min_samples_leaf=5), # NEW min_samples_leaf

    # LIGHTGBM (Already good with 3+ hyperparameters and multiple values)
    'LightGBM_default': LGBMClassifier(random_state=42, verbose=-1),
    'LightGBM_lr01': LGBMClassifier(random_state=42, learning_rate=0.01, verbose=-1),
    'LightGBM_depth5': LGBMClassifier(random_state=42, max_depth=5, verbose=-1),
    'LightGBM_n500': LGBMClassifier(random_state=42, n_estimators=500, verbose=-1),

    # XGBOOST (Expanded)
    'XGBoost_default': XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
    'XGBoost_lr01': XGBClassifier(random_state=42, learning_rate=0.01, use_label_encoder=False, eval_metric='mlogloss'),
    'XGBoost_depth5': XGBClassifier(random_state=42, max_depth=5, use_label_encoder=False, eval_metric='mlogloss'),
    'XGBoost_n200': XGBClassifier(random_state=42, n_estimators=200, use_label_encoder=False, eval_metric='mlogloss'), # NEW n_estimators
    'XGBoost_lr005': XGBClassifier(random_state=42, learning_rate=0.05, use_label_encoder=False, eval_metric='mlogloss'), # NEW learning_rate
    'XGBoost_depth3': XGBClassifier(random_state=42, max_depth=3, use_label_encoder=False, eval_metric='mlogloss'), # NEW max_depth

    # CATBOOST (Already good with 3+ hyperparameters and multiple values)
    'CatBoost_default': CatBoostClassifier(random_state=42, verbose=0),
    'CatBoost_lr01': CatBoostClassifier(random_state=42, verbose=0, learning_rate=0.01),
    'CatBoost_depth5': CatBoostClassifier(random_state=42, verbose=0, depth=5),
    'CatBoost_n500': CatBoostClassifier(random_state=42, verbose=0, n_estimators=500),
    'CatBoost_lr005': CatBoostClassifier(random_state=42, verbose=0, learning_rate=0.05),

    # NEW MODEL TYPE: MULTILAYER PERCEPTRON (MLP/Neural Network) (Expanded)
    'MLP_default': MLPClassifier(random_state=42, max_iter=500, verbose=False),
    'MLP_hidden50': MLPClassifier(random_state=42, max_iter=500, hidden_layer_sizes=(50, ), verbose=False),
    'MLP_hidden100_50': MLPClassifier(random_state=42, max_iter=500, hidden_layer_sizes=(100, 50), verbose=False), # NEW hidden_layer_sizes
    'MLP_tanh': MLPClassifier(random_state=42, max_iter=500, activation='tanh', verbose=False), # NEW activation
    'MLP_sgd': MLPClassifier(random_state=42, max_iter=500, solver='sgd', verbose=False), # NEW solver

    # NEW MODEL TYPE: SUPPORT VECTOR MACHINE (SVC) (Already good with 3+ hyperparameters and multiple values)
    'SVC_rbf_C1': SVC(random_state=42, C=1.0, kernel='rbf', probability=True),
    'SVC_linear_C01': SVC(random_state=42, C=0.1, kernel='linear', probability=True),
    'SVC_poly_C10': SVC(random_state=42, C=10.0, kernel='poly', degree=3, probability=True),
    'SVC_rbf_gamma_auto': SVC(random_state=42, C=1.0, kernel='rbf', gamma='auto', probability=True),
    'SVC_linear_C1_gamma_scale': SVC(random_state=42, C=1.0, kernel='linear', gamma='scale', probability=True),

    # NEW MODEL TYPE: GAUSSIAN NAIVE BAYES (Main hyperparameter var_smoothing already has multiple values)
    'GaussianNB_default': GaussianNB(),
    'GaussianNB_var_smooth_1e8': GaussianNB(var_smoothing=1e-8),
    'GaussianNB_var_smooth_1e7': GaussianNB(var_smoothing=1e-7),
    'GaussianNB_var_smooth_1e6': GaussianNB(var_smoothing=1e-6), # Additional value for var_smoothing

    # NEW MODEL TYPE: K-NEAREST NEIGHBORS (Already good with 3+ hyperparameters and multiple values)
    'KNeighbors_default': KNeighborsClassifier(n_neighbors=5),
    'KNeighbors_n3_uniform': KNeighborsClassifier(n_neighbors=3, weights='uniform'),
    'KNeighbors_n10_distance': KNeighborsClassifier(n_neighbors=10, weights='distance'),
    'KNeighbors_n5_euclidean': KNeighborsClassifier(n_neighbors=5, metric='euclidean'),
    'KNeighbors_n7_manhattan': KNeighborsClassifier(n_neighbors=7, metric='manhattan') # Additional metric
}

print(f"Total models defined: {len(models)}")

Total models defined: 52


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd # Import pandas for results_df
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import numpy as np # For np.argsort

# ... (Previous model list definitions remain the same) ...

# Initialize a DataFrame to store results for comparison
results_df = pd.DataFrame(columns=['Model', 'Run_ID', 'Accuracy', 'Precision (Macro)', 'Recall (Macro)', 'F1 (Macro)', 'ROC_AUC (OVR)'])

for model_name, model in models.items():
    print(f"\n" + "="*70)
    print(f"Training: {model_name}")
    print(f"="*70)

    with mlflow.start_run(run_name=model_name) as run:
        print(f"üî¨ MLflow Run Started: {run.info.run_id}")

        try:
            # --- Training ---
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # --- Metrics Calculation ---
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
            recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
            f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

            roc_auc = None
            if hasattr(model, 'predict_proba'):
                y_pred_proba = model.predict_proba(X_test)
                # Handle cases where y_test might have fewer than 2 unique classes for ROC AUC
                if len(np.unique(y_test)) > 1:
                    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='macro')

            # --- MLflow Logging (Metrics & Params) ---
            mlflow.log_param("model_type", model_name.split('_')[0])
            mlflow.log_metric("accuracy", accuracy)
            mlflow.log_metric("f1_score_macro", f1)
            if roc_auc is not None:
                mlflow.log_metric("roc_auc_ovr_macro", roc_auc)

            # Log all model parameters
            for param_name, param_value in model.get_params().items():
                mlflow.log_param(f"model_{param_name}", param_value)

            # --- ARTIFACT GENERATION & LOGGING ---

            # 1. Confusion Matrix Plot
            fig_cm, ax = plt.subplots(figsize=(8, 6))
            cm = confusion_matrix(y_test, y_pred)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm)
            disp.plot(cmap='Blues', ax=ax)
            plt.title(f'Confusion Matrix: {model_name}')

            # Save locally then log to MLflow
            cm_filename = "confusion_matrix.png"
            plt.savefig(cm_filename)
            mlflow.log_artifact(cm_filename) # Uploads to Databricks/MLflow
            plt.close(fig_cm)
            print(f"  ‚úì Logged artifact: {cm_filename}")

            # 2. Feature Importance Plot (Tree models only)
            if hasattr(model, 'feature_importances_'):
                fig_fi, ax = plt.subplots(figsize=(10, 6))
                importances = model.feature_importances_
                indices = np.argsort(importances)[::-1]
                # Top 20 features
                top_indices = indices[:20]

                plt.bar(range(len(top_indices)), importances[top_indices], align='center')
                plt.xticks(range(len(top_indices)), [X.columns[i] for i in top_indices], rotation=90)
                plt.title(f'Top 20 Feature Importances: {model_name}')
                plt.tight_layout()

                fi_filename = "feature_importance.png"
                plt.savefig(fi_filename)
                mlflow.log_artifact(fi_filename)
                plt.close(fig_fi)
                print(f"  ‚úì Logged artifact: {fi_filename}")

            # --- Log Model ---
            mlflow.sklearn.log_model(model, name="model", input_example=X_train.head(1))

            print(f"‚úì Model logged to Databricks")

            # --- Store results in DataFrame ---
            results_df.loc[len(results_df)] = [
                model_name,
                run.info.run_id,
                accuracy,
                precision,
                recall,
                f1,
                roc_auc
            ]

        except Exception as e:
            print(f"!!! FATAL ERROR: Model {model_name} failed. {e}")

# Sort results by F1 Score to find the best model
results_df = results_df.sort_values(by='F1 (Macro)', ascending=False).reset_index(drop=True)

print("\n" + "="*70)
print("ALL MODEL TRAINING COMPLETE")
print("="*70)
display(results_df)


Training: Logistic_Regression_default
üî¨ MLflow Run Started: 46541d8acee343b8bba06a620ad3eafb
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/46541d8acee343b8bba06a620ad3eafb
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Logistic_Regression_l1_liblinear
üî¨ MLflow Run Started: f779ecf6fb20498b87976735e289e5c3
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_l1_liblinear at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/f779ecf6fb20498b87976735e289e5c3
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Logistic_Regression_C10
üî¨ MLflow Run Started: 7fba67b8f75e496eaea6d96a73c01085
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_C10 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/7fba67b8f75e496eaea6d96a73c01085
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Logistic_Regression_C01
üî¨ MLflow Run Started: b544572052614503a165384864b3111b
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_C01 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/b544572052614503a165384864b3111b
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Logistic_Regression_l2_lbfgs
üî¨ MLflow Run Started: a84e7ce6633b4d04b231008d8afa0ec6
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_l2_lbfgs at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/a84e7ce6633b4d04b231008d8afa0ec6
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Logistic_Regression_C10_l1
üî¨ MLflow Run Started: 4d3a355d1e4b4efb8c785b6eda992290
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run Logistic_Regression_C10_l1 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/4d3a355d1e4b4efb8c785b6eda992290
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_default
üî¨ MLflow Run Started: 7ff01aae2c90439ab66871ce97610762
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/7ff01aae2c90439ab66871ce97610762
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_depth10
üî¨ MLflow Run Started: 4dff9b8f8f6a4742b50ed7760b049907
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_depth10 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/4dff9b8f8f6a4742b50ed7760b049907
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_min_samples_10
üî¨ MLflow Run Started: 5e46817d96b24c839653cd0cfc588266
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_min_samples_10 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/5e46817d96b24c839653cd0cfc588266
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_depth5
üî¨ MLflow Run Started: f6b9a50c6ac7450db7e595521d3d7ea3
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_depth5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/f6b9a50c6ac7450db7e595521d3d7ea3
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_min_samples_5
üî¨ MLflow Run Started: 3dbe4bb7c52f497a96663f770baa6975
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_min_samples_5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/3dbe4bb7c52f497a96663f770baa6975
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Decision_Tree_entropy
üî¨ MLflow Run Started: c2699c42cf01453b93da358ee8cd902c
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Decision_Tree_entropy at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/c2699c42cf01453b93da358ee8cd902c
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_default
üî¨ MLflow Run Started: 7db18d60b3bb46dc8b746beff6bc84b9
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/7db18d60b3bb46dc8b746beff6bc84b9
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_150trees
üî¨ MLflow Run Started: 424675382740420b8ddc87d4bb4962c4
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_150trees at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/424675382740420b8ddc87d4bb4962c4
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_depth10
üî¨ MLflow Run Started: d2acec2cd3d3425dbea255a97d106122
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_depth10 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/d2acec2cd3d3425dbea255a97d106122
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_200trees
üî¨ MLflow Run Started: b785ac60f8154d339f327fa7be5af9e3
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_200trees at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/b785ac60f8154d339f327fa7be5af9e3
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_depth5
üî¨ MLflow Run Started: 2492a36223fb4cd4b2517259ccf8eadd
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_depth5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/2492a36223fb4cd4b2517259ccf8eadd
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: Random_Forest_min_samples_5
üî¨ MLflow Run Started: db8c122191ad48679aa11149ecf7c9c2
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run Random_Forest_min_samples_5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/db8c122191ad48679aa11149ecf7c9c2
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: LightGBM_default
üî¨ MLflow Run Started: 943f071f0a054367ae30c0254e6cb2af
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run LightGBM_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/943f071f0a054367ae30c0254e6cb2af
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: LightGBM_lr01
üî¨ MLflow Run Started: 894fa24b4013491e831ff40ad2b3a229
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run LightGBM_lr01 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/894fa24b4013491e831ff40ad2b3a229
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: LightGBM_depth5
üî¨ MLflow Run Started: 703c2b08804e42a8945a153021a8d904
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run LightGBM_depth5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/703c2b08804e42a8945a153021a8d904
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: LightGBM_n500
üî¨ MLflow Run Started: 85effb752d364232851c285927511bbf
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run LightGBM_n500 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/85effb752d364232851c285927511bbf
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_default
üî¨ MLflow Run Started: 7734ee080c584816bacb421ada21b5bf


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/7734ee080c584816bacb421ada21b5bf
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_lr01
üî¨ MLflow Run Started: 05fd8f6d207c438e810349a6855c2b0b


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_lr01 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/05fd8f6d207c438e810349a6855c2b0b
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_depth5
üî¨ MLflow Run Started: 4910502fb76f4165b03592965c075b80


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_depth5 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/4910502fb76f4165b03592965c075b80
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_n200
üî¨ MLflow Run Started: 6f99fb81127d40108e23bdc652fb8d53


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_n200 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/6f99fb81127d40108e23bdc652fb8d53
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_lr005
üî¨ MLflow Run Started: 28eb6546f7e94cdf84f43013a77c2aa7


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_lr005 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/28eb6546f7e94cdf84f43013a77c2aa7
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: XGBoost_depth3
üî¨ MLflow Run Started: 82421e4dd74b486aa6dca8407fde4b36


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png




‚úì Model logged to Databricks
üèÉ View run XGBoost_depth3 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/82421e4dd74b486aa6dca8407fde4b36
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: CatBoost_default
üî¨ MLflow Run Started: ac35cbea4eb14bbcbbe25abac2d1d1bd
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png
‚úì Model logged to Databricks
üèÉ View run CatBoost_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/ac35cbea4eb14bbcbbe25abac2d1d1bd
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: CatBoost_lr01
üî¨ MLflow Run Started: af1a715ff89a4c55946efc2238984625
  ‚úì Logged artifact: confusion_matrix.png
  ‚úì Logged artifact: feature_importance.png
‚úì Model logged to Databricks
üèÉ View run CatBoost_lr01 at: https://dbc-a0c8



‚úì Model logged to Databricks
üèÉ View run MLP_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/6a811e7ace00485bb2b32cc814eea91e
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: MLP_hidden50
üî¨ MLflow Run Started: 8a1ed1386382482884eb58db9769bb74
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run MLP_hidden50 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/8a1ed1386382482884eb58db9769bb74
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: MLP_hidden100_50
üî¨ MLflow Run Started: f76a86cf55c7418e8ea9c91e942416a0
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run MLP_hidden100_50 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/f76a86cf55c7418e8ea9c91e942416a0
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: MLP_tanh
üî¨ MLflow Run Started: b530d3168d1a4d368b889253e44ef0af
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run MLP_tanh at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/b530d3168d1a4d368b889253e44ef0af
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: MLP_sgd
üî¨ MLflow Run Started: 32385eac59a7499abd7913477f171b12




  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run MLP_sgd at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/32385eac59a7499abd7913477f171b12
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: SVC_rbf_C1
üî¨ MLflow Run Started: cb2e4aeb70474ceea76133d4334c9800
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run SVC_rbf_C1 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/cb2e4aeb70474ceea76133d4334c9800
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: SVC_linear_C01
üî¨ MLflow Run Started: 0746fe523029428e92d5cd785fda9b7f
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run SVC_linear_C01 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/0746fe523029428e92d5cd785fda9b7f
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: SVC_poly_C10
üî¨ MLflow Run Started: b3472d7eadd24c3c80e374d64067a8e5
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run SVC_poly_C10 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/b3472d7eadd24c3c80e374d64067a8e5
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: SVC_rbf_gamma_auto
üî¨ MLflow Run Started: a09eb800a5d04b678a53e9fd115fa385
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run SVC_rbf_gamma_auto at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/a09eb800a5d04b678a53e9fd115fa385
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: SVC_linear_C1_gamma_scale
üî¨ MLflow Run Started: 8bf9e81ff1614498965a68a268b6a4a6
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run SVC_linear_C1_gamma_scale at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/8bf9e81ff1614498965a68a268b6a4a6
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: GaussianNB_default
üî¨ MLflow Run Started: 510fcc90a79a47b196596e6846386780
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run GaussianNB_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/510fcc90a79a47b196596e6846386780
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: GaussianNB_var_smooth_1e8
üî¨ MLflow Run Started: 2c63a5f0295045378cb04659a5f1e00f
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run GaussianNB_var_smooth_1e8 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/2c63a5f0295045378cb04659a5f1e00f
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: GaussianNB_var_smooth_1e7
üî¨ MLflow Run Started: 436c7dda28784a739ed920f6f3c2f348
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run GaussianNB_var_smooth_1e7 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/436c7dda28784a739ed920f6f3c2f348
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: GaussianNB_var_smooth_1e6
üî¨ MLflow Run Started: 7b9eea29e9824847b5e57b70f73e956b
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run GaussianNB_var_smooth_1e6 at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/7b9eea29e9824847b5e57b70f73e956b
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: KNeighbors_default
üî¨ MLflow Run Started: 3beeafb0781b460dbc71e19de16da3ae
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run KNeighbors_default at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/3beeafb0781b460dbc71e19de16da3ae
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: KNeighbors_n3_uniform
üî¨ MLflow Run Started: 98ef27dcc5c8414b95d50bfc05b04ea8
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run KNeighbors_n3_uniform at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/98ef27dcc5c8414b95d50bfc05b04ea8
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: KNeighbors_n10_distance
üî¨ MLflow Run Started: 90f64054b47e48bcb505de71f7722726
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run KNeighbors_n10_distance at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/90f64054b47e48bcb505de71f7722726
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: KNeighbors_n5_euclidean
üî¨ MLflow Run Started: 4149ec9c0942457486407e25cb0d2068
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run KNeighbors_n5_euclidean at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/4149ec9c0942457486407e25cb0d2068
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

Training: KNeighbors_n7_manhattan
üî¨ MLflow Run Started: 49d9c09db90a472c8499bbcce3124acf
  ‚úì Logged artifact: confusion_matrix.png




‚úì Model logged to Databricks
üèÉ View run KNeighbors_n7_manhattan at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190/runs/49d9c09db90a472c8499bbcce3124acf
üß™ View experiment at: https://dbc-a0c89f71-7936.cloud.databricks.com/ml/experiments/3868354309605190

ALL MODEL TRAINING COMPLETE


Unnamed: 0,Model,Run_ID,Accuracy,Precision (Macro),Recall (Macro),F1 (Macro),ROC_AUC (OVR)
0,CatBoost_default,ac35cbea4eb14bbcbbe25abac2d1d1bd,0.708492,0.701521,0.700715,0.700022,0.858032
1,XGBoost_depth3,82421e4dd74b486aa6dca8407fde4b36,0.703422,0.697021,0.696894,0.69687,0.849691
2,XGBoost_default,7734ee080c584816bacb421ada21b5bf,0.69962,0.692292,0.691732,0.69102,0.850156
3,XGBoost_n200,6f99fb81127d40108e23bdc652fb8d53,0.698352,0.691346,0.691046,0.690758,0.852041
4,Random_Forest_min_samples_5,db8c122191ad48679aa11149ecf7c9c2,0.697085,0.689416,0.68874,0.687604,0.850341
5,MLP_hidden100_50,f76a86cf55c7418e8ea9c91e942416a0,0.693283,0.68709,0.68709,0.68709,0.820398
6,LightGBM_n500,85effb752d364232851c285927511bbf,0.69455,0.687277,0.686965,0.686531,0.851666
7,XGBoost_depth5,4910502fb76f4165b03592965c075b80,0.693283,0.686525,0.686415,0.686337,0.846429
8,CatBoost_lr005,b7d00a722f42477e83089bc9ce620eda,0.692015,0.684456,0.684109,0.683451,0.846646
9,MLP_default,6a811e7ace00485bb2b32cc814eea91e,0.684411,0.679747,0.679187,0.679112,0.836052


In [None]:
# ==============================================================================
# 7. LOG BEST MODEL SUMMARY
# ==============================================================================
print("\n" + "="*70)
print("BEST MODEL SUMMARY")
print("="*70)

if 'results_df' in locals() and not results_df.empty:
    best_model_name = results_df.iloc[0]['Model']
    best_run_id = results_df.iloc[0]['Run_ID']
    best_f1 = results_df.iloc[0]['F1 (Macro)']
    best_accuracy = results_df.iloc[0]['Accuracy']
    best_precision = results_df.iloc[0]['Precision (Macro)']
    best_recall = results_df.iloc[0]['Recall (Macro)']
    best_roc_auc = results_df.iloc[0]['ROC_AUC (OVR)']

    print(f"\nüèÜ Best Model: {best_model_name}")
    print(f"   Run ID: {best_run_id}")
    print(f"   F1 Score: {best_f1:.4f}")
    print(f"   Accuracy: {best_accuracy:.4f}")
    print(f"   Precision: {best_precision:.4f}")
    print(f"   Recall: {best_recall:.4f}")
    print(f"   ROC AUC: {best_roc_auc if best_roc_auc is not None else 'N/A'}")
else:
    print("\n--- Best model summary skipped because no models were trained or results_df is empty. ---")


BEST MODEL SUMMARY

üèÜ Best Model: CatBoost_default
   Run ID: ac35cbea4eb14bbcbbe25abac2d1d1bd
   F1 Score: 0.7000
   Accuracy: 0.7085
   Precision: 0.7015
   Recall: 0.7007
   ROC AUC: 0.8580322361052738


In [None]:
# ==============================================================================
# 8. TRAINING COMPLETE
# ==============================================================================
print("\n" + "="*70)
print("TRAINING COMPLETE! üéâ")
print("="*70)

if 'results_df' in locals() and not results_df.empty:
    print(f"\nüìä SUMMARY:")
    print(f"   Total models trained: {len(models)}")
    print(f"   All models logged to Databricks MLflow")
    print(f"   Best model: {best_model_name}")
    print(f"   Best F1 Score: {best_f1:.4f}")

    print(f"\nüìä WHAT WAS LOGGED TO DATABRICKS:")
    print(f"   ‚úì {len(models)} separate MLflow runs")
    print(f"   ‚úì All hyperparameters for each model")
    print(f"   ‚úì All metrics (accuracy, precision, recall, F1, ROC AUC)")
    print(f"   ‚úì All trained models")

    print(f"\nüîç VIEW YOUR EXPERIMENTS IN DATABRICKS:")
    print(f"   1. Go to your Databricks workspace: {DATABRICKS_HOST}")
    print(f"   2. Click 'Machine Learning' in left sidebar")
    print(f"   3. Click 'Experiments'")
    print(f"   4. Find: {EXPERIMENT_NAME}")
    print(f"   5. Compare all {len(models)} runs side-by-side")

    print("\n" + "="*70)
else:
    print("\n--- Training completion summary skipped. Please check if models were trained and results_df is populated. ---")


TRAINING COMPLETE! üéâ

üìä SUMMARY:
   Total models trained: 52
   All models logged to Databricks MLflow
   Best model: CatBoost_default
   Best F1 Score: 0.7000

üìä WHAT WAS LOGGED TO DATABRICKS:
   ‚úì 52 separate MLflow runs
   ‚úì All hyperparameters for each model
   ‚úì All metrics (accuracy, precision, recall, F1, ROC AUC)
   ‚úì All trained models

üîç VIEW YOUR EXPERIMENTS IN DATABRICKS:
   1. Go to your Databricks workspace: https://dbc-a0c89f71-7936.cloud.databricks.com
   2. Click 'Machine Learning' in left sidebar
   3. Click 'Experiments'
   4. Find: /Users/svemulak@asu.edu/Attempt_6_Supply-Chain-Classification-Experiment-1
   5. Compare all 52 runs side-by-side



In [None]:
print(df.columns.tolist())

['nr', 'i1_legid', 'i1_rcs_p', 'i1_rcs_e', 'i1_dep_1_p', 'i1_dep_1_e', 'i1_dep_1_place', 'i1_rcf_1_p', 'i1_rcf_1_e', 'i1_rcf_1_place', 'i1_dep_2_p', 'i1_dep_2_e', 'i1_dep_2_place', 'i1_rcf_2_p', 'i1_rcf_2_e', 'i1_rcf_2_place', 'i1_dep_3_p', 'i1_dep_3_e', 'i1_dep_3_place', 'i1_rcf_3_p', 'i1_rcf_3_e', 'i1_rcf_3_place', 'i1_dlv_p', 'i1_dlv_e', 'i1_hops', 'i2_legid', 'i2_rcs_p', 'i2_rcs_e', 'i2_dep_1_p', 'i2_dep_1_e', 'i2_dep_1_place', 'i2_rcf_1_p', 'i2_rcf_1_e', 'i2_rcf_1_place', 'i2_dep_2_p', 'i2_dep_2_e', 'i2_dep_2_place', 'i2_rcf_2_p', 'i2_rcf_2_e', 'i2_rcf_2_place', 'i2_dep_3_p', 'i2_dep_3_e', 'i2_dep_3_place', 'i2_rcf_3_p', 'i2_rcf_3_e', 'i2_rcf_3_place', 'i2_dlv_p', 'i2_dlv_e', 'i2_hops', 'i3_legid', 'i3_rcs_p', 'i3_rcs_e', 'i3_dep_1_p', 'i3_dep_1_e', 'i3_dep_1_place', 'i3_rcf_1_p', 'i3_rcf_1_e', 'i3_rcf_1_place', 'i3_dep_2_p', 'i3_dep_2_e', 'i3_dep_2_place', 'i3_rcf_2_p', 'i3_rcf_2_e', 'i3_rcf_2_place', 'i3_dep_3_p', 'i3_dep_3_e', 'i3_dep_3_place', 'i3_rcf_3_p', 'i3_rcf_3_e', 'i3_r

In [None]:
print(df.shape)

(3942, 98)
