In [3]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
import mlflow
import uuid
from datetime import datetime
import os
import io
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score



In [11]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\aman1\.cache\kagglehub\datasets\mlg-ulb\creditcardfraud\versions\3


In [4]:
# Load data
df = pd.read_csv("creditcard.csv")#paste path of download file 
df = df.sample(n=10000, random_state=42).reset_index(drop=True)

# Check missing values
print(df.isnull().sum())  # should be all 0

# Remove duplicates
df = df.drop_duplicates()

# Scale Amount column
scaler = StandardScaler()
df['scaled_amount'] = scaler.fit_transform(df[['Amount']])

# Optionally drop original Amount and Time
df = df.drop(['Amount', 'Time'], axis=1)
# === Step 2: Add Required Columns ===

df["TransactionID"] = [str(uuid.uuid4()) for _ in range(len(df))]
df["EventTime"] = datetime.utcnow().isoformat()

# === Step 3: Save Locally as Parquet ===
parquet_path = "fraud_data.parquet"
df.to_parquet(parquet_path, index=False)
print("✅ Parquet file saved:", parquet_path)
# Read Parquet file into DataFrame
# Read Parquet file into DataFrame
parquet_path = "fraud_data.parquet"
df = pd.read_parquet(parquet_path)

print("✅ Parquet file loaded successfully.")

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64
✅ Parquet file saved: fraud_data.parquet
✅ Parquet file loaded successfully.


In [9]:
# Start a run
mlflow.set_tracking_uri("http://127.0.0.1:5000")  # Or local path
mlflow.set_experiment("Fraud_Detection_Comparison")
with mlflow.start_run():
    mlflow.log_param("param1", 5)
    mlflow.log_metric("accuracy", 0.95)

#  Start MLflow run and log
with mlflow.start_run(run_name="data_exploration_run") as run:
    # Log a sample parameter
    mlflow.log_param("data_source", "fraud_data.parquet")

    # Log a sample metric
    mlflow.log_metric("num_rows", len(df))

    # Save and log artifact
    sample_path = "sample_data.csv"
    df.head(100).to_csv(sample_path, index=False)
    mlflow.log_artifact(sample_path)

    print("✅ MLflow run completed. Run ID:", run.info.run_id)


🏃 View run silent-bug-144 at: http://127.0.0.1:5000/#/experiments/865794027230414114/runs/92b65c24d1e842f682ee5bbe9a2078e6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/865794027230414114
✅ MLflow run completed. Run ID: 76d33a1df8bd45fea4ea436e975fe2de
🏃 View run data_exploration_run at: http://127.0.0.1:5000/#/experiments/865794027230414114/runs/76d33a1df8bd45fea4ea436e975fe2de
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/865794027230414114


In [10]:
## Load your dataset
df = pd.read_parquet("data/fraud_data.parquet")
# ✅ Features and label
target = "Class"
X_full = df.drop(columns=[target])
y = df[target]

# ✅ Define feature subsets
# Drop columns with too many unique values (e.g., more than 100)
for col in X_full.select_dtypes(include="object").columns:
    if X_full[col].nunique() > 100:
        print(f"Dropping column: {col} (unique: {X_full[col].nunique()})")
        X_full = X_full.drop(columns=col)

X_full = pd.get_dummies(X_full)  # One-hot encode all object-type columns
all_features = X_full.columns.tolist()
half_features = all_features[:len(all_features)//2]
top_features = all_features[10:20]  # Example: features ranked important by domain or feature importance

feature_sets = {
    "all_features": all_features,
    "half_features": half_features,
    "top_10_features": top_features
}

# ✅ Define models and hyperparameters
models = {
    "LogisticRegression": {
        "model": LogisticRegression(max_iter=500),
        "params": {"C": [0.1, 1, 10]}
    },
    "RandomForest": {
        "model": RandomForestClassifier(),
        "params": {"n_estimators": [50, 100], "max_depth": [3, 5]}
    },
    "SVC": {
        "model": SVC(),
        "params": {"C": [0.1, 1], "kernel": ["linear", "rbf"]}
    }
}


for exp_id in range(1, 11):
    experiment_name = f"Fraud_Detection_Exp_{exp_id}"
    mlflow.set_experiment(experiment_name)

    for feature_set_name, selected_features in feature_sets.items():
        X = X_full[selected_features]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=exp_id)

        for model_name, config in models.items():
            model = config["model"]
            param_grid = config["params"]

            grid = GridSearchCV(model, param_grid, cv=3, scoring="accuracy")
            grid.fit(X_train, y_train)

            best_model = grid.best_estimator_
            y_pred = best_model.predict(X_test)

            acc = accuracy_score(y_test, y_pred)
            prec = precision_score(y_test, y_pred, zero_division=0)
            rec = recall_score(y_test, y_pred, zero_division=0)

            with mlflow.start_run(run_name=f"{model_name}_{feature_set_name}"):
                mlflow.log_param("model", model_name)
                mlflow.log_param("feature_set", feature_set_name)
                mlflow.log_params(grid.best_params_)
                mlflow.log_metric("accuracy", acc)
                mlflow.log_metric("precision", prec)
                mlflow.log_metric("recall", rec)
                mlflow.sklearn.log_model(best_model, artifact_path="model")

                print(f"✅ Run logged for {model_name} with {feature_set_name} in {experiment_name}")
                

Dropping column: TransactionID (unique: 10000)




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_1
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/c0d6bc8d585c4c31b5f2143d4e593258
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_1
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/b2e26c5d98194a2681d2e4bf42946f57
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_1
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/0ce000222f8a496f9fd924df023f0485
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_1
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/ebb0bd1fa1f94fcea846e018eb0926c4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_1
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/e2e8b864671f4442b45bdb721d51245c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_1
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/2404dd3ba27f48aa826c7e817652f9e2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_1
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/a0de4d2a56914ef5a4e764b49508ba6f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_1
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/9c37cb7d329542f69410274df2371b2f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_1
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/716805485245489680/runs/c9d8754f565f44de977d7551bb2c4954
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/716805485245489680




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_2
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/77f04b1e1d2d433693bc83553cad0266
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_2
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/76a88035d5244a7ea242b2eb4b1f9257
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_2
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/61f28ab64a9a4761bb7525c4301d1b6e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_2
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/90ec1d38ca6846fc9fbf7858d63e7fdd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_2
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/b214421c492a4035a261ccb193cee8e6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_2
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/a677cb3e5f2e4696b6604a9a94880158
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_2
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/502f9c2d7a0a4e80bd740746fa3540ad
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_2
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/2a470bfdf58049099e8b0b3d4666a389
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_2
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/247689927393813146/runs/fd3b66ada0094b05a294505536b5ba15
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/247689927393813146




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_3
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/eaecb3c6ff834207a1043d3087d5c2a8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_3
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/128b70a95d60404695cedced9a37f2fa
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_3
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/6923ca82d3714d679d0b2b156bfb4266
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_3
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/5ead67dff331476087ad4d02ad3dcf4b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_3
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/b1262ae065d945c091974ac1f911beeb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_3
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/d900937f8ec54922933e6e0dccc9affa
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_3
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/5263f7a1bb334f36aa3a710229dc5428
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_3
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/fb98e795ec2c49f1ae0db735fc428442
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_3
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/911313465822548870/runs/88b08c3351d14eb5ab87b5dbfb62eb08
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/911313465822548870




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_4
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/558260ff291c48c5966c71cc4c9d8a5b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_4
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/ccf56756eff14e36b8a730dcd16b16b1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_4
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/a22feda4a07c44549298da2602e682dd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_4
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/630727277ea54bc78aabec318895da4e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_4
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/040ee902fc9840f3ba57fb7f9585233f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_4
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/755ca2e788da470c94a860cd37a0e233
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_4
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/acecef69e3c345168afe48111e65c8f5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_4
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/90d33cdc8e374f968ccbfce859d88b69
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_4
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/900422544236318584/runs/ce7ad64051b1465a8ca3a54c9978ab28
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/900422544236318584




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_5
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/6b0b4ab7aeee4750a641b089b2306a11
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_5
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/4ba3839cab12445d93b33fde32d1d953
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_5
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/26a856d0e7204ce5baedff1aae13a405
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_5
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/f4e617cb631f473581aff48750629276
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_5
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/58538bced2404a108f69c4ea9a83b0f3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_5
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/e95f08c280944b699a21b71be5382e53
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_5
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/1df305737f794845a41555f480f3522b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_5
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/e64bd5d3a487452eb1e0e9c262567071
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_5
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/475839512402755664/runs/da0bd016667f456981e25566da9c7325
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/475839512402755664




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_6
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/66d97e3384b646c397cdff83b9720b64
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_6
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/ef01a1bd25374de5a8ed9c7924c44667
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_6
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/5c9ba1eebb074ef79d09911533ceabc9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_6
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/ffbdb5d944e04e4992d7cfbab488a27b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_6
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/8698eb1b20f94c59af780ecbd10080b1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_6
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/b53961bb1cdc4c45af2241f7dae8e48a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_6
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/62b5037bd8d04946b9188dbf9a20db33
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_6
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/702a9f6765a845f2a93e367f83d85a89
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_6
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/100133137102634117/runs/63c83f610e0447f7965745fcac966508
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/100133137102634117




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_7
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/a4e8bf95e60c48aa8e6c874a6c7d9618
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_7
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/753d5555090242fdad7742da5b0412bf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_7
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/fc329c46b36b49808f490e7e4ded1a91
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_7
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/ab8a6b087e264175af7d60e3cb571caa
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_7
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/7987aa4e1c02477195ced73e6ffdb530
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_7
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/fecb6c6831c54d9592435f5a6dec7746
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_7
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/4a89bb5fa9014e1ca82298f2b2a969d2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_7
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/8317c7d4c29c49f985ecb544c8f35728
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_7
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/350371553737021814/runs/b6257cc6818249ab9121135847cde3cf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/350371553737021814




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_8
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/9813d2ee718142888cc6ff305c21cd76
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_8
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/4a7333b93e3b41b9b295b862d8217eb3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_8
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/d009b8b8ef064b798463cdc5b5d5dd85
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_8
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/1c843812aa6c43598d38e8a101064d4d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_8
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/8e2497554f4040b6a8d3e20019055ae9
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_8
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/7a103b20ab5b454aa8a737380fd06ede
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_8
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/9bba7df02f00488984217f6be377de9d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_8
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/b45eba7cbca248bcaa7ca170a7480d4d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_8
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/816327630247331240/runs/0e5b6b75426342a082305388ea049aeb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/816327630247331240




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_9
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/062ca590bd024d45aaa1dbf6201274ff
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_9
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/e89d874f7f944893b4c14749cea73f25
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_9
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/e82321dd14d444b78acdc090ad87b62d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_9
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/931883d841084342a3c25d556dd1ee22
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_9
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/950df597c3da4e8c9f0b5d37def0e26e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_9
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/cdec12795483421f9a0da3dc280cf0bb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_9
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/94dc6da628f549f2b3e6bc08dd5ae222
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_9
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/da065be0ef34406f874c85ba93fe9d7e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_9
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/395380171902018373/runs/090964ec540145fd9d43c603a02b3ab2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/395380171902018373




✅ Run logged for LogisticRegression with all_features in Fraud_Detection_Exp_10
🏃 View run LogisticRegression_all_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/a4ebb4f2a709486e95fa5f322d9825cb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for RandomForest with all_features in Fraud_Detection_Exp_10
🏃 View run RandomForest_all_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/64885d209e90419f813b9bf6a4887577
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for SVC with all_features in Fraud_Detection_Exp_10
🏃 View run SVC_all_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/6450c37d8fc54b339c7c8d89e28fdb39
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for LogisticRegression with half_features in Fraud_Detection_Exp_10
🏃 View run LogisticRegression_half_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/c3c6e1574abd432da4cc97f6a0c9e5b2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for RandomForest with half_features in Fraud_Detection_Exp_10
🏃 View run RandomForest_half_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/efc52e1fc48c48e098601fbce7ef27cd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for SVC with half_features in Fraud_Detection_Exp_10
🏃 View run SVC_half_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/2fd3bd3a0d514fe1bf006aec53305822
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for LogisticRegression with top_10_features in Fraud_Detection_Exp_10
🏃 View run LogisticRegression_top_10_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/91f6eb76a7354adc80f636d3003dba55
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for RandomForest with top_10_features in Fraud_Detection_Exp_10
🏃 View run RandomForest_top_10_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/18316c8dcf3a4de4b5a8cba55387a9e3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808




✅ Run logged for SVC with top_10_features in Fraud_Detection_Exp_10
🏃 View run SVC_top_10_features at: http://127.0.0.1:5000/#/experiments/878842071363910808/runs/02709d536b304989aaed09e168bb72d7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/878842071363910808


In [6]:
from arize.pandas.logger import Client
from arize.utils.types import ModelTypes, Environments, Schema


# Your Arize credentials
space_key = "U3BhY2U6MjM3MTI6RThBTQ=="
api_key = "ak-8c93aa68-e105-4c23-b977-4ffb437fe7a5-rZPuli0UaGIrRAJ3x-OkK1sg_l5e5mFT"

# Get the best model from the last grid search run (assuming it's the last one executed)
# You might need to adjust this if you have multiple grid search runs in the same cell
best_model = grid.best_estimator_
predictions = best_model.predict(X_test)
# ✅ Prepare the logging dataframe
log_df = X_test.copy()
log_df["prediction_id"] = range(len(X_test))
log_df["prediction"] = predictions
log_df["actual"] = y_test.values
# ✅ Reset index to avoid validation error
log_df.reset_index(drop=True, inplace=True)

# Define schema
schema = Schema(
    prediction_id_column_name="prediction_id",
    prediction_label_column_name="prediction",
    actual_label_column_name="actual",
    feature_column_names=X_test.columns.tolist()
)

# Create Arize client
client = Client(space_key=space_key, api_key=api_key)

# ✅ Correct model_type and environment
response = client.log(
    model_id="fraud_model_v1",
    model_version="1.0.0",
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    environment=Environments.PRODUCTION,   # ✅ FIXED HERE
    dataframe=log_df.head(100),
    schema=schema
)

# ✅ Result
if response.status_code == 200:
    print("✅ Successfully logged to Arize")
else:
    print(f"❌ Error: {response.status_code} - {response.text}")

[38;21m  arize.utils.logging | INFO | Success! Check out your data at https://app.arize.com/organizations/QWNjb3VudE9yZ2FuaXphdGlvbjowOjZSQ2g=/spaces/U3BhY2U6MDo1bGZD/models/modelName/fraud_model_v1?selectedTab=performance[0m
✅ Successfully logged to Arize


In [12]:
from arize.pandas.logger import Client
from arize.utils.types import ModelTypes, Environments, Schema

# Step 1: Fetch Metrics from MLflow
def get_mlflow_metrics(run_id: str):
    client = mlflow.tracking.MlflowClient()
    data = client.get_run(run_id)
    return data.data.metrics

def get_arize_metrics_from_csv(path: str):
    df = pd.read_csv(path)
    metrics = {}
    
    # Example: expected columns ['metric_name', 'value']
    for _, row in df.iterrows():
        key = row['metric_name'].strip().lower()
        val = row['value']
        metrics[key] = float(val)

    return metrics

    

# Step 3: Compare both
def compare_metrics(mlflow_metrics, arize_metrics):
    print("\n🔍 Comparing Metrics:")
    for key in mlflow_metrics:
        mlflow_val = mlflow_metrics[key]
        arize_val = arize_metrics.get(key)
        if arize_val is not None:
            print(f"{key}: MLflow = {mlflow_val:.4f}, Arize = {arize_val:.4f}")
        else:
            print(f"{key}: MLflow = {mlflow_val:.4f}, Arize = ❌ Not Found")

# Run the comparison
run_id = "76d33a1df8bd45fea4ea436e975fe2de"  # your real run_id
mlflow_metrics = get_mlflow_metrics(run_id)
arize_metrics = get_arize_metrics()
compare_metrics(mlflow_metrics, arize_metrics)


🔍 Comparing Metrics:
num_rows: MLflow = 10000.0000, Arize = 10000.0000


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import optuna
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score

# Your dataset
df = df.select_dtypes(include=['number'])
target = "Class"
X_full = df.drop(columns=[target])
y_full = df[target]

X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=42)


models = {
    "LogisticRegression": LogisticRegression,
    "RandomForest": RandomForestClassifier,
    "XGBoost": XGBClassifier
}

#optuna
def objective(trial):
    model_name = trial.suggest_categorical("model", list(models.keys()))
    
    if model_name == "LogisticRegression":
        C = trial.suggest_loguniform("C", 0.01, 10)
        model = LogisticRegression(C=C, max_iter=1000)
        
    elif model_name == "RandomForest":
        n_estimators = trial.suggest_int("n_estimators", 50, 200)
        max_depth = trial.suggest_int("max_depth", 3, 10)
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
        
    else:  # XGBoost
        eta = trial.suggest_float("eta", 0.01, 0.3)
        max_depth = trial.suggest_int("max_depth", 3, 10)
        model = XGBClassifier(eta=eta, max_depth=max_depth, use_label_encoder=False, eval_metric='logloss')
    
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    score = accuracy_score(y_test, preds)

    # MLflow logging
    with mlflow.start_run():
        mlflow.log_param("model", model_name)
        mlflow.log_params(trial.params)
        mlflow.log_metric("accuracy", score)
        mlflow.sklearn.log_model(model, "model")
    
    return score
mlflow.set_experiment("optuna_model_selection")
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)
print("Best model:", study.best_params["model"])
print("Best accuracy:", study.best_value)


[I 2025-07-25 11:30:33,244] A new study created in memory with name: no-name-0e4fe88d-4f02-44e0-ad2b-9e135b9f3aa8
[I 2025-07-25 11:30:39,938] Trial 0 finished with value: 0.9985 and parameters: {'model': 'RandomForest', 'n_estimators': 155, 'max_depth': 6}. Best is trial 0 with value: 0.9985.
  C = trial.suggest_loguniform("C", 0.01, 10)


🏃 View run languid-swan-724 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/e8f8f726f3c543d2b0984f8d761de6a8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:30:43,455] Trial 1 finished with value: 0.9975 and parameters: {'model': 'LogisticRegression', 'C': 0.035937104885451855}. Best is trial 0 with value: 0.9985.
  C = trial.suggest_loguniform("C", 0.01, 10)


🏃 View run classy-slug-815 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/8dad88a6d3eb4fb88a05a41df901c1d8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:30:46,737] Trial 2 finished with value: 0.997 and parameters: {'model': 'LogisticRegression', 'C': 0.1937274144914623}. Best is trial 0 with value: 0.9985.


🏃 View run grandiose-moth-118 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/d34c5d0d42ac4782a84f8a33950a5650
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:30:53,062] Trial 3 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 152, 'max_depth': 6}. Best is trial 3 with value: 0.999.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run lyrical-mouse-623 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/16c658956dd44791a7c6b0d576c60280
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:30:56,726] Trial 4 finished with value: 0.9985 and parameters: {'model': 'XGBoost', 'eta': 0.16401545597149098, 'max_depth': 5}. Best is trial 3 with value: 0.999.
  C = trial.suggest_loguniform("C", 0.01, 10)


🏃 View run bittersweet-moth-617 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/2c7c0cd1341f4efdb9a94e23da5ac802
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:00,280] Trial 5 finished with value: 0.9975 and parameters: {'model': 'LogisticRegression', 'C': 0.022790769451276394}. Best is trial 3 with value: 0.999.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run bustling-loon-179 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/81e210ed222042b8bd50431b5d5ebf74
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:03,915] Trial 6 finished with value: 0.997 and parameters: {'model': 'XGBoost', 'eta': 0.027369457116418576, 'max_depth': 7}. Best is trial 3 with value: 0.999.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run thundering-snail-603 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/11b3bc9a48c24427b8a731790e6e6048
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:07,357] Trial 7 finished with value: 0.998 and parameters: {'model': 'XGBoost', 'eta': 0.23102736807842683, 'max_depth': 4}. Best is trial 3 with value: 0.999.


🏃 View run bedecked-mole-345 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/ac590665deff446aab90061a7683d730
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:11,890] Trial 8 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 58, 'max_depth': 10}. Best is trial 3 with value: 0.999.
  C = trial.suggest_loguniform("C", 0.01, 10)


🏃 View run spiffy-shrimp-101 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/9d1acacf135c47f288a615b5fdc56cb6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:15,226] Trial 9 finished with value: 0.9985 and parameters: {'model': 'LogisticRegression', 'C': 1.0265540844680492}. Best is trial 3 with value: 0.999.


🏃 View run skittish-donkey-415 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/0503c14897f3442881892c4a4665df36
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:22,565] Trial 10 finished with value: 0.9985 and parameters: {'model': 'RandomForest', 'n_estimators': 200, 'max_depth': 8}. Best is trial 3 with value: 0.999.


🏃 View run rogue-hen-113 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/d74c030fa5044d28b940069b8d12d109
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:27,087] Trial 11 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 50, 'max_depth': 10}. Best is trial 3 with value: 0.999.


🏃 View run blushing-snail-481 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/4fb5986ce78e40cbbe889ce73c00d7bb
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:31,636] Trial 12 finished with value: 0.9985 and parameters: {'model': 'RandomForest', 'n_estimators': 59, 'max_depth': 10}. Best is trial 3 with value: 0.999.


🏃 View run melodic-stork-88 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/d25db212b39d49ac94c30490eaa51a6b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:36,717] Trial 13 finished with value: 0.997 and parameters: {'model': 'RandomForest', 'n_estimators': 119, 'max_depth': 3}. Best is trial 3 with value: 0.999.


🏃 View run monumental-wolf-212 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/9fabe4470c4a4524a55abcc7fc333d5a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:42,518] Trial 14 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 118, 'max_depth': 8}. Best is trial 3 with value: 0.999.


🏃 View run nervous-ox-863 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/30dacc556c2b4583b3591a3082c758b8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:49,067] Trial 15 finished with value: 0.9985 and parameters: {'model': 'RandomForest', 'n_estimators': 156, 'max_depth': 8}. Best is trial 3 with value: 0.999.


🏃 View run traveling-ray-986 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/39706ececbd74c2f8901bb1e866954b6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:31:54,771] Trial 16 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 88, 'max_depth': 6}. Best is trial 3 with value: 0.999.


🏃 View run casual-bear-12 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/3c4278ae0096455eae2f64cd81c341ae
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:32:02,810] Trial 17 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 154, 'max_depth': 9}. Best is trial 3 with value: 0.999.


🏃 View run rare-carp-140 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/d95c4e3131ad4dba9fc3942f2d814ed6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:32:09,244] Trial 18 finished with value: 0.999 and parameters: {'model': 'RandomForest', 'n_estimators': 85, 'max_depth': 5}. Best is trial 3 with value: 0.999.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run funny-ox-536 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/589d6c7f362941a5ba6eceb5d0e6b698
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


[I 2025-07-25 11:32:14,045] Trial 19 finished with value: 0.997 and parameters: {'model': 'XGBoost', 'eta': 0.012161397835376703, 'max_depth': 7}. Best is trial 3 with value: 0.999.


🏃 View run gregarious-calf-462 at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/169baba254d34453aeeedae87f65e61e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917
Best model: RandomForest
Best accuracy: 0.999


In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
best_params = study.best_params

model = RandomForestClassifier(
    n_estimators=best_params["n_estimators"],
    max_depth=best_params["max_depth"],
    random_state=42
)

model.fit(X_train, y_train)


preds = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, preds))
print("📋 Classification Report:\n", classification_report(y_test, preds))


with mlflow.start_run(run_name="best_randomforest_final"):
    mlflow.log_params({
        "model": "RandomForest",
        "n_estimators": best_params["n_estimators"],
        "max_depth": best_params["max_depth"]
    })
    mlflow.log_metric("accuracy", accuracy_score(y_test, preds))
    mlflow.sklearn.log_model(model, "model")




✅ Accuracy: 0.999
📋 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1994
           1       1.00      0.67      0.80         6

    accuracy                           1.00      2000
   macro avg       1.00      0.83      0.90      2000
weighted avg       1.00      1.00      1.00      2000





🏃 View run best_randomforest_final at: http://127.0.0.1:5000/#/experiments/242715092201218917/runs/cdd0294a5aac4a66bd5e886b2811f7df
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/242715092201218917


In [1]:
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.sklearn import load_model
from sklearn.metrics import classification_report
from gpt4all import GPT4All

# ✅ Step 1: Connect to MLflow server
mlflow.set_tracking_uri("http://127.0.0.1:5000")
client = MlflowClient()

# ✅ Step 2: Get top N runs
top_n = 3
df_runs = mlflow.search_runs(order_by=["metrics.accuracy DESC"])

if df_runs.empty:
    print("❌ No MLflow runs found.")
    exit()

models_report = ""

# ✅ Step 3: Try to load each model from valid path
for idx in range(top_n):
    run = df_runs.iloc[idx]
    run_id = run["run_id"]
    model_name = run.get("tags.model_name", f"Model_{idx+1}")
    accuracy = run.get("metrics.accuracy", "N/A")

    # 🔍 List artifact paths
    artifacts = client.list_artifacts(run_id)
    model_paths = [a.path for a in artifacts if a.is_dir]
    
    # 🔁 Try to load model from possible paths
    loaded = False
    for path in model_paths:
        try:
            model_uri = f"runs:/{run_id}/{path}"
            model = load_model(model_uri)
            print(f"✅ Loaded model from path: {path}")
            loaded = True
            break
        except Exception as e:
            continue

    if not loaded:
        print(f"❌ Could not load model for run {run_id}. Skipping.")
        continue

    # ✅ Step 4: Predict and build report
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred)

    models_report += f"\n### {model_name} (Run ID: {run_id}, Accuracy: {accuracy}) ###\n{report}\n"

# ✅ Step 5: Use GPT4All to compare models
if models_report.strip():
    llm = GPT4All("Llama-3.2-3B-Instruct-Q4_0.gguf")
    prompt = f"Compare the following models and list the best one:\n\n{models_report}"
    response = llm.generate(prompt)
    print("\n🧠 AI Agent Recommendation:\n", response)
else:
    print("⚠️ No valid models to compare.")


❌ No MLflow runs found.


IndexError: single positional indexer is out-of-bounds

In [2]:
import mlflow
from mlflow.tracking import MlflowClient
mlflow.set_tracking_uri("http://127.0.0.1:5000")
client = MlflowClient()

# List experiments
experiments = client.list_experiments()
print("Available Experiments:")
for exp in experiments:
    print(f" - {exp.name} (ID: {exp.experiment_id})")

# Use the right experiment
experiment_id = "1"  # Replace this with your actual experiment ID
df_runs = mlflow.search_runs(experiment_ids=[experiment_id], order_by=["metrics.accuracy DESC"])

if df_runs.empty:
    print("❌ No MLflow runs found in the selected experiment.")
    exit()


AttributeError: 'MlflowClient' object has no attribute 'list_experiments'