In [21]:
import mlflow
import pandas as pd
from gpt4all import GPT4All
mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment_name = "Fraud_Detection_Comparison"
experiment = mlflow.get_experiment_by_name(experiment_name)

df_runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])

# Sort top 5 runs by f1_score
df_sorted = df_runs.sort_values(by="metrics.accuracy", ascending=False)
def create_model_prompt(df):
    prompt = "Compare the following ML models based on their available metrics:\n\n"
    for idx, row in df.iterrows():
        prompt += f"Model {idx + 1}:\n"
        prompt += f" - Run ID: {row['run_id']}\n"
        prompt += f" - Model: {row.get('params.model', 'N/A')}\n"

        for metric in ["accuracy", "precision", "recall", "f1_score"]:
            col = f"metrics.{metric}"
            if col in row and pd.notnull(row[col]):
                prompt += f" - {metric.capitalize()}: {row[col]:.4f}\n"
        prompt += "\n"
    return prompt



model = GPT4All("Llama-3.2-3B-Instruct-Q4_0.gguf")  # adjust to your model name

prompt = create_model_prompt(df_sorted.head(5))  # Compare top 5 models

with model.chat_session():
   response = model.generate(prompt)
print("📊 GPT-4LL Agent's Ranking:\n", response)


📊 GPT-4LL Agent's Ranking:
 Based on the provided metrics, here's a comparison of the five models:

**Model Comparison**

| **Metric** | **Model 11 (RandomForest)** | **Model 32 (RandomForest)** | **Model 102 (RandomForest)** | **Model 120 (RandomForest)** | **Model 16 (SVC)** |
| --- | --- | --- | --- | --- | --- |
| Accuracy | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 0.9997 |
| Precision | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
| Recall | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 0.7500 |

**Key Observations**

* All four RandomForest models have an accuracy of 1.0000


In [9]:
print(df_runs.columns[df_runs.columns.str.startswith("metrics.")].tolist())


['metrics.precision', 'metrics.recall', 'metrics.accuracy', 'metrics.num_rows']


In [16]:
! Pip install gpt4all

