In [4]:
import mlflow
import pandas as pd
from gpt4all import GPT4All
# Connect to local MLflow server
mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.get_experiment_by_name("Fraud_Detection_Comparison")

# get_experiment_by_name returns None for an unknown experiment; fail with a
# clear message instead of an AttributeError on `.experiment_id` below.
if experiment is None:
    raise RuntimeError(
        "MLflow experiment 'Fraud_Detection_Comparison' was not found on "
        "http://127.0.0.1:5000"
    )

# Get all runs from the experiment
df_runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])

# Preview your data
print(df_runs[[
    "run_id", "params.model",
    "metrics.accuracy", "metrics.precision", "metrics.recall",
]])
def create_comparison_prompt(df, target_run_id):
    """Build an LLM prompt listing every run in ``df`` and asking for a ranking.

    Args:
        df: DataFrame with one row per run. A ``run_id`` column is required;
            ``params.model`` and ``metrics.<name>`` columns are used when
            present.
        target_run_id: Run ID the other models are to be compared against.

    Returns:
        The complete prompt as a single string.
    """
    # Must match the metric names announced in the header sentence below.
    metric_names = ("accuracy", "precision", "recall", "f1_score")

    parts = [
        f"Compare the following ML models to target run ID **{target_run_id}** using accuracy, precision, recall, and f1_score.\n\n"
    ]

    # Number by position rather than by index label, so the numbering is
    # 1..N even when the frame has a non-default (filtered/sorted/string) index.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        parts.append(f"Model {position}:\n")
        parts.append(f" - Run ID: {row['run_id']}\n")
        parts.append(f" - Model Type: {row.get('params.model', 'N/A')}\n")

        for metric in metric_names:
            col_name = f"metrics.{metric}"
            # Skip metrics that were never logged for this run (missing or NaN).
            if col_name in row and pd.notnull(row[col_name]):
                parts.append(f" - {metric.capitalize()}: {row[col_name]:.4f}\n")
        parts.append("\n")

    parts.append(
        f"Now rank these models from best to worst compared to the target model ({target_run_id}). Give a short explanation."
    )
    return "".join(parts)
# Choose a specific run ID to compare others against (e.g., best Logistic Regression model)
# Guard first: with no runs, `iloc[0]` would raise a bare IndexError.
if df_runs.empty:
    raise RuntimeError("No MLflow runs found to compare.")
target_run_id = df_runs.iloc[0]["run_id"]  # or pick based on best f1_score, etc.

prompt = create_comparison_prompt(df_runs, target_run_id)
print(prompt)  # Optional: see what you're sending to GPT


# Load your local model — path must match your installed model
model = GPT4All("Llama-3.2-3B-Instruct-Q4_0.gguf")

# Generate response from the comparison prompt built above. (A hard-coded
# "model A vs model B" prompt previously overwrote it here, so the MLflow
# runs were never actually sent to the model.)
response = model.generate(prompt)

print("AI Agent Response:")
print(response)

                               run_id        params.model  metrics.accuracy  \
0    2f080b7d8ddb4b1f9e7434336e4cad97                 SVC          0.999000   
1    8367310345f2407aa2ee94cbb1c45d22        RandomForest          0.999000   
2    5993360bb554416da62b9b595775d67b  LogisticRegression          0.998333   
3    b3e2f5192700497cae7fc54f27332dbf                 SVC          0.998000   
4    8c2caeccca5844a8b43abf52e7ba9a33        RandomForest          0.998333   
..                                ...                 ...               ...   
251  dd23fe8ddbad4daa9c0b1653bd6b4b3f                None               NaN   
252  1bd8990ddc2c48eea080154b98626d03                None               NaN   
253  6f547ccf19724a27b999fe9e6fd600be                None               NaN   
254  4dc6a4af64924d27a7c9bb0eb9c80ee8                None               NaN   
255  3e5d782a089d4d84bfee96aff5f944c3                None               NaN   

     metrics.precision  metrics.recall  
0         