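### Comparing a Groq-hosted model and Gemini 2.5 Pro on the UCI Diabetes dataset
- Each LLM is asked to recommend differential-privacy (DP-SGD) settings; models trained with those settings are compared against a fixed-default DP run. Note: the cells in this section run the Insurance regression task.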


In [23]:
# --- Rest of imports and setup from previous Cell 1 ---
import pandas as pd
import numpy as np
import json
import os
import warnings
from dotenv import load_dotenv

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, roc_auc_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Opacus
from opacus import PrivacyEngine
from opacus.validators import ModuleValidator # Optional

In [24]:
# Description: Import libraries, set up constants, initialize/manage results list.

# --- Make sure results_list is either reset or you append with clear names ---
# Option 1: Reset for a fresh run on this dataset
# results_list = []
# Option 2: Keep previous results (make sure Run Type names are distinct)
#print(f"Starting Diabetes run. Current results count: {len(results_list)}")

# --- Dataset constants ---
# Diabetes (binary classification).
# NOTE(review): these two constants are not referenced by the cells visible in this section.
DATA_FILE_DIABETES = 'diabetic_data.csv' # Assuming this is the filename
TARGET_COLUMN_DIABETES = 'readmitted_binary' # Our new binary target

# Insurance (regression).
DATA_FILE_INSURANCE = 'insurance_cleaned.csv' # YOUR FILENAME HERE
TARGET_COLUMN_INSURANCE = 'charges' # Target for regression

# LLM Clients
# NOTE(review): these imports sit mid-cell; they would be easier to find in the import cell above.
from groq import Groq
from google import genai
from google.genai import types

# Suppress warnings.
# NOTE: this silences *all* warnings, including PyTorch shape-broadcast warnings
# emitted by loss functions, so shape mismatches will not be reported.
warnings.filterwarnings('ignore')
# Load variables from a local .env file so the os.getenv() calls below can see them.
load_dotenv()

# --- Base Training Hyperparameters (can be reused) ---
LEARNING_RATE = 0.01
EPOCHS = 10
BATCH_SIZE = 64
RANDOM_STATE = 42
TEST_SIZE = 0.2

# --- Default DP Parameters (will be recalculated) ---
DEFAULT_TARGET_EPSILON = 1.0
DEFAULT_TARGET_DELTA = 1e-5 # Placeholder, recalculate based on N
DEFAULT_MAX_GRAD_NORM = 1.0

# Seed the global RNGs so that weight initialisation, batch sampling and the
# noise drawn during DP training are repeatable when the notebook is re-run
# top to bottom. Previously RANDOM_STATE was defined but never applied here.
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- LLM client setup ---
# API keys and model identifiers come from the environment; the model names
# fall back to the literals below when the variables are unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GROQ_MODEL_NAME = os.environ.get("GROQ_MODEL_NAME", "llama3-70b-8192")
GEMINI_MODEL_NAME = os.environ.get("GEMINI_MODEL_NAME", "gemini-1.5-flash")

# Each client stays None unless its key is present and construction succeeds;
# later cells check these names before attempting an LLM-driven run.
groq_client = None
gemini_client = None

if not GROQ_API_KEY:
    print("Warning: GROQ_API_KEY not found. Groq LLM will not be used.")
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
        print(f"Groq client initialized successfully for model: {GROQ_MODEL_NAME}")
    except Exception as groq_init_error:
        print(f"Error initializing Groq client: {groq_init_error}")

if not GEMINI_API_KEY:
    print("Warning: GEMINI_API_KEY not found. Gemini LLM will not be used.")
else:
    try:
        gemini_client = genai.Client(api_key=GEMINI_API_KEY)
        print(f"Gemini client initialized successfully for model: {GEMINI_MODEL_NAME}")
    except Exception as gemini_init_error:
        print(f"Error initializing Gemini client: {gemini_init_error}")

# --- LLM Helper Functions (reuse from previous Cell 6) ---
# Includes: create_llm_prompt, get_gemini_config, get_groq_config
# Make sure create_llm_prompt is the latest version including ML context

# --- DP Training Function (reuse from previous Cell 7) ---
# Includes: train_evaluate_dp_model (ensure it handles classification)

# --- Logistic Regression Model Class (reuse from previous Cell 4) ---
class LogisticRegression(nn.Module):
    """A single linear layer mapping ``n_features`` inputs to one raw output.

    ``forward`` applies no activation, so the result is a logit when the model
    is paired with ``BCEWithLogitsLoss`` and a plain linear prediction when it
    is paired with ``MSELoss``.
    """

    def __init__(self, n_features):
        super().__init__()
        self.linear = nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Return the un-activated linear output for the batch ``x``."""
        raw_output = self.linear(x)
        return raw_output


print("\nBasic setup complete.")

Using device: cpu
Groq client initialized successfully for model: deepseek-r1-distill-llama-70b
Gemini client initialized successfully for model: gemini-2.5-pro-exp-03-25

Basic setup complete.


In [3]:
# Candidate Groq-hosted model identifiers for comparison runs.
# NOTE(review): this list is not referenced by any cell visible in this section;
# the Groq run uses GROQ_MODEL_NAME instead — confirm whether it is still needed.
groq_models_to_test = [
    "llama3-70b-8192",
    "mixtral-8x7b-32768",
    "gemma-7b-it" # Add other available/suitable models
]

In [8]:
# Accumulator for per-run result dicts; the run cells append to it.
# Re-running this cell discards every result recorded so far.
results_list = []


In [11]:
# Description: Define the standard prompt structure and functions to call LLMs.

# # v2
# def create_llm_prompt(task_config, schema_string, data_shape):
#     """Creates a more detailed prompt string for the LLM, guiding parameter choices."""
#     prompt = f"""
# Analyze the provided dataset context and task to recommend **optimized and justified** Differential Privacy (DP) settings for training a Logistic Regression model using DP-SGD.
# The goal is to predict the target variable '{task_config['target_variable']}'.

# **Dataset Context:**
# - Name: {task_config['dataset_name']}
# - Domain: {task_config['data_domain']} (Note: Healthcare data is generally considered sensitive).
# - Task: {task_config['task_description']}
# - Schema (Original Columns): {schema_string}
# - Extra details: {task_config['details']} (Pay close attention to class imbalance).

# **Parameter Guidance - IMPORTANT:** Avoid generic default values. Base your recommendations *specifically* on the context provided above.

# Provide your recommendations ONLY in a structured JSON format. The JSON object must include the following keys:
# - "dp_algorithm": String, the specific DP algorithm variant recommended (e.g., "DP-SGD with Gaussian Noise").
# - "target_epsilon": Float, recommended privacy budget epsilon (e.g., 1.5). Justify this based on sensitivity, utility needs, and domain.
# - "target_delta": Float or String, recommended privacy budget delta (e.g., 1e-5 or suggest calculating as "1/N"). Justify choice.
# - "max_grad_norm": Float, recommended gradient clipping norm (e.g., 1.0). Justify based on model stability and potential gradient explosion, especially considering class imbalance if noted.
# - "preprocessing_suggestions": List of strings, specific preprocessing actions recommended BEFORE applying DP (e.g., "Remove: id", "Normalize: age, avg_glucose_level, bmi").
# - "column_sensitivity_epsilon": A dictionary where keys are original column names and values are *conceptual* relative sensitivity floats (0.0=low, 1.0=high/ID) or labels (Low, Medium, High). This guides understanding, not direct budget split in standard DP-SGD. Exclude the target variable.
# - "reasoning": String, concise reasoning behind the overall recommendations (epsilon, delta, max_grad_norm choices, linking back to context).

# JSON Output ONLY:
# """
#     return prompt

# #v3
# def create_llm_prompt(task_config, schema_string, data_shape):
#     """Creates a more detailed prompt string for the LLM, guiding parameter choices
#        and including general ML best practices context."""

#     # Calculate approximate training size N for context
#     approx_N_train = int(data_shape[0] * (1-TEST_SIZE)) if data_shape else 'Unknown'

#     prompt = f"""
# Analyze the provided dataset context and task to recommend **optimized and justified** Differential Privacy (DP) settings for training a Logistic Regression model using DP-SGD.
# The goal is to predict the target variable '{task_config['target_variable']}'.

# **Dataset Context:**
# - Name: {task_config['dataset_name']}
# - Domain: {task_config['data_domain']} (Note: Healthcare data is generally considered sensitive).
# - Task: {task_config['task_description']}
# - Schema (Original Columns): {schema_string}
# - Data Shape: {data_shape} (Approx. Training N = {approx_N_train})
# - Extra details: {task_config['details']} (Pay close attention to class imbalance).

# **General ML Best Practices Context (Keep these in mind):**
# - **Normalization/Scaling:** Features with different scales (like 'age' vs 'avg_glucose_level') MUST be normalized or standardized (e.g., StandardScaler, MinMaxScaler) for models like Logistic Regression and especially before applying gradient clipping in DP-SGD. This ensures stable gradient computations. Apply scaling AFTER splitting data and ideally AFTER imputation if applicable.
# - **Gradient Stability & Clipping:** DP-SGD uses gradient clipping (`max_grad_norm`) to bound the influence of any single data point. Choosing the norm value is a trade-off:
#     - Too low: Clips potentially useful gradient information, slowing learning or preventing convergence, especially for minority classes or complex patterns.
#     - Too high: Less protection against outliers, potentially higher noise required for the same privacy budget (ε).
#     - Imbalanced Data Impact: Gradients from rare class samples might be infrequent but large; aggressive clipping can disproportionately affect learning for that class.
# - **Imbalanced Data Handling:** Beyond class weighting in the loss (which is assumed here), model evaluation should focus on metrics like F1-score, Precision, Recall for the minority class, not just accuracy. The goal is often to improve detection of the rare class.
# - **Preprocessing Order:** Typically: Split Data -> Impute Missing -> Encode Categorical -> Scale Numerical -> Train Model. DP is applied during the training step.

# **Parameter Guidance - IMPORTANT:** Based on the Dataset Context AND the ML Best Practices above, provide specific, justified recommendations. Avoid generic defaults.

# 1.  **`target_epsilon`**: Balance '{task_config['data_domain']}' sensitivity vs. utility needed for training. Justify the specific trade-off. (Range 1.0-5.0 often considered, but justify *your* choice).
# 2.  **`target_delta`**: Recommend a specific small value (e.g., 1e-5, 1e-6) or suggest "1/N". Justify why (e.g., related to approx N={approx_N_train}).
# 3.  **`max_grad_norm`**: **Connect this directly to the Gradient Stability & Imbalanced Data points above.** Given the heavy imbalance, suggest a value (e.g., range 5.0 - 15.0, or a specific reasoned value) likely higher than a generic default (like 1.0) to preserve minority class signals. Justify based *explicitly* on imbalance and the need for stable yet informative gradients.
# 4.  **`column_sensitivity_epsilon`**: Conceptual relative sensitivity hints (0.0 low, 1.0 high). Reflect potential identifiability/sensitivity based on domain/name. Exclude target.
# 5.  **`reasoning`**: Concise but detailed justification for epsilon, delta, and max_grad_norm, *explicitly linking* choices to dataset context (domain, N, imbalance) and the relevant ML best practices mentioned (normalization, gradient stability).

# **Output Format:**
# Provide recommendations ONLY in a structured JSON format with keys: "dp_algorithm", "target_epsilon", "target_delta", "max_grad_norm", "preprocessing_suggestions", "column_sensitivity_epsilon", "reasoning".

# JSON Output ONLY:
# """
#     return prompt

# Description: Define the standard prompt structure and functions to call LLMs.

def create_llm_prompt(task_config, schema_string, data_shape, task_type="classification", test_size=None):
    """Build the prompt asking an LLM for DP-SGD settings tailored to a task.

    Args:
        task_config: Mapping with the keys 'target_variable', 'dataset_name',
            'data_domain', 'task_description' and 'details'.
        schema_string: Human-readable list of the original column names.
        data_shape: Shape tuple of the full dataset; its first entry is used to
            estimate the training-set size. A falsy value yields 'Unknown'.
        task_type: "regression" selects the regression wording; every other
            value is treated as classification throughout the prompt.
        test_size: Hold-out fraction used for the training-size estimate.
            Defaults to the notebook-level TEST_SIZE when omitted.

    Returns:
        The prompt string, ending with the "JSON Output ONLY:" marker line.
    """
    if data_shape:
        holdout_fraction = TEST_SIZE if test_size is None else test_size
        approx_N_train = int(data_shape[0] * (1 - holdout_fraction))
    else:
        approx_N_train = 'Unknown'

    # One flag drives every task-dependent fragment, so an unrecognised
    # task_type can no longer mix classification and regression wording.
    is_regression = task_type == "regression"

    if is_regression:
        imbalance_guidance = "" # No class imbalance for regression
        target_guidance_metrics = "metrics like Mean Absolute Error (MAE), Mean Squared Error (MSE), or R2 Score. The goal is often to minimize error."
        max_grad_norm_focus = "focus on overall gradient stability, considering the range and scale of the target variable values, rather than specific class signals. A moderate value (e.g., 1.0-10.0, depending on target scale and feature normalization) is common."
        epsilon_utility_focus = "utility needs for accurate predictions (e.g., low MAE/MSE)"
        model_type_in_prompt = "Linear Regression" # Or generic "Regression Model"
        clipping_context_note = "For regression, consider the scale of the target variable when thinking about gradient magnitudes."
    else: # classification (default)
        imbalance_guidance = "(Pay close attention to class imbalance if mentioned in 'Extra details')."
        target_guidance_metrics = "metrics like F1-score, Precision, Recall for the minority class, not just accuracy. The goal is often to improve detection of the rare class."
        max_grad_norm_focus = "**Connect this directly to the Gradient Stability & Imbalanced Data points above.** Given potential class imbalance (see 'Extra details'), suggest a value (e.g., range 5.0 - 15.0, or a specific reasoned value) likely higher than a generic default (like 1.0) to preserve minority class signals. Justify based *explicitly* on imbalance and the need for stable yet informative gradients."
        epsilon_utility_focus = "utility needs for model training (which often requires sufficient signal)"
        model_type_in_prompt = "Logistic Regression"
        clipping_context_note = "Imbalanced Data Impact: Gradients from rare class samples might be infrequent but large; aggressive clipping can disproportionately affect learning for that class."


    prompt = f"""
Analyze the provided dataset context and task to recommend **optimized and justified** Differential Privacy (DP) settings for training a {model_type_in_prompt} model using DP-SGD.
The goal is to predict the target variable '{task_config['target_variable']}'.

**Dataset Context:**
- Name: {task_config['dataset_name']}
- Domain: {task_config['data_domain']}
- Task: {task_config['task_description']}
- Schema (Original Columns): {schema_string}
- Data Shape: {data_shape} (Approx. Training N = {approx_N_train})
- Extra details: {task_config['details']} {imbalance_guidance}

**General ML Best Practices Context (Keep these in mind):**
- **Normalization/Scaling:** Features MUST be normalized or standardized for models like {model_type_in_prompt} and especially before applying gradient clipping in DP-SGD.
- **Gradient Stability & Clipping (`max_grad_norm`):** Bound influence of single points. Trade-off:
    - Too low: Clips useful info, hinders learning.
    - Too high: Less protection, more noise needed.
    - {clipping_context_note}
- **Evaluation Focus ({task_type}):** Model evaluation should focus on {target_guidance_metrics}
- **Preprocessing Order:** Typically: Split Data -> Impute -> Encode -> Scale -> Train.

**Parameter Guidance - IMPORTANT:** Base recommendations *specifically* on context. Avoid generic defaults.

1.  **`target_epsilon`**: Balance '{task_config['data_domain']}' sensitivity vs. {epsilon_utility_focus}. Justify. (Range 1.0-5.0 often considered, but justify *your* choice).
2.  **`target_delta`**: Recommend small value (e.g., 1e-5) or "1/N". Justify (e.g., N={approx_N_train}).
3.  **`max_grad_norm`**: For this {task_type} task, {max_grad_norm_focus} Justify.
4.  **`column_sensitivity_epsilon`**: Conceptual relative sensitivity hints (0.0 low, 1.0 high). Exclude target.
5.  **`reasoning`**: Detailed justification for epsilon, delta, `max_grad_norm`, linking to dataset context (domain, N, imbalance/target scale) and ML practices.

**Output Format:**
JSON ONLY with keys: "dp_algorithm", "target_epsilon", "target_delta", "max_grad_norm", "preprocessing_suggestions", "column_sensitivity_epsilon", "reasoning".

JSON Output ONLY:
"""
    return prompt



def get_gemini_config(prompt, client, model_name=None):
    """Ask Gemini for a DP configuration and return it as a dict.

    The reply may wrap the JSON in prose or code fences, so the text between
    the first '{' and the last '}' is what gets parsed.

    Args:
        prompt: Prompt text sent as the request contents.
        client: Gemini client exposing ``models.generate_content``; a falsy
            value skips the call.
        model_name: Model identifier to query. Defaults to the notebook-level
            GEMINI_MODEL_NAME when omitted (mirrors ``get_groq_config``).

    Returns:
        The parsed configuration dict, or None when the client is missing, the
        call fails, the reply has no text, or no JSON object can be parsed.
        Missing expected keys only produce a warning.
    """
    if not client:
        print("Gemini client not available.")
        return None
    print("\nSending request to Gemini...")
    response = None  # bound up front so the error handler can inspect it safely
    try:
        if model_name is None:
            model_name = GEMINI_MODEL_NAME
        response = client.models.generate_content(
            model=model_name, contents=prompt
            )
        response_text = response.text
        print("Gemini Response Received.")
        if not response_text:
            # Nothing to parse; show the candidates, which might show safety blocks.
            print("Error: Gemini response contained no text.")
            print("Gemini Response Content (if available):", getattr(response, "candidates", None))
            return None
        # Extract JSON part
        start_index = response_text.find('{')
        end_index = response_text.rfind('}')
        # A closing brace that precedes the opening one is not a JSON object either.
        if start_index == -1 or end_index <= start_index:
            print("Error: Could not find JSON object in Gemini response.")
            print("Raw Response:", response_text)
            return None
        json_string_only = response_text[start_index : end_index + 1]
        config = json.loads(json_string_only)
        print("Successfully parsed Gemini config.")
        print(config)
        # Basic validation
        required_keys = ["dp_algorithm", "target_epsilon", "target_delta", "max_grad_norm", "preprocessing_suggestions", "column_sensitivity_epsilon", "reasoning"]
        if not all(key in config for key in required_keys):
            print("Warning: Gemini response missing some required keys.")
        return config
    except Exception as e:
        print(f"Error during Gemini API call or parsing: {e}")
        if response is not None:
            print("Gemini Response Content (if available):", getattr(response, "candidates", None)) # Might show safety blocks
        return None

def get_groq_config(prompt, client, model_name):
    """Request a DP configuration from a Groq-hosted model.

    The request asks for JSON-object output, so the message content is parsed
    directly. Returns the parsed configuration, or None when no client is
    supplied or when the call or the parsing fails. Missing expected keys only
    trigger a warning.
    """
    if not client:
        print("Groq client not available.")
        return None

    print("\nSending request to Groq...")
    expected_keys = (
        "dp_algorithm",
        "target_epsilon",
        "target_delta",
        "max_grad_norm",
        "preprocessing_suggestions",
        "column_sensitivity_epsilon",
        "reasoning",
    )
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
            temperature=0.2,
            top_p=0.8,
            max_tokens=3024,
        )
        raw_reply = completion.choices[0].message.content
        print("Groq Response Received.")
        parsed_config = json.loads(raw_reply)
        print("Successfully parsed Groq config.")
        print(parsed_config)

        # Basic validation: warn, but still hand back whatever was parsed.
        absent_keys = [name for name in expected_keys if name not in parsed_config]
        if absent_keys:
            print("Warning: Groq response missing some required keys.")
        return parsed_config
    except Exception as groq_error:
        print(f"Error during Groq API call or parsing: {groq_error}")
        return None


print("LLM Helper functions defined.")

LLM Helper functions defined.


In [12]:
# Description: Function to train and evaluate a DP model using Opacus, for classification or regression.

def _resolve_target_delta(delta_spec, train_loader):
    """Turn a configured delta (number, numeric string or "1/N") into a number."""
    # bool is a subclass of int; True/False is never a meaningful delta.
    if isinstance(delta_spec, (int, float)) and not isinstance(delta_spec, bool):
        return delta_spec
    if isinstance(delta_spec, str) and "1/N" not in delta_spec:
        # LLMs sometimes quote the number (e.g. "1e-5"); honour it rather than
        # silently replacing it with 1/N.
        try:
            return float(delta_spec)
        except ValueError:
            pass
    return 1 / len(train_loader.dataset)


def train_evaluate_dp_model(
    config, run_name, train_loader, test_loader,
    n_features, device, epochs, learning_rate,
    task_type="classification", # New parameter
    pos_weight_tensor=None, # Only for classification
    target_metric_prefix="Class 1" # For naming metrics in results
):
    """Train a DP linear model with Opacus and evaluate it on the test loader.

    Args:
        config: Dict of DP settings. Keys read: "target_epsilon",
            "target_delta" (number, numeric string, or a string containing
            "1/N"), "max_grad_norm", "reasoning" and "llm_model_name".
            None skips the run.
        run_name: Label stored under "Run Type" and used in log lines.
        train_loader: DataLoader wrapped by the privacy engine; its dataset
            length is used when delta is "1/N" or cannot be interpreted.
        test_loader: DataLoader iterated for evaluation.
        n_features: Input width of the linear model.
        device: Torch device for the model and batches.
        epochs: Number of training epochs (also given to the privacy engine).
        learning_rate: SGD learning rate.
        task_type: "classification" (BCE-with-logits loss, thresholded
            predictions) or "regression" (MSE loss, raw outputs).
        pos_weight_tensor: Optional positive-class weight for classification.
        target_metric_prefix: Label embedded in the classification metric names.

    Returns:
        Dict of run settings and metrics, or None when ``config`` is None or
        the privacy engine cannot be attached.

    Raises:
        ValueError: If ``task_type`` is neither "classification" nor
            "regression". Raised before any model or engine is created.
    """
    print(f"\n--- Running: {run_name} (Task: {task_type}) ---")
    if config is None:
        print("Skipping run due to missing configuration.")
        return None

    # Validate up front so a typo fails fast instead of after engine setup.
    if task_type not in ("classification", "regression"):
        raise ValueError("Invalid task_type. Must be 'classification' or 'regression'.")

    target_eps = config.get("target_epsilon", DEFAULT_TARGET_EPSILON)
    target_del_config = config.get("target_delta", "1/N")
    max_norm = config.get("max_grad_norm", DEFAULT_MAX_GRAD_NORM)
    llm_reasoning = config.get("reasoning", "N/A")
    llm_eps_suggestion = config.get("target_epsilon", "N/A (Default used)")

    actual_delta = _resolve_target_delta(target_del_config, train_loader)

    dp_model = LogisticRegression(n_features).to(device) # Model class is same for both tasks
    dp_optimizer = optim.SGD(dp_model.parameters(), lr=learning_rate)
    privacy_engine = PrivacyEngine()
    try:
        dp_model, dp_optimizer, dp_data_loader = privacy_engine.make_private_with_epsilon(
            module=dp_model, optimizer=dp_optimizer, data_loader=train_loader,
            max_grad_norm=max_norm, target_epsilon=target_eps, target_delta=actual_delta, epochs=epochs
        )
        print(f"Opacus Attached. Target ε={target_eps:.2f}, Target δ={actual_delta:.2e}, Max Grad Norm={max_norm}")
    except Exception as e:
        print(f"Error attaching Opacus PrivacyEngine: {e}")
        return None

    # --- Set Loss Function based on task_type ---
    if task_type == "classification":
        if pos_weight_tensor is not None:
            # The weight must live on the same device as the logits.
            pos_weight_tensor = pos_weight_tensor.to(device)
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)
    else:
        criterion = nn.MSELoss()

    print(f"Training DP Model ({task_type})...")
    dp_model.train()
    for _ in range(epochs):
        for batch_X, batch_y in dp_data_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            dp_optimizer.zero_grad()
            outputs = dp_model(batch_X)
            # NOTE(review): assumes batch_y has the same shape as outputs,
            # i.e. (batch, 1) — confirm in the data-preparation cell.
            loss = criterion(outputs, batch_y)
            loss.backward()
            dp_optimizer.step()

    final_epsilon = privacy_engine.get_epsilon(delta=actual_delta)
    print(f"DP Training Complete. Final ε = {final_epsilon:.4f}")

    # --- Evaluation based on task_type ---
    print(f"Evaluating DP Model ({task_type})...")
    dp_model.eval()
    all_preds_dp = []
    all_targets_dp = []
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = dp_model(batch_X)
            if task_type == "classification":
                # Sigmoid then round, i.e. threshold the probability at 0.5.
                preds = torch.round(torch.sigmoid(outputs))
            else: # regression
                preds = outputs # Direct output
            all_preds_dp.extend(preds.cpu().numpy().flatten())
            all_targets_dp.extend(batch_y.cpu().numpy().flatten())

    # Initialize results dictionary
    results = {
        "Run Type": run_name,
        "LLM Used": config.get("llm_model_name", "N/A"),
        "Target Epsilon": target_eps, "Final Epsilon": final_epsilon,
        "Target Delta": actual_delta, "Max Grad Norm": max_norm,
        "LLM Epsilon Suggestion": llm_eps_suggestion, "LLM Reasoning": llm_reasoning
    }

    if task_type == "classification":
        accuracy_dp = accuracy_score(all_targets_dp, all_preds_dp)
        precision_dp = precision_score(all_targets_dp, all_preds_dp, pos_label=1, zero_division=0)
        recall_dp = recall_score(all_targets_dp, all_preds_dp, pos_label=1, zero_division=0)
        f1_dp = f1_score(all_targets_dp, all_preds_dp, pos_label=1, zero_division=0)
        print(f"Accuracy: {accuracy_dp:.4f}, Precision ({target_metric_prefix}): {precision_dp:.4f}, Recall ({target_metric_prefix}): {recall_dp:.4f}, F1 ({target_metric_prefix}): {f1_dp:.4f}")
        results.update({
            "Accuracy": accuracy_dp,
            f"Precision ({target_metric_prefix})": precision_dp,
            f"Recall ({target_metric_prefix})": recall_dp,
            f"F1 ({target_metric_prefix})": f1_dp
        })
    else: # regression
        mae_dp = mean_absolute_error(all_targets_dp, all_preds_dp)
        mse_dp = mean_squared_error(all_targets_dp, all_preds_dp)
        r2_dp = r2_score(all_targets_dp, all_preds_dp)
        print(f"MAE: {mae_dp:.2f}, MSE: {mse_dp:.2f}, R2 Score: {r2_dp:.4f}")
        results.update({"MAE": mae_dp, "MSE": mse_dp, "R2 Score": r2_dp})
    # ----------------------------------

    print(f"{run_name} results recorded.")
    return results

print("DP Training/Evaluation function (with task_type) defined/updated.")

DP Training/Evaluation function (with task_type) defined/updated.


In [13]:
# Description: Define task details for Insurance dataset (Regression) and generate prompt.

print("\n--- Preparing Config and Prompt for Insurance Regression ---")
# Both inputs are looked up by name so that a load cell that was skipped (or
# failed) leads to the "skipping" branch below instead of a NameError.
if globals().get('df_insurance') is not None and 'original_columns_insurance' in globals():
    # Regression details
    regression_details = f"Predict continuous medical charges ('{TARGET_COLUMN_INSURANCE}'). Focus on minimizing MAE/MSE. Target variable likely has a skewed distribution."

    # NOTE(review): "model_type" and "dp_mechanism_family" are not read by the
    # create_llm_prompt defined in this notebook — confirm whether they are still needed.
    task_config_insurance = {
        "dataset_name": "Kaggle Medical Cost Personal",
        "data_domain": "Healthcare/Finance",
        "task_description": f"Train a Linear Regression model using DP-SGD to predict medical charges ('{TARGET_COLUMN_INSURANCE}').",
        "target_variable": TARGET_COLUMN_INSURANCE,
        "model_type": "Linear Regression", # For LLM context
        "dp_mechanism_family": "DP-SGD",
        "details": regression_details
    }

    schema_string_insurance = ", ".join(original_columns_insurance)
    data_shape_tuple_insurance = df_insurance.shape

    # Call create_llm_prompt with task_type="regression"
    llm_prompt_insurance = create_llm_prompt(
        task_config_insurance,
        schema_string_insurance,
        data_shape_tuple_insurance,
        task_type="regression" # Specify task type
    )
    print("Insurance Regression task config and LLM prompt prepared.")
    # Optional: print prompt
    # print("\n--- Insurance Regression LLM Prompt ---")
    # print(llm_prompt_insurance)
else:
    print("Skipping Insurance Regression prompt creation due to data loading error.")
    # Downstream run cells test this name for truthiness before calling an LLM.
    llm_prompt_insurance = None


--- Preparing Config and Prompt for Insurance Regression ---
Insurance Regression task config and LLM prompt prepared.


In [14]:
# Description: Baseline DP run for Insurance Regression using the notebook's fixed default parameters.

print("\n--- Running: Insurance Fixed DP Regression ---")
if n_features_ins is None or train_loader_ins is None or 'DEFAULT_TARGET_DELTA_INS' not in locals():
    print("Skipping Insurance Fixed DP Regression due to missing components.")
else:
    # Same shape as an LLM-produced config, but filled from the defaults.
    fixed_dp_config_ins_reg = dict(
        dp_algorithm="DP-SGD with Gaussian Noise (Fixed)",
        target_epsilon=DEFAULT_TARGET_EPSILON,
        target_delta=DEFAULT_TARGET_DELTA_INS,
        max_grad_norm=DEFAULT_MAX_GRAD_NORM,  # May need adjustment for regression
        reasoning="Using standard fixed parameters for Insurance Regression.",
        llm_model_name="N/A (Fixed Defaults)",
        preprocessing_suggestions=["Default"],
        column_sensitivity_epsilon={"Info": "Fixed parameters"},
    )
    results_fixed_ins_reg = train_evaluate_dp_model(
        config=fixed_dp_config_ins_reg,
        run_name="Insurance Fixed DP Regression",
        train_loader=train_loader_ins,
        test_loader=test_loader_ins,
        n_features=n_features_ins,
        device=device,
        epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        task_type="regression",
    )
    # Only successful runs (non-empty result dicts) are recorded.
    if results_fixed_ins_reg:
        results_list.append(results_fixed_ins_reg)


--- Running: Insurance Fixed DP Regression ---

--- Running: Insurance Fixed DP Regression (Task: regression) ---
Opacus Attached. Target ε=1.00, Target δ=9.35e-04, Max Grad Norm=1.0
Training DP Model (regression)...
DP Training Complete. Final ε = 0.9917
Evaluating DP Model (regression)...
MAE: 12967.19, MSE: 323394481.00, R2 Score: -1.0831
Insurance Fixed DP Regression results recorded.


In [15]:
# Description: Ask Gemini for a DP configuration for Insurance Regression, then train and evaluate with it.

print("\n--- Running: Insurance Gemini DP Regression ---")
if not gemini_client:
    print("Skipping Insurance Gemini run - client not initialized.")
elif not (llm_prompt_insurance and n_features_ins is not None and train_loader_ins is not None):
    print("Skipping Insurance Gemini run - missing components.")
else:
    gemini_config_ins_reg = get_gemini_config(llm_prompt_insurance, gemini_client)
    if not gemini_config_ins_reg:
        print("Failed to get config from Gemini for Insurance Regression.")
    else:
        # Tag the config so the results table shows which model produced it.
        gemini_config_ins_reg["llm_model_name"] = GEMINI_MODEL_NAME
        results_gemini_ins_reg = train_evaluate_dp_model(
            config=gemini_config_ins_reg,
            run_name="Insurance Gemini DP Regression",
            train_loader=train_loader_ins,
            test_loader=test_loader_ins,
            n_features=n_features_ins,
            device=device,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            task_type="regression",
        )
        if results_gemini_ins_reg:
            results_list.append(results_gemini_ins_reg)


--- Running: Insurance Gemini DP Regression ---

Sending request to Gemini...
Gemini Response Received.
Successfully parsed Gemini config.
{'dp_algorithm': 'DP-SGD', 'target_epsilon': 3.0, 'target_delta': 0.0009345794392523364, 'max_grad_norm': 1.0, 'preprocessing_suggestions': ['Split data into training and testing sets first.', 'Handle missing values if any (e.g., mean/median imputation for numerical, mode for categorical).', "Encode categorical features: 'sex' and 'smoker' can be binary encoded (0/1). 'region' should be One-Hot Encoded.", "Critically, transform the target variable 'charges' due to its likely skewed distribution and large range. Apply a log transformation (e.g., `numpy.log1p`) first, then standardize the transformed target (e.g., using `sklearn.preprocessing.StandardScaler`). This is crucial for stabilizing gradients and improving model performance.", "Scale numerical features ('age', 'bmi', 'children') using `sklearn.preprocessing.StandardScaler` after transforming

In [18]:
# Description: Ask the configured Groq model for a DP configuration for Insurance Regression, then train and evaluate with it.

print("\n--- Running: Insurance Groq DP Regression ---")
if not groq_client:
    print("Skipping Insurance Groq run - client not initialized.")
elif not (llm_prompt_insurance and n_features_ins is not None and train_loader_ins is not None):
    print("Skipping Insurance Groq run - missing components.")
else:
    llama_config_ins_reg = get_groq_config(llm_prompt_insurance, groq_client, GROQ_MODEL_NAME)
    if not llama_config_ins_reg:
        print("Failed to get config from Groq for Insurance Regression.")
    else:
        # Tag the config so the results table shows which model produced it.
        llama_config_ins_reg["llm_model_name"] = GROQ_MODEL_NAME
        results_llama_ins_reg = train_evaluate_dp_model(
            config=llama_config_ins_reg,
            run_name=f"Insurance Groq ({GROQ_MODEL_NAME}) DP Regression",
            train_loader=train_loader_ins,
            test_loader=test_loader_ins,
            n_features=n_features_ins,
            device=device,
            epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            task_type="regression",
        )
        if results_llama_ins_reg:
            results_list.append(results_llama_ins_reg)


--- Running: Insurance Groq DP Regression ---

Sending request to Groq...
Groq Response Received.
Successfully parsed Groq config.
{'dp_algorithm': 'DP-SGD', 'target_epsilon': 2.0, 'target_delta': 1e-05, 'max_grad_norm': 1.0, 'preprocessing_suggestions': ['Normalize features using StandardScaler to ensure features are on a similar scale.', 'Encode categorical variables (sex, smoker, region, children) using OneHotEncoder.'], 'column_sensitivity_epsilon': {'age': 0.8, 'sex': 0.2, 'bmi': 0.8, 'children': 0.2, 'smoker': 0.2, 'region': 0.2}, 'reasoning': 'The target_epsilon of 2.0 balances privacy and utility, suitable for healthcare/finance data. Target_delta is set to 1e-5 to ensure strong privacy guarantees. max_grad_norm of 1.0 is chosen to stabilize gradients without excessive clipping. Column sensitivity values reflect the relative importance and privacy concerns of each feature.'}

--- Running: Insurance Groq (deepseek-r1-distill-llama-70b) DP Regression (Task: regression) ---
Opacu

In [19]:
# Description: Show the results from ALL runs in a table.

if results_list:
    results_df = pd.DataFrame(results_list)
    # Preferred column order, including ALL known metrics, ordered logically.
    cols_order = [
        "Run Type", "LLM Used",
        "Target Epsilon", "Final Epsilon", "Target Delta", "Max Grad Norm",
        "LLM Epsilon Suggestion", # Moved up for easier comparison with Target Epsilon
        # Classification Metrics
        "Accuracy", "Precision (Stroke)", "Recall (Stroke)", "F1 (Stroke)",
        "Precision (Readmit)", "Recall (Readmit)", "F1 (Readmit)",
        # Regression Metrics
        "MAE", "MSE", "R2 Score",
        # LLM Reasoning last as it can be long
        "LLM Reasoning"
    ]
    # Columns the preferred order does not list (e.g. metrics recorded under a
    # different target_metric_prefix such as the default "Class 1") used to be
    # dropped silently; keep them, placed just before the long reasoning text.
    extra_cols = [col for col in results_df.columns if col not in cols_order]
    display_cols = cols_order[:-1] + extra_cols + cols_order[-1:]

    # reindex adds any expected-but-missing column as NaN; fillna then swaps
    # every NaN for "N/A" for display. A new frame is built rather than
    # mutating in place, so the cell can be re-run safely.
    results_df = results_df.reindex(columns=display_cols).fillna("N/A")

    pd.set_option('display.max_colwidth', 100)
    pd.set_option('display.width', 1200) # Wider for more columns
    print("\n--- Combined Experiment Results (Including Insurance Regression) ---")
    display(results_df)
else:
    print("No results recorded yet.")


--- Combined Experiment Results (Including Insurance Regression) ---


Unnamed: 0,Run Type,LLM Used,Target Epsilon,Final Epsilon,Target Delta,Max Grad Norm,LLM Epsilon Suggestion,Accuracy,Precision (Stroke),Recall (Stroke),F1 (Stroke),Precision (Readmit),Recall (Readmit),F1 (Readmit),MAE,MSE,R2 Score,LLM Reasoning
0,Insurance Non-DP Regression,,,,,,,,,,,,,,6062.432718,66037190.0,0.574636,
1,Insurance Fixed DP Regression,N/A (Fixed Defaults),1.0,0.991737,0.000935,1.0,1.0,,,,,,,,12967.187698,323394500.0,-1.083073,Using standard fixed parameters for Insurance Regression.
2,Insurance Gemini DP Regression,gemini-2.5-pro-exp-03-25,3.0,2.991402,0.000935,1.0,3.0,,,,,,,,12966.904507,323384400.0,-1.083008,The recommended DP settings are tailored for training a Linear Regression model on the Kaggle Me...
3,Insurance Groq (deepseek-r1-distill-llama-70b) DP Regression,deepseek-r1-distill-llama-70b,2.0,1.996596,1e-05,1.0,2.0,,,,,,,,12967.054016,323394100.0,-1.083071,"The target_epsilon of 2.0 balances privacy and utility, suitable for healthcare/finance data. Ta..."


In [20]:
# Write the combined results table to disk; the row index is kept and exported
# under the column header 'SrNo'.
results_df.to_csv(path_or_buf='regression_v1.csv', index=True, index_label='SrNo')

In [43]:
# Description: Render the recorded runs as one table (classification columns only).

if not results_list:
    print("No results recorded yet.")
else:
    # Column layout used for display; anything not listed here is left out.
    cols_order = [
        "Run Type",
        "LLM Used",
        "Target Epsilon",
        "Final Epsilon",
        "Target Delta",
        "Max Grad Norm",
        "Accuracy",
        "Precision (Stroke)",
        "Recall (Stroke)",
        "F1 (Stroke)",
        "LLM Epsilon Suggestion",
        "LLM Reasoning",
    ]

    # Columns that no run reported are created and filled with the string "N/A";
    # missing values inside columns that do exist are left as they are.
    results_df = pd.DataFrame(results_list).reindex(columns=cols_order, fill_value="N/A")

    pd.set_option('display.max_colwidth', 100)  # show more of the reasoning text
    pd.set_option('display.width', 1000)  # overall table width
    print("\n--- Combined Experiment Results ---")
    display(results_df)


--- Combined Experiment Results ---


Unnamed: 0,Run Type,LLM Used,Target Epsilon,Final Epsilon,Target Delta,Max Grad Norm,Accuracy,Precision (Stroke),Recall (Stroke),F1 (Stroke),LLM Epsilon Suggestion,LLM Reasoning
0,Diabetes Non-DP SGD,,,,,,0.606169,,,,,
1,Diabetes Gemini DP SGD,gemini-2.5-pro-exp-03-25,3.0,3.004022,1e-05,1.5,0.595428,,,,3.0,Recommendations are tailored for DP-SGD training of a Logistic Regression model on the UCI Diabe...
2,Groq (deepseek-r1-distill-llama-70b) DP SGD,deepseek-r1-distill-llama-70b,2.0,1.997348,1e-05,10.0,0.595153,0.616633,0.502479,0.553734,2.0,"The target_epsilon of 2.0 balances privacy and utility, considering the healthcare domain's sens..."
3,Diabetes Gemini DP SGD,gemini-2.5-pro-exp-03-25,3.0,3.004022,1e-05,7.5,0.608923,0.628164,0.533333,0.576877,3.0,The recommended DP settings balance the sensitivity of healthcare data with the need for model u...
4,Diabetes Non-DP SGD,,,,,,0.604792,0.628726,0.511295,0.563962,,


In [45]:
# Remove the rows labelled 0 and 1 from the results table before exporting it.
# drop() returns a new frame rather than mutating in place, and errors="ignore"
# skips labels that are already absent, so re-running this cell is a no-op
# instead of raising KeyError on the second execution.
# NOTE(review): the labels are hard-coded; confirm they still identify the rows
# to discard if results_df is rebuilt with a different run order.
results_df = results_df.drop(index=[0, 1], errors="ignore")
results_df

Unnamed: 0,Run Type,LLM Used,Target Epsilon,Final Epsilon,Target Delta,Max Grad Norm,Accuracy,Precision (Stroke),Recall (Stroke),F1 (Stroke),LLM Epsilon Suggestion,LLM Reasoning
2,Groq (deepseek-r1-distill-llama-70b) DP SGD,deepseek-r1-distill-llama-70b,2.0,1.997348,1e-05,10.0,0.595153,0.616633,0.502479,0.553734,2.0,"The target_epsilon of 2.0 balances privacy and utility, considering the healthcare domain's sens..."
3,Diabetes Gemini DP SGD,gemini-2.5-pro-exp-03-25,3.0,3.004022,1e-05,7.5,0.608923,0.628164,0.533333,0.576877,3.0,The recommended DP settings balance the sensitivity of healthcare data with the need for model u...
4,Diabetes Non-DP SGD,,,,,,0.604792,0.628726,0.511295,0.563962,,


In [46]:
# Write the current results table to disk without the row index column.
results_df.to_csv(path_or_buf='results2.csv', index=False)