In [1]:
import pandas as pd
import numpy as np
import json
import os
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from opacus import PrivacyEngine
from opacus.validators import ModuleValidator

from groq import Groq
from google import genai
from google.genai import types
from dotenv import load_dotenv



In [2]:
load_dotenv()
warnings.filterwarnings('ignore')

LEARNING_RATE = 0.01
EPOCHS = 10 
BATCH_SIZE = 64

DEFAULT_TARGET_EPSILON = 1.0
DEFAULT_TARGET_DELTA = 1e-5 
DEFAULT_MAX_GRAD_NORM = 1.0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")



Using device: cpu


In [3]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL_NAME = os.getenv("GROQ_MODEL_NAME", "llama3-70b-8192")

client = None 
if not GROQ_API_KEY:
    print("Error: GROQ_API_KEY not found in .env file or environment variables.")
    print("Please ensure a .env file exists with GROQ_API_KEY='your_key_here'")
else:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        print(f"Groq client initialized successfully for model: {GROQ_MODEL_NAME}")
    except Exception as e:
        print(f"Error initializing Groq client: {e}")

Groq client initialized successfully for model: deepseek-r1-distill-llama-70b


In [None]:
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "llama3-70b-8192")

client = None 
if not GEMINI_API_KEY:
    print("Error: GEMINI_API_KEY not found in .env file or environment variables.")
    print("Please ensure a .env file exists with GEMINI_API_KEY='your_key_here'")
else:
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
        print(f"Gemini client initialized successfully for model: {GEMINI_MODEL_NAME}")
    except Exception as e:
        print(f"Error initializing Gemini client: {e}")

Gemini client initialized successfully for model: gemini-2.5-pro-exp-03-25


In [5]:
DATA_FILE = 'cleaned_healthcare_stroke.csv' 
TARGET_COLUMN = 'stroke'
RANDOM_STATE = 42
TEST_SIZE = 0.2

try:
    df = pd.read_csv(DATA_FILE)
    print("Dataset loaded successfully.")
    print("Dataset shape:", df.shape)
    print("First 5 rows:")
    print(df.head())
    print("\nDataset Info:")
    df.info()
except FileNotFoundError:
    print(f"Error: File not found at {DATA_FILE}. Please ensure the cleaned data file exists.")
    df = None 

if df is not None:
    original_columns = df.columns.tolist()
    print("\nOriginal Columns:", original_columns)
else:
    original_columns = []

Dataset loaded successfully.
Dataset shape: (5110, 12)
First 5 rows:
   Unnamed: 0  gender   age  hypertension  heart_disease ever_married  \
0           0    Male  67.0             0              1          Yes   
1           1  Female  61.0             0              0          Yes   
2           2    Male  80.0             0              1          Yes   
3           3  Female  49.0             0              0          Yes   
4           4  Female  79.0             1              0          Yes   

       work_type Residence_type  avg_glucose_level   bmi   smoking_status  \
0        Private          Urban             228.69  36.6  formerly smoked   
1  Self-employed          Rural             202.21  28.1     never smoked   
2        Private          Rural             105.92  32.5     never smoked   
3        Private          Urban             171.23  34.4           smokes   
4  Self-employed          Rural             174.12  24.0     never smoked   

   stroke  
0       1  
1    

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,0,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,1,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,28.1,never smoked,1
2,2,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,3,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,4,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [7]:
df['stroke'].value_counts()

stroke
0    4861
1     249
Name: count, dtype: int64

In [8]:
df = df.drop('Unnamed: 0', axis=1)

#### Data preprocessing for modelling

In [9]:
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()

potential_numerical_features = ['age', 'avg_glucose_level', 'bmi']
numerical_features = [col for col in potential_numerical_features if col in X.columns and pd.api.types.is_numeric_dtype(X[col])]

print(f"\nIdentified Categorical Features: {categorical_features}")
print(f"Identified Numerical Features: {numerical_features}")

numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore', drop='first') 

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    remainder='passthrough'
)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

feature_names = preprocessor.get_feature_names_out()
n_features = X_train_processed.shape[1]
print(f"\nNumber of features after preprocessing: {n_features}")

X_train_tensor = torch.tensor(X_train_processed.astype(np.float32)).to(device)
y_train_tensor = torch.tensor(y_train.values.astype(np.float32)).unsqueeze(1).to(device) 
X_test_tensor = torch.tensor(X_test_processed.astype(np.float32)).to(device)
y_test_tensor = torch.tensor(y_test.values.astype(np.float32)).unsqueeze(1).to(device)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("\nData preprocessing and splitting complete.")
print(f"Training set size: {len(X_train_tensor)}")
print(f"Test set size: {len(X_test_tensor)}")


Identified Categorical Features: ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
Identified Numerical Features: ['age', 'avg_glucose_level', 'bmi']

Number of features after preprocessing: 15

Data preprocessing and splitting complete.
Training set size: 4088
Test set size: 1022


In [10]:
if 'y_train' in locals():
    neg_count = (y_train == 0).sum()
    pos_count = (y_train == 1).sum()

    if pos_count > 0:
        pos_weight_value = neg_count / pos_count
        print(f"\nCalculated pos_weight for minority class (stroke=1): {pos_weight_value:.2f}")
        pos_weight_tensor = torch.tensor([pos_weight_value], dtype=torch.float32).to(device)
    else:
        print("Warning: No positive samples found in training data. Cannot calculate pos_weight.")
        pos_weight_tensor = None 
else:
    pos_weight_tensor = None


Calculated pos_weight for minority class (stroke=1): 19.54


### Logistic regression train Without DP

In [11]:

class LogisticRegression(nn.Module):
    def __init__(self, n_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_features, 1)

    def forward(self, x):
        return self.linear(x)

if 'n_features' in locals():
    model_non_dp = LogisticRegression(n_features).to(device)
    print("\nLogistic Regression model defined:")
    print(model_non_dp)
else:
    print("Cannot instantiate model - n_features not defined due to earlier error.")
    model_non_dp = None


Logistic Regression model defined:
LogisticRegression(
  (linear): Linear(in_features=15, out_features=1, bias=True)
)


In [12]:

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)
optimizer_non_dp = optim.SGD(model_non_dp.parameters(), lr=LEARNING_RATE)

print("\n--- Training Standard Logistic Regression (SGD) ---")
model_non_dp.train()
for epoch in range(EPOCHS):
    epoch_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer_non_dp.zero_grad()

        outputs = model_non_dp(batch_X)

        loss = criterion(outputs, batch_y)

        loss.backward()

        optimizer_non_dp.step()

        epoch_loss += loss.item()

    avg_epoch_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_epoch_loss:.4f}")

print("--- Standard Training Complete ---")



--- Training Standard Logistic Regression (SGD) ---
Epoch [1/10], Loss: 1.2298
Epoch [2/10], Loss: 1.1303
Epoch [3/10], Loss: 1.0696
Epoch [4/10], Loss: 1.0313
Epoch [5/10], Loss: 1.0050
Epoch [6/10], Loss: 0.9845
Epoch [7/10], Loss: 0.9690
Epoch [8/10], Loss: 0.9581
Epoch [9/10], Loss: 0.9497
Epoch [10/10], Loss: 0.9425
--- Standard Training Complete ---


In [13]:
print("\n--- Evaluating Standard Model ---")
model_non_dp.eval()
all_preds_non_dp = []
all_targets_non_dp = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model_non_dp(batch_X)
        preds = torch.round(torch.sigmoid(outputs))
        all_preds_non_dp.extend(preds.cpu().numpy())
        all_targets_non_dp.extend(batch_y.cpu().numpy())
accuracy = accuracy_score(all_targets_non_dp, all_preds_non_dp)
precision = precision_score(all_targets_non_dp, all_preds_non_dp, zero_division=0)
recall = recall_score(all_targets_non_dp, all_preds_non_dp, zero_division=0)
f1 = f1_score(all_targets_non_dp, all_preds_non_dp, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("\nClassification Report:")
print(classification_report(all_targets_non_dp, all_preds_non_dp, zero_division=0))



--- Evaluating Standard Model ---
Accuracy: 0.7250
Precision: 0.1333
Recall: 0.8400
F1-Score: 0.2301

Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      0.72      0.83       972
         1.0       0.13      0.84      0.23        50

    accuracy                           0.73      1022
   macro avg       0.56      0.78      0.53      1022
weighted avg       0.95      0.73      0.80      1022



### LLM output testing

In [14]:
df['stroke'].value_counts()

stroke
0    4861
1     249
Name: count, dtype: int64

In [15]:

print("\n--- Requesting DP Configuration from LLM ---")

task_config = {
    "dataset_name": "Kaggle Stroke Prediction",
    "data_domain": "Healthcare",
    "task_description": "Train a binary classification model (Logistic Regression using DP-SGD) to predict stroke occurrence.",
    "target_variable": TARGET_COLUMN,
    "model_type": "Logistic Regression",
    "dp_mechanism_family": "DP-SGD",
    "details": "The stroke (target column) is heavily imbalanced, there are way more 0's than 1's. So keep in mind that disbalance"
}
print("Task Configuration:")
print(json.dumps(task_config, indent=2))

schema_string = ", ".join(df.columns.tolist())

prompt = f"""
Analyze the provided dataset context and task to recommend Differential Privacy (DP) settings for training a Logistic Regression model using DP-SGD. 
The goal is to predict the target variable '{task_config['target_variable']}'. 
Don't just suggest standard values, they should be accurate enough based on provided context to require minimal tuning. 
For 'max_grad_norm' suggest large enough value if target variable is largely skewed for enough accuracy.

Dataset Context:
- Name: {task_config['dataset_name']}
- Domain: {task_config['data_domain']}
- Task: {task_config['task_description']}
- Schema (Original Columns): {schema_string}
- Data Shape: {df.shape}
- Extra details: {task_config['details']}

Provide your recommendations ONLY in a structured JSON format. The JSON object must include the following keys:
- "dp_algorithm": String, the specific DP algorithm variant recommended (e.g., "DP-SGD with Gaussian Noise").
- "target_epsilon": Float, recommended privacy budget epsilon (e.g., 1.5). Consider domain sensitivity and model training needs.
- "target_delta": Float, recommended privacy budget delta (e.g., 1e-5 or suggest calculating based on dataset size N as 1/N).
- "max_grad_norm": Float, recommended gradient clipping norm.
- "preprocessing_suggestions": List of strings, specific preprocessing actions recommended BEFORE applying DP (e.g., "Remove: id", "Normalize: age, avg_glucose_level, bmi").
- "column_sensitivity_epsilon": A dictionary where keys are original column names and values are relative epsilons value for adaptive privacy budget for each column. Example: {{"id": 1.0, "age": 0.7, "gender": 0.6}}. This is best privacy budget for these columns.
- "reasoning": String, the reasoning behind output, concisely (include reasoning for column_sensitivity hints too)
JSON Output ONLY:
"""

print("\nSending request to LLM...")
llm_config = None
try:
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=GEMINI_MODEL_NAME,   
        temperature=0.2, 
        max_tokens=8000,
        top_p=0.8,
        response_format={"type": "json_object"}, 
    )

    response_content = chat_completion.choices[0].message.content
    print("LLM Response Received.")

    llm_config = json.loads(response_content)
    print("\nParsed LLM DP Configuration:")
    print(json.dumps(llm_config, indent=2))

    required_keys = ["dp_algorithm", "target_epsilon", "target_delta", "max_grad_norm", "preprocessing_suggestions", "column_sensitivity_epsilon"]
    if not all(key in llm_config for key in required_keys):
        print("Warning: LLM response missing some required keys.")

except Exception as e:
    print(f"Error during LLM API call or parsing: {e}")
    llm_config = None

if llm_config is None:
    print("\nLLM configuration failed or invalid. Using default DP parameters.")
    llm_config = {
        "dp_algorithm": "DP-SGD with Gaussian Noise (Default)",
        "target_epsilon": DEFAULT_TARGET_EPSILON,
        "target_delta": DEFAULT_TARGET_DELTA, # Calculate based on actual train size later
        "max_grad_norm": DEFAULT_MAX_GRAD_NORM,
        "preprocessing_suggestions": ["Normalize numerical features", "Remove identifiers (if any)"],
        "column_sensitivity_epsilon": {"Info": "Using default parameters"}
    }
    print(json.dumps(llm_config, indent=2))



--- Requesting DP Configuration from LLM ---
Task Configuration:
{
  "dataset_name": "Kaggle Stroke Prediction",
  "data_domain": "Healthcare",
  "task_description": "Train a binary classification model (Logistic Regression using DP-SGD) to predict stroke occurrence.",
  "target_variable": "stroke",
  "model_type": "Logistic Regression",
  "dp_mechanism_family": "DP-SGD",
  "details": "The stroke (target column) is heavily imbalanced, there are way more 0's than 1's. So keep in mind that disbalance"
}

Sending request to LLM...
Error during LLM API call or parsing: 'Client' object has no attribute 'chat'

LLM configuration failed or invalid. Using default DP parameters.
{
  "dp_algorithm": "DP-SGD with Gaussian Noise (Default)",
  "target_epsilon": 1.0,
  "target_delta": 1e-05,
  "max_grad_norm": 1.0,
  "preprocessing_suggestions": [
    "Normalize numerical features",
    "Remove identifiers (if any)"
  ],
  "column_sensitivity_epsilon": {
    "Info": "Using default parameters"
  }
}

In [16]:
response = client.models.generate_content(
    model=GEMINI_MODEL_NAME, contents=prompt
)
print(response.text)

```json
{
  "dp_algorithm": "DP-SGD with Gaussian Noise",
  "target_epsilon": 3.0,
  "target_delta": 1.95e-4,
  "max_grad_norm": 3.0,
  "preprocessing_suggestions": [
    "Handle missing values (e.g., median imputation for 'bmi')",
    "One-hot encode categorical features: gender, hypertension, heart_disease, ever_married, work_type, Residence_type, smoking_status",
    "Standardize numerical features: age, avg_glucose_level, bmi"
  ],
  "column_sensitivity_epsilon": {
    "gender": 0.8,
    "age": 0.2,
    "hypertension": 0.3,
    "heart_disease": 0.3,
    "ever_married": 0.6,
    "work_type": 0.6,
    "Residence_type": 0.8,
    "avg_glucose_level": 0.1,
    "bmi": 0.1,
    "smoking_status": 0.4
  },
  "reasoning": "DP-SGD with Gaussian noise is standard for DP model training. Epsilon=3.0 is chosen as a starting point balancing the sensitivity of healthcare data with the need for model utility on a relatively small dataset (N=5110). Delta is set to 1/N (1/5110 ≈ 1.95e-4), a standard r

In [17]:
import json
start_index = response.text.find('{')
end_index = response.text.rfind('}')

llm_config = None

json_string_only = response.text[start_index : end_index + 1]

llm_config = json.loads(json_string_only)
print("Successfully parsed LLM config from Gemini response:")
print(json.dumps(llm_config, indent=2))


Successfully parsed LLM config from Gemini response:
{
  "dp_algorithm": "DP-SGD with Gaussian Noise",
  "target_epsilon": 3.0,
  "target_delta": 0.000195,
  "max_grad_norm": 3.0,
  "preprocessing_suggestions": [
    "Handle missing values (e.g., median imputation for 'bmi')",
    "One-hot encode categorical features: gender, hypertension, heart_disease, ever_married, work_type, Residence_type, smoking_status",
    "Standardize numerical features: age, avg_glucose_level, bmi"
  ],
  "column_sensitivity_epsilon": {
    "gender": 0.8,
    "age": 0.2,
    "hypertension": 0.3,
    "heart_disease": 0.3,
    "ever_married": 0.6,
    "work_type": 0.6,
    "Residence_type": 0.8,
    "avg_glucose_level": 0.1,
    "bmi": 0.1,
    "smoking_status": 0.4
  },
  "reasoning": "DP-SGD with Gaussian noise is standard for DP model training. Epsilon=3.0 is chosen as a starting point balancing the sensitivity of healthcare data with the need for model utility on a relatively small dataset (N=5110). Delta 

### Training with LLM configured DP-SGD

In [18]:
llm_config['target_epsilon'] = 10

In [19]:

if 'n_features' in locals() and llm_config is not None and 'train_loader' in locals():
    print("\n--- Training Differentially Private Logistic Regression (DP-SGD) ---")

    dp_model = LogisticRegression(n_features).to(device)
    dp_optimizer = optim.SGD(dp_model.parameters(), lr=LEARNING_RATE)

    privacy_engine = PrivacyEngine()
    dp_model, dp_optimizer, dp_data_loader = privacy_engine.make_private_with_epsilon(
        module=dp_model,
        optimizer=dp_optimizer,
        data_loader=train_loader,
        max_grad_norm=llm_config.get("max_grad_norm", DEFAULT_MAX_GRAD_NORM),
        target_epsilon=llm_config.get("target_epsilon", DEFAULT_TARGET_EPSILON),
        target_delta=llm_config.get("target_delta", DEFAULT_TARGET_DELTA),
        epochs=EPOCHS,
    )


    dp_model.train()
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)
    for epoch in range(EPOCHS):
        epoch_loss_dp = 0.0
        for batch_X, batch_y in dp_data_loader: 
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)


            dp_optimizer.zero_grad() 

            outputs = dp_model(batch_X)
            loss = criterion(outputs, batch_y)

            loss.backward()

            dp_optimizer.step()

            epoch_loss_dp += loss.item()

        avg_epoch_loss_dp = epoch_loss_dp / len(dp_data_loader)

        current_epsilon = privacy_engine.get_epsilon(delta=llm_config.get("target_delta", DEFAULT_TARGET_DELTA))
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_epoch_loss_dp:.4f}, Current ε: {current_epsilon:.4f}")
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_epoch_loss_dp:.4f}")


    # Get final privacy budget spent
    final_epsilon = privacy_engine.get_epsilon(delta=llm_config.get("target_delta", DEFAULT_TARGET_DELTA))
    print("--- DP Training Complete ---")
    print(f"Final privacy budget spent: ε = {final_epsilon:.4f} for δ = {llm_config.get('target_delta', DEFAULT_TARGET_DELTA):.2e}")

else:
    print("Skipping DP training due to missing components (model, config, or data loader).")


--- Training Differentially Private Logistic Regression (DP-SGD) ---
Epoch [1/10], Loss: 1.3178, Current ε: 4.2780
Epoch [1/10], Loss: 1.3178
Epoch [2/10], Loss: 1.6448, Current ε: 5.2813
Epoch [2/10], Loss: 1.6448
Epoch [3/10], Loss: 1.4848, Current ε: 6.0792
Epoch [3/10], Loss: 1.4848
Epoch [4/10], Loss: 1.4421, Current ε: 6.7716
Epoch [4/10], Loss: 1.4421
Epoch [5/10], Loss: 1.9714, Current ε: 7.3971
Epoch [5/10], Loss: 1.9714
Epoch [6/10], Loss: 1.7374, Current ε: 7.9752
Epoch [6/10], Loss: 1.7374
Epoch [7/10], Loss: 1.8112, Current ε: 8.5177
Epoch [7/10], Loss: 1.8112
Epoch [8/10], Loss: 1.7101, Current ε: 9.0320
Epoch [8/10], Loss: 1.7101
Epoch [9/10], Loss: 2.0938, Current ε: 9.5233
Epoch [9/10], Loss: 2.0938
Epoch [10/10], Loss: 2.1845, Current ε: 9.9953
Epoch [10/10], Loss: 2.1845
--- DP Training Complete ---
Final privacy budget spent: ε = 9.9953 for δ = 1.95e-04


In [20]:

if 'dp_model' in locals() and 'test_loader' in locals():
    print("\n--- Evaluating DP Model ---")
    dp_model.eval()
    all_preds_dp = []
    all_targets_dp = []

    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = dp_model(batch_X)
            preds = torch.round(torch.sigmoid(outputs)) 
            all_preds_dp.extend(preds.cpu().numpy())
            all_targets_dp.extend(batch_y.cpu().numpy())

    accuracy_dp = accuracy_score(all_targets_dp, all_preds_dp)
    precision_dp = precision_score(all_targets_dp, all_preds_dp, zero_division=0)
    recall_dp = recall_score(all_targets_dp, all_preds_dp, zero_division=0)
    f1_dp = f1_score(all_targets_dp, all_preds_dp, zero_division=0)

    print(f"Accuracy (DP): {accuracy_dp:.4f}")
    print(f"Precision (DP): {precision_dp:.4f}")
    print(f"Recall (DP): {recall_dp:.4f}")
    print(f"F1-Score (DP): {f1_dp:.4f}")
    print("\nClassification Report (DP):")
    print(classification_report(all_targets_dp, all_preds_dp, zero_division=0))

    if 'accuracy' in locals():
         print("\n--- Comparison ---")
         print(f"Metric      | Non-DP | DP")
         print(f"------------|--------|--------")
         print(f"Accuracy    | {accuracy:.4f} | {accuracy_dp:.4f}")
         print(f"Precision   | {precision:.4f} | {precision_dp:.4f}")
         print(f"Recall      | {recall:.4f} | {recall_dp:.4f}")
         print(f"F1-Score    | {f1:.4f} | {f1_dp:.4f}")
         if 'final_epsilon' in locals():
             print(f"(DP Model used ε ≈ {final_epsilon:.2f} for δ ≈ {llm_config.get('target_delta', DEFAULT_TARGET_DELTA):.1e})")

else:
    print("Skipping DP evaluation.")


--- Evaluating DP Model ---
Accuracy (DP): 0.9511
Precision (DP): 0.0000
Recall (DP): 0.0000
F1-Score (DP): 0.0000

Classification Report (DP):
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97       972
         1.0       0.00      0.00      0.00        50

    accuracy                           0.95      1022
   macro avg       0.48      0.50      0.49      1022
weighted avg       0.90      0.95      0.93      1022


--- Comparison ---
Metric      | Non-DP | DP
------------|--------|--------
Accuracy    | 0.7250 | 0.9511
Precision   | 0.1333 | 0.0000
Recall      | 0.8400 | 0.0000
F1-Score    | 0.2301 | 0.0000
(DP Model used ε ≈ 10.00 for δ ≈ 1.9e-04)
