In [1]:
from dotenv import load_dotenv

import torch, sys
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

sys.path.append("..")

import pandas as pd

load_dotenv()

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.expand_frame_repr', False)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np

def prepare_data_for_runner(df, xp_features, xt_parts, cpr_setting='semi_synthetic', scenario='population', model_type='nn'):
    """
    Prepares and encodes data for the specified CPR setting, scenario, and model type.
    """
    working_df = df.copy()

    # ====== Apply CPR Feature Setting ======
    if cpr_setting == 'fixed':
        # Apply fixed CPR logic here if you have one
        pass  # 🔥 Add your fixed CPR logic if needed

    # ====== Define Features ======
    categorical_features = [col for col in xp_features if col not in ['religious_importance', 'dementia_worry']]
    for part in xt_parts.values():
        categorical_features.extend(part)
    categorical_features = list(set(categorical_features))

    df_encoded = working_df.copy()
    encoders = {}

    for col in categorical_features:
        le = LabelEncoder()
        df_encoded[col] = le.fit_transform(df_encoded[col])
        encoders[col] = le

    target_encoder = LabelEncoder()
    df_encoded['choice'] = target_encoder.fit_transform(df_encoded['choice'])

    baseline_features = [col for col in df_encoded.columns if col != 'choice']

    # 👉 IMPORTANT: Keep X as a DataFrame (not NumPy)
    X = df_encoded[baseline_features]
    y = df_encoded['choice']

    # ====== Apply Scenario ======
    if scenario == 'agnostic':
        X_copy = X.copy()
        for col in X.columns:
            if col in encoders:  # Categorical feature
                X_copy[col] = 0  # Valid embedding index
            else:  # Continuous feature
                X_copy[col] = 0.0  # Neutral value for continuous features
        X = X_copy
    elif scenario == 'population':
        if model_type == 'simple':
            xp_indices = [i for i, col in enumerate(X.columns) if col in xp_features]
            X.iloc[:, xp_indices] = 0
        # NN: no adjustment
    elif scenario == 'individual':
        if model_type == 'nn':
            for feature in xp_features:
                if feature in encoders:
                    # For categorical features: set to 0 (safe embedding index)
                    X[feature] = 0
                else:
                    # For continuous features: set to 1 (any constant is fine)
                    X[feature] = 1

        # Simple models: no adjustment

    return X, y, X.columns.tolist(), encoders, target_encoder



In [3]:
from sklearn.metrics import classification_report
import numpy as np

def train_model_with_val(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=30, device='cpu'):
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    all_train_labels = []
    all_train_preds = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for Xp_batch, Xt_parts_batch, y_batch in train_loader:
            Xp_batch = {k: v.to(device).squeeze(1) for k, v in Xp_batch.items()}
            Xt_parts_batch = {part: {f: v.to(device).squeeze(1) for f, v in features.items()} for part, features in Xt_parts_batch.items()}
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            logits = model(Xp_batch, Xt_parts_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_train_labels.extend(y_batch.cpu().numpy())
            all_train_preds.extend(preds.cpu().numpy())

            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

        scheduler.step()

        avg_train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        train_losses.append(avg_train_loss)
        train_accuracies.append(train_acc)

        # Validation
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for Xp_batch, Xt_parts_batch, y_batch in val_loader:
                Xp_batch = {k: v.to(device).squeeze(1) for k, v in Xp_batch.items()}
                Xt_parts_batch = {part: {f: v.to(device).squeeze(1) for f, v in features.items()} for part, features in Xt_parts_batch.items()}
                y_batch = y_batch.to(device)

                logits = model(Xp_batch, Xt_parts_batch)
                loss = criterion(logits, y_batch)
                val_loss += loss.item()

                probs = torch.softmax(logits, dim=1)
                preds = torch.argmax(probs, dim=1)

                val_correct += (preds == y_batch).sum().item()
                val_total += y_batch.size(0)

        avg_val_loss = val_loss / len(val_loader)
        val_acc = val_correct / val_total

        val_losses.append(avg_val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")

    print("Training complete!")

    # Generate final train loss report
    train_loss_report = classification_report(all_train_labels, all_train_preds, zero_division=0, output_dict=True)

    return train_losses, train_accuracies, val_losses, val_accuracies, train_loss_report


In [4]:
from sklearn.metrics import classification_report
import numpy as np

def test_model_with_loss_report(model, test_loader, criterion, target_encoder, device='cpu'):
    model.eval()
    all_labels, all_preds, all_losses = [], [], []

    with torch.no_grad():
        for Xp_batch, Xt_parts_batch, y_batch in test_loader:
            Xp_batch = {k: v.to(device).squeeze(1) for k, v in Xp_batch.items()}
            Xt_parts_batch = {part: {f: v.to(device).squeeze(1) for f, v in features.items()} for part, features in Xt_parts_batch.items()}
            y_batch = y_batch.to(device)

            logits = model(Xp_batch, Xt_parts_batch)
            loss = criterion(logits, y_batch)
            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_labels.extend(y_batch.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

            per_sample_loss = -torch.log(torch.gather(probs, 1, y_batch.unsqueeze(1))).squeeze(1)
            all_losses.extend(per_sample_loss.cpu().numpy())

    avg_loss = np.mean(all_losses)
    accuracy = np.mean(np.array(all_labels) == np.array(all_preds))
    class_report = classification_report(all_labels, all_preds, target_names=target_encoder.classes_, output_dict=True)

    # Add loss per class
    class_losses = {label: [] for label in np.unique(all_labels)}
    for true_label, sample_loss in zip(all_labels, all_losses):
        class_losses[true_label].append(sample_loss)

    for cls_idx, losses in class_losses.items():
        class_report[target_encoder.classes_[cls_idx]]['loss'] = np.mean(losses) if losses else 0.0

    return avg_loss, accuracy, class_report


In [5]:
from sklearn.metrics import classification_report, log_loss, accuracy_score

def train_simple_model_with_loss_report(model, X_train, y_train, target_encoder):
    model.fit(X_train, y_train)

    train_probs = model.predict_proba(X_train)
    train_preds = model.predict(X_train)

    train_loss = log_loss(y_train, train_probs)
    train_acc = accuracy_score(y_train, train_preds)

    train_loss_report = classification_report(y_train, train_preds, zero_division=0, output_dict=True, target_names=target_encoder.classes_)

    print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f}")
    print("\n===== Train Classification Report =====\n")
    print(train_loss_report)

    return train_loss, train_acc, train_loss_report



In [6]:
def test_simple_model_with_loss_report(model, X_test, y_test, target_encoder):
    test_probs = model.predict_proba(X_test)
    test_preds = model.predict(X_test)

    test_loss = log_loss(y_test, test_probs)
    test_acc = accuracy_score(y_test, test_preds)

    test_loss_report = classification_report(y_test, test_preds, zero_division=0, output_dict=True, target_names=target_encoder.classes_)

    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")
    print("\n===== Test Classification Report =====\n")
    print(test_loss_report)

    return test_loss, test_acc, test_loss_report


In [7]:
import torch
from torch.utils.data import Dataset

class MultiClassTreatmentDataset(Dataset):
    def __init__(self, X, y, xp_features, xt_parts):
        self.X = X.reset_index(drop=True)
        self.y = y.reset_index(drop=True)
        self.xp_features = xp_features
        self.xt_parts = xt_parts

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        row = self.X.iloc[idx]

        # Extract Xp
        Xp = {feature: torch.tensor(row[feature], dtype=torch.float32).unsqueeze(0) for feature in self.xp_features}

        # Extract Xt parts
        Xt_parts = {}
        for part_name, features in self.xt_parts.items():
            Xt_parts[part_name] = {feature: torch.tensor(row[feature], dtype=torch.float32).unsqueeze(0) for feature in features}

        # Target
        y_target = torch.tensor(self.y.iloc[idx], dtype=torch.long)

        return Xp, Xt_parts, y_target


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ModularMultiClassTreatmentModel(nn.Module):
    def __init__(self, xp_features, xt_parts, embedding_sizes, num_classes):
        super(ModularMultiClassTreatmentModel, self).__init__()

        self.xp_features = xp_features
        self.xt_parts = xt_parts
        self.num_classes = num_classes

        # ===== Embeddings =====
        self.embeddings = nn.ModuleDict()
        for feature, num_categories in embedding_sizes.items():
            self.embeddings[feature] = nn.Embedding(num_embeddings=num_categories, embedding_dim=4)

        # ===== Patient Encoder =====
        patient_input_dim = self.calculate_total_embedding_dim(xp_features)
        self.patient_encoder = nn.Sequential(
            nn.Linear(patient_input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU()
        )

        # ===== Xt Part Encoders =====
        self.part_encoders = nn.ModuleDict()
        for part_name, features in xt_parts.items():
            part_input_dim = self.calculate_total_embedding_dim(features)
            self.part_encoders[part_name] = nn.Sequential(
                nn.Linear(part_input_dim, 64),
                nn.BatchNorm1d(64),
                nn.ReLU()
            )

        # ===== Attention Mechanism =====
        self.attention = nn.Sequential(
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 1)
        )

        # ===== Final Decision Head =====
        self.decision_head = nn.Sequential(
            nn.Linear(64 + 64, 128),  # patient vector + weighted Xt vector
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, num_classes)  # Multi-class output
        )

    def calculate_total_embedding_dim(self, features):
        dim = 0
        for feature in features:
            dim += 4 if feature in self.embeddings else 1  # Categorical: embedding | Continuous: 1-dim
        return dim

    def forward(self, Xp, Xt_parts):
        # ===== Process Patient Features =====
        patient_embeds = []
        for feature in self.xp_features:
            if feature in self.embeddings:
                patient_embeds.append(self.embeddings[feature](Xp[feature].long()).squeeze(1))
            else:
                patient_embeds.append(Xp[feature].unsqueeze(1))  # 🔥 FIXED HERE

        patient_embeds = torch.cat(patient_embeds, dim=1)
        patient_vector = self.patient_encoder(patient_embeds)

        # ===== Process Xt Parts =====
        part_vectors = []
        attention_scores = []

        for part_name, features in self.xt_parts.items():
            part_embeds = []
            for feature in features:
                if feature in self.embeddings:
                    part_embeds.append(self.embeddings[feature](Xt_parts[part_name][feature].long()).squeeze(1))
                else:
                    part_embeds.append(Xt_parts[part_name][feature].unsqueeze(1))  # 🔥 FIXED HERE

            part_embeds = torch.cat(part_embeds, dim=1)
            part_vector = self.part_encoders[part_name](part_embeds)
            part_vectors.append(part_vector)

            attn_score = self.attention(part_vector)
            attention_scores.append(attn_score)

        # ===== Attention Weighted Sum =====
        attention_scores = torch.cat(attention_scores, dim=1)
        attn_weights = F.softmax(attention_scores, dim=1)

        weighted_Xt = torch.stack(part_vectors, dim=1)  # Shape: [batch_size, num_parts, 64]
        weighted_Xt = (attn_weights.unsqueeze(2) * weighted_Xt).sum(dim=1)

        # ===== Final Decision =====
        combined = torch.cat([patient_vector, weighted_Xt], dim=1)
        logits = self.decision_head(combined)

        return logits  # Raw logits for CrossEntropyLoss


In [9]:
from sklearn.model_selection import StratifiedKFold
from collections import defaultdict
import numpy as np

def run_full_experiment(df, xp_features, xt_parts, num_folds=1, num_epochs=1):
    scenarios = ['agnostic', 'population', 'individual']
    cpr_settings = ['semi_synthetic', 'fixed']
    model_types = ['nn', 'simple']

    report_collector = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: {
        "summary": {},  # To be computed later
        "folds": {}
    })))

    for cpr_setting in cpr_settings:
        for scenario in scenarios:
            for model_type in model_types:
                print(f"\n===== Running: {cpr_setting} | {scenario} | {model_type} =====")

                X, y, feature_names, encoders, target_encoder = prepare_data_for_runner(
                    df, cpr_setting=cpr_setting, scenario=scenario, model_type=model_type,
                    xp_features=xp_features, xt_parts=xt_parts
                )

                kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

                fold_counter = 1
                for train_index, test_index in kf.split(X, y):
                    print(f"\n--- Fold {fold_counter} ---")

                    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

                    if model_type == 'nn':
                        train_dataset = MultiClassTreatmentDataset(pd.DataFrame(X_train, columns=feature_names), pd.Series(y_train), xp_features, xt_parts)
                        test_dataset = MultiClassTreatmentDataset(pd.DataFrame(X_test, columns=feature_names), pd.Series(y_test), xp_features, xt_parts)

                        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)
                        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=False)

                        embedding_sizes = {}
                        for col in feature_names:
                            if col in encoders:
                                embedding_sizes[col] = len(encoders[col].classes_)

                        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
                        model = ModularMultiClassTreatmentModel(xp_features, xt_parts, embedding_sizes, num_classes=len(target_encoder.classes_))
                        model = model.to(device)

                        criterion = nn.CrossEntropyLoss()
                        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
                        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

                        train_losses, train_accuracies, val_losses, val_accuracies, train_loss_report = train_model_with_val(
                            model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs=num_epochs, device=device
                        )

                        test_loss, test_acc, test_loss_report = test_model_with_loss_report(
                            model, test_loader, criterion, target_encoder, device=device
                        )

                        report_collector[cpr_setting][scenario][model_type]["folds"][str(fold_counter)] = {
                            "accuracy": test_acc,
                            "precision": None,  # Optional: Fill if calculated
                            "recall": None,     # Optional: Fill if calculated
                            "f1_score": None,   # Optional: Fill if calculated
                            "train_loss_history": train_losses,
                            "train_acc_history": train_accuracies,
                            "val_loss_history": val_losses,
                            "val_acc_history": val_accuracies,
                            "classification_report": {
                                "accuracy_report": train_loss_report,  # Use this if it's per-class accuracy
                                "loss_report": test_loss_report        # Use this if it's per-class loss
                            }
                        }

                    else:
                        from sklearn.linear_model import LogisticRegression

                        model = LogisticRegression(max_iter=1000)
                        train_loss, train_acc, train_loss_report = train_simple_model_with_loss_report(model, X_train, y_train, target_encoder)
                        test_loss, test_acc, test_loss_report = test_simple_model_with_loss_report(model, X_test, y_test, target_encoder)

                        report_collector[cpr_setting][scenario][model_type]["folds"][str(fold_counter)] = {
                            "accuracy": test_acc,
                            "precision": None,  # Optional: Fill if calculated
                            "recall": None,     # Optional: Fill if calculated
                            "f1_score": None,   # Optional: Fill if calculated
                            "train_loss_history": [train_loss],
                            "train_acc_history": [train_acc],
                            "val_loss_history": [],
                            "val_acc_history": [],
                            "classification_report": {
                                "accuracy_report": train_loss_report,
                                "loss_report": test_loss_report
                            }
                        }

                    fold_counter += 1

    print("\n✅ All runs complete! You can now proceed to compute summaries and generate HTML reports.")

    return report_collector


In [10]:
torch.cuda.empty_cache()

In [11]:
df = pd.read_csv(f"data.csv")

xp_features = [
    'age',
    'location', 'eol_preference',
    'family_preference', 'biological_sex', 'gender_identity', 'political_leaning',
    'marital_status', 'religion', 'religious_importance', 'annual_income',
    'education', 'family_history_dementia', 'personal_history_dementia',
    'dementia_worry'
]

# Xt Modular Components
xt_parts = {
    'medical': ['crisis_type'],
    'patient_condition': ['crisis_chance','emotional_state', 'agitation_frequency', 'agitation_severity',
    'family_visit_frequency', 'family_inconvenience', 'interaction_ability',
    'functional_ability', 'behavior', 'affordability'],
    'treatment': ['crisis_wean', 'crisis_tube'],
    'prognosis': ['crisis_comfort', 'resuscitation_chance', 'leave_hospital', 'internal_damage', 'future_arrest']
}


columns_to_drop = list(set(df.columns) - set(xp_features) - set(sum(xt_parts.values(), [])) - {"choice"})

df = df.drop(columns=columns_to_drop)

# Run the full experiment
report = run_full_experiment(df, xp_features, xt_parts, num_folds=5, num_epochs=30)


===== Running: semi_synthetic | agnostic | nn =====

--- Fold 1 ---
Epoch [1/30] Train Loss: 0.5853 | Train Acc: 0.7420 | Val Loss: 0.6097 | Val Acc: 0.7673
Epoch [2/30] Train Loss: 0.5638 | Train Acc: 0.7649 | Val Loss: 4.1249 | Val Acc: 0.7673
Epoch [3/30] Train Loss: 0.5544 | Train Acc: 0.7652 | Val Loss: 203.0509 | Val Acc: 0.7673
Epoch [4/30] Train Loss: 0.5548 | Train Acc: 0.7660 | Val Loss: 96.1202 | Val Acc: 0.7673
Epoch [5/30] Train Loss: 0.5538 | Train Acc: 0.7652 | Val Loss: 104.6798 | Val Acc: 0.7673
Epoch [6/30] Train Loss: 0.5548 | Train Acc: 0.7652 | Val Loss: 129.1451 | Val Acc: 0.7673
Epoch [7/30] Train Loss: 0.5554 | Train Acc: 0.7645 | Val Loss: 116.4091 | Val Acc: 0.7673
Epoch [8/30] Train Loss: 0.5468 | Train Acc: 0.7660 | Val Loss: 7.7944 | Val Acc: 0.7673
Epoch [9/30] Train Loss: 0.5516 | Train Acc: 0.7656 | Val Loss: 3.8832 | Val Acc: 0.7673
Epoch [10/30] Train Loss: 0.5470 | Train Acc: 0.7660 | Val Loss: 64.3776 | Val Acc: 0.7673
Epoch [11/30] Train Loss: 0.54

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 2 ---
Epoch [1/30] Train Loss: 0.6215 | Train Acc: 0.6822 | Val Loss: 0.6089 | Val Acc: 0.7673
Epoch [2/30] Train Loss: 0.5574 | Train Acc: 0.7641 | Val Loss: 0.5832 | Val Acc: 0.7673
Epoch [3/30] Train Loss: 0.5545 | Train Acc: 0.7664 | Val Loss: 13.7002 | Val Acc: 0.7673
Epoch [4/30] Train Loss: 0.5560 | Train Acc: 0.7656 | Val Loss: 28.1591 | Val Acc: 0.7673
Epoch [5/30] Train Loss: 0.5549 | Train Acc: 0.7664 | Val Loss: 27.7063 | Val Acc: 0.7673
Epoch [6/30] Train Loss: 0.5536 | Train Acc: 0.7656 | Val Loss: 18.1242 | Val Acc: 0.7673
Epoch [7/30] Train Loss: 0.5481 | Train Acc: 0.7649 | Val Loss: 56.7586 | Val Acc: 0.7673
Epoch [8/30] Train Loss: 0.5505 | Train Acc: 0.7652 | Val Loss: 85.1215 | Val Acc: 0.2327
Epoch [9/30] Train Loss: 0.5490 | Train Acc: 0.7649 | Val Loss: 1.1608 | Val Acc: 0.7673
Epoch [10/30] Train Loss: 0.5524 | Train Acc: 0.7671 | Val Loss: 6.6608 | Val Acc: 0.7673
Epoch [11/30] Train Loss: 0.5472 | Train Acc: 0.7671 | Val Loss: 10.1888 | Val Acc: 0.7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 3 ---
Epoch [1/30] Train Loss: 0.5695 | Train Acc: 0.7561 | Val Loss: 0.5421 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5543 | Train Acc: 0.7668 | Val Loss: 20.2833 | Val Acc: 0.2342
Epoch [3/30] Train Loss: 0.5478 | Train Acc: 0.7675 | Val Loss: 1.1939 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5534 | Train Acc: 0.7660 | Val Loss: 44.9340 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5523 | Train Acc: 0.7687 | Val Loss: 94.5606 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5497 | Train Acc: 0.7664 | Val Loss: 38.7383 | Val Acc: 0.7658
Epoch [7/30] Train Loss: 0.5518 | Train Acc: 0.7664 | Val Loss: 42.6356 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5468 | Train Acc: 0.7687 | Val Loss: 13.0636 | Val Acc: 0.7658
Epoch [9/30] Train Loss: 0.5509 | Train Acc: 0.7675 | Val Loss: 16.9170 | Val Acc: 0.7658
Epoch [10/30] Train Loss: 0.5509 | Train Acc: 0.7660 | Val Loss: 7.0573 | Val Acc: 0.7658
Epoch [11/30] Train Loss: 0.5476 | Train Acc: 0.7660 | Val Loss: 11.1643 | Val Acc: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 4 ---
Epoch [1/30] Train Loss: 0.6038 | Train Acc: 0.6936 | Val Loss: 0.5505 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5625 | Train Acc: 0.7652 | Val Loss: 7.0503 | Val Acc: 0.7658
Epoch [3/30] Train Loss: 0.5552 | Train Acc: 0.7668 | Val Loss: 57.8419 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5529 | Train Acc: 0.7660 | Val Loss: 43.8575 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5480 | Train Acc: 0.7668 | Val Loss: 0.5634 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5551 | Train Acc: 0.7660 | Val Loss: 172.3379 | Val Acc: 0.2342
Epoch [7/30] Train Loss: 0.5494 | Train Acc: 0.7668 | Val Loss: 8.5290 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5587 | Train Acc: 0.7637 | Val Loss: 1.1602 | Val Acc: 0.2342
Epoch [9/30] Train Loss: 0.5497 | Train Acc: 0.7671 | Val Loss: 8.0638 | Val Acc: 0.7658
Epoch [10/30] Train Loss: 0.5481 | Train Acc: 0.7649 | Val Loss: 15.3664 | Val Acc: 0.7658
Epoch [11/30] Train Loss: 0.5485 | Train Acc: 0.7649 | Val Loss: 8.1711 | Val Acc: 0.765

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 5 ---
Epoch [1/30] Train Loss: 0.5859 | Train Acc: 0.7325 | Val Loss: 0.5641 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5572 | Train Acc: 0.7664 | Val Loss: 23.7251 | Val Acc: 0.7658
Epoch [3/30] Train Loss: 0.5550 | Train Acc: 0.7671 | Val Loss: 8.8869 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5524 | Train Acc: 0.7668 | Val Loss: 66.1171 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5468 | Train Acc: 0.7671 | Val Loss: 21.4608 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5540 | Train Acc: 0.7652 | Val Loss: 5.8913 | Val Acc: 0.7658
Epoch [7/30] Train Loss: 0.5519 | Train Acc: 0.7652 | Val Loss: 11.6092 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5454 | Train Acc: 0.7675 | Val Loss: 0.5596 | Val Acc: 0.7658
Epoch [9/30] Train Loss: 0.5498 | Train Acc: 0.7664 | Val Loss: 16.8648 | Val Acc: 0.7658
Epoch [10/30] Train Loss: 0.5423 | Train Acc: 0.7683 | Val Loss: 47.7040 | Val Acc: 0.7658
Epoch [11/30] Train Loss: 0.5492 | Train Acc: 0.7656 | Val Loss: 25.8315 | Val Acc: 0.7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



===== Running: semi_synthetic | agnostic | simple =====

--- Fold 1 ---
Train Loss: 0.5439 | Train Accuracy: 0.7661

===== Train Classification Report =====

{'no treatment': {'precision': 0.7661411411411412, 'recall': 1.0, 'f1-score': 0.8675876726886291, 'support': 2041.0}, 'treatment': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 623.0}, 'accuracy': 0.7661411411411412, 'macro avg': {'precision': 0.3830705705705706, 'recall': 0.5, 'f1-score': 0.43379383634431457, 'support': 2664.0}, 'weighted avg': {'precision': 0.58697224814905, 'recall': 0.7661411411411412, 'f1-score': 0.6646946095936532, 'support': 2664.0}}
Test Loss: 0.5426 | Test Accuracy: 0.7673

===== Test Classification Report =====

{'no treatment': {'precision': 0.7672672672672672, 'recall': 1.0, 'f1-score': 0.8683092608326253, 'support': 511.0}, 'treatment': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 155.0}, 'accuracy': 0.7672672672672672, 'macro avg': {'precision': 0.3836336336336336, '

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] = 1


Epoch [1/30] Train Loss: 0.3463 | Train Acc: 0.8521 | Val Loss: 0.3114 | Val Acc: 0.8604
Epoch [2/30] Train Loss: 0.2500 | Train Acc: 0.8929 | Val Loss: 0.3095 | Val Acc: 0.8649
Epoch [3/30] Train Loss: 0.1715 | Train Acc: 0.9405 | Val Loss: 1.3305 | Val Acc: 0.3003
Epoch [4/30] Train Loss: 0.1236 | Train Acc: 0.9604 | Val Loss: 0.1290 | Val Acc: 0.9489
Epoch [5/30] Train Loss: 0.0926 | Train Acc: 0.9668 | Val Loss: 0.0989 | Val Acc: 0.9700
Epoch [6/30] Train Loss: 0.0741 | Train Acc: 0.9760 | Val Loss: 0.0909 | Val Acc: 0.9760
Epoch [7/30] Train Loss: 0.0472 | Train Acc: 0.9859 | Val Loss: 0.0867 | Val Acc: 0.9745
Epoch [8/30] Train Loss: 0.0437 | Train Acc: 0.9851 | Val Loss: 0.0854 | Val Acc: 0.9835
Epoch [9/30] Train Loss: 0.0320 | Train Acc: 0.9931 | Val Loss: 0.0706 | Val Acc: 0.9850
Epoch [10/30] Train Loss: 0.0418 | Train Acc: 0.9855 | Val Loss: 0.1119 | Val Acc: 0.9655
Epoch [11/30] Train Loss: 0.0282 | Train Acc: 0.9916 | Val Loss: 0.0657 | Val Acc: 0.9850
Epoch [12/30] Train

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Loss: 0.2338 | Train Accuracy: 0.8825

===== Train Classification Report =====

{'no treatment': {'precision': 0.8970588235294118, 'recall': 0.9563939245467908, 'f1-score': 0.9257766184491345, 'support': 2041.0}, 'treatment': {'precision': 0.8176229508196722, 'recall': 0.6404494382022472, 'f1-score': 0.7182718271827183, 'support': 623.0}, 'accuracy': 0.8825075075075075, 'macro avg': {'precision': 0.857340887174542, 'recall': 0.7984216813745191, 'f1-score': 0.8220242228159265, 'support': 2664.0}, 'weighted avg': {'precision': 0.8784820409850546, 'recall': 0.8825075075075075, 'f1-score': 0.8772497847558248, 'support': 2664.0}}
Test Loss: 0.2456 | Test Accuracy: 0.8769

===== Test Classification Report =====

{'no treatment': {'precision': 0.8994413407821229, 'recall': 0.9452054794520548, 'f1-score': 0.9217557251908397, 'support': 511.0}, 'treatment': {'precision': 0.7829457364341085, 'recall': 0.6516129032258065, 'f1-score': 0.7112676056338029, 'support': 155.0}, 'accuracy': 0.8768

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Loss: 0.2345 | Train Accuracy: 0.8836

===== Train Classification Report =====

{'no treatment': {'precision': 0.8954337899543379, 'recall': 0.9603330068560235, 'f1-score': 0.9267485822306238, 'support': 2042.0}, 'treatment': {'precision': 0.8291139240506329, 'recall': 0.6318327974276527, 'f1-score': 0.7171532846715328, 'support': 622.0}, 'accuracy': 0.8836336336336337, 'macro avg': {'precision': 0.8622738570024854, 'recall': 0.7960829021418381, 'f1-score': 0.8219509334510784, 'support': 2664.0}, 'weighted avg': {'precision': 0.8799491966389833, 'recall': 0.8836336336336337, 'f1-score': 0.87781154203477, 'support': 2664.0}}
Test Loss: 0.2429 | Test Accuracy: 0.8919

===== Test Classification Report =====

{'no treatment': {'precision': 0.9025735294117647, 'recall': 0.9627450980392157, 'f1-score': 0.9316888045540797, 'support': 510.0}, 'treatment': {'precision': 0.8442622950819673, 'recall': 0.6602564102564102, 'f1-score': 0.7410071942446043, 'support': 156.0}, 'accuracy': 0.89189

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Loss: 0.2356 | Train Accuracy: 0.8859

===== Train Classification Report =====

{'no treatment': {'precision': 0.8968036529680365, 'recall': 0.9618021547502449, 'f1-score': 0.9281663516068053, 'support': 2042.0}, 'treatment': {'precision': 0.8354430379746836, 'recall': 0.6366559485530546, 'f1-score': 0.7226277372262774, 'support': 622.0}, 'accuracy': 0.8858858858858859, 'macro avg': {'precision': 0.86612334547136, 'recall': 0.7992290516516498, 'f1-score': 0.8253970444165413, 'support': 2664.0}, 'weighted avg': {'precision': 0.8824769628306998, 'recall': 0.8858858858858859, 'f1-score': 0.8801764799308712, 'support': 2664.0}}
Test Loss: 0.2397 | Test Accuracy: 0.8889

===== Test Classification Report =====

{'no treatment': {'precision': 0.9052044609665427, 'recall': 0.9549019607843138, 'f1-score': 0.9293893129770993, 'support': 510.0}, 'treatment': {'precision': 0.8203125, 'recall': 0.6730769230769231, 'f1-score': 0.7394366197183099, 'support': 156.0}, 'accuracy': 0.88888888888888

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 2 ---
Epoch [1/30] Train Loss: 0.6011 | Train Acc: 0.7035 | Val Loss: 0.5448 | Val Acc: 0.7673
Epoch [2/30] Train Loss: 0.5555 | Train Acc: 0.7664 | Val Loss: 6.2296 | Val Acc: 0.7673
Epoch [3/30] Train Loss: 0.5512 | Train Acc: 0.7664 | Val Loss: 9.5357 | Val Acc: 0.7673
Epoch [4/30] Train Loss: 0.5575 | Train Acc: 0.7652 | Val Loss: 87.1274 | Val Acc: 0.7673
Epoch [5/30] Train Loss: 0.5523 | Train Acc: 0.7664 | Val Loss: 12.2738 | Val Acc: 0.7673
Epoch [6/30] Train Loss: 0.5494 | Train Acc: 0.7668 | Val Loss: 21.0914 | Val Acc: 0.7673
Epoch [7/30] Train Loss: 0.5505 | Train Acc: 0.7649 | Val Loss: 61.1019 | Val Acc: 0.7673
Epoch [8/30] Train Loss: 0.5497 | Train Acc: 0.7660 | Val Loss: 43.1619 | Val Acc: 0.2327
Epoch [9/30] Train Loss: 0.5506 | Train Acc: 0.7652 | Val Loss: 10.3838 | Val Acc: 0.7673
Epoch [10/30] Train Loss: 0.5457 | Train Acc: 0.7679 | Val Loss: 0.5350 | Val Acc: 0.7673
Epoch [11/30] Train Loss: 0.5494 | Train Acc: 0.7645 | Val Loss: 3.0636 | Val Acc: 0.76

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 3 ---
Epoch [1/30] Train Loss: 0.5848 | Train Acc: 0.7359 | Val Loss: 0.5621 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5625 | Train Acc: 0.7626 | Val Loss: 19.9982 | Val Acc: 0.2342
Epoch [3/30] Train Loss: 0.5541 | Train Acc: 0.7664 | Val Loss: 16.5171 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5522 | Train Acc: 0.7668 | Val Loss: 51.5087 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5495 | Train Acc: 0.7656 | Val Loss: 15.8928 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5449 | Train Acc: 0.7687 | Val Loss: 28.0809 | Val Acc: 0.7658
Epoch [7/30] Train Loss: 0.5491 | Train Acc: 0.7660 | Val Loss: 5.8366 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5520 | Train Acc: 0.7656 | Val Loss: 0.9242 | Val Acc: 0.7658
Epoch [9/30] Train Loss: 0.5490 | Train Acc: 0.7675 | Val Loss: 184.0265 | Val Acc: 0.2342
Epoch [10/30] Train Loss: 0.5448 | Train Acc: 0.7683 | Val Loss: 13.8648 | Val Acc: 0.2342
Epoch [11/30] Train Loss: 0.5474 | Train Acc: 0.7660 | Val Loss: 4.1167 | Val Acc: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 4 ---
Epoch [1/30] Train Loss: 0.6344 | Train Acc: 0.6593 | Val Loss: 0.5518 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5564 | Train Acc: 0.7637 | Val Loss: 4.3211 | Val Acc: 0.7658
Epoch [3/30] Train Loss: 0.5541 | Train Acc: 0.7664 | Val Loss: 3.4578 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5536 | Train Acc: 0.7664 | Val Loss: 14.7049 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5527 | Train Acc: 0.7664 | Val Loss: 97.6532 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5460 | Train Acc: 0.7679 | Val Loss: 22.3774 | Val Acc: 0.7658
Epoch [7/30] Train Loss: 0.5493 | Train Acc: 0.7671 | Val Loss: 3.2532 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5507 | Train Acc: 0.7668 | Val Loss: 3.1210 | Val Acc: 0.7658
Epoch [9/30] Train Loss: 0.5474 | Train Acc: 0.7668 | Val Loss: 0.5581 | Val Acc: 0.7658
Epoch [10/30] Train Loss: 0.5462 | Train Acc: 0.7671 | Val Loss: 28.6144 | Val Acc: 0.7658
Epoch [11/30] Train Loss: 0.5495 | Train Acc: 0.7660 | Val Loss: 6.3044 | Val Acc: 0.2342

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Fold 5 ---
Epoch [1/30] Train Loss: 0.5927 | Train Acc: 0.7233 | Val Loss: 0.5613 | Val Acc: 0.7658
Epoch [2/30] Train Loss: 0.5498 | Train Acc: 0.7687 | Val Loss: 13.2915 | Val Acc: 0.7658
Epoch [3/30] Train Loss: 0.5507 | Train Acc: 0.7664 | Val Loss: 127.7951 | Val Acc: 0.7658
Epoch [4/30] Train Loss: 0.5554 | Train Acc: 0.7652 | Val Loss: 214.7433 | Val Acc: 0.7658
Epoch [5/30] Train Loss: 0.5559 | Train Acc: 0.7652 | Val Loss: 263.3932 | Val Acc: 0.7658
Epoch [6/30] Train Loss: 0.5524 | Train Acc: 0.7652 | Val Loss: 42.3846 | Val Acc: 0.7658
Epoch [7/30] Train Loss: 0.5485 | Train Acc: 0.7675 | Val Loss: 38.8398 | Val Acc: 0.7658
Epoch [8/30] Train Loss: 0.5504 | Train Acc: 0.7660 | Val Loss: 57.4607 | Val Acc: 0.7658
Epoch [9/30] Train Loss: 0.5500 | Train Acc: 0.7675 | Val Loss: 10.4664 | Val Acc: 0.7658
Epoch [10/30] Train Loss: 0.5482 | Train Acc: 0.7652 | Val Loss: 16.0860 | Val Acc: 0.7658
Epoch [11/30] Train Loss: 0.5483 | Train Acc: 0.7671 | Val Loss: 2.6299 | Val Acc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



===== Running: fixed | agnostic | simple =====

--- Fold 1 ---
Train Loss: 0.5439 | Train Accuracy: 0.7661

===== Train Classification Report =====

{'no treatment': {'precision': 0.7661411411411412, 'recall': 1.0, 'f1-score': 0.8675876726886291, 'support': 2041.0}, 'treatment': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 623.0}, 'accuracy': 0.7661411411411412, 'macro avg': {'precision': 0.3830705705705706, 'recall': 0.5, 'f1-score': 0.43379383634431457, 'support': 2664.0}, 'weighted avg': {'precision': 0.58697224814905, 'recall': 0.7661411411411412, 'f1-score': 0.6646946095936532, 'support': 2664.0}}
Test Loss: 0.5426 | Test Accuracy: 0.7673

===== Test Classification Report =====

{'no treatment': {'precision': 0.7672672672672672, 'recall': 1.0, 'f1-score': 0.8683092608326253, 'support': 511.0}, 'treatment': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 155.0}, 'accuracy': 0.7672672672672672, 'macro avg': {'precision': 0.3836336336336336, 'recall': 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] = 1


Epoch [1/30] Train Loss: 0.3752 | Train Acc: 0.8342 | Val Loss: 0.3443 | Val Acc: 0.8559
Epoch [2/30] Train Loss: 0.2666 | Train Acc: 0.8841 | Val Loss: 9.5691 | Val Acc: 0.7673
Epoch [3/30] Train Loss: 0.1907 | Train Acc: 0.9245 | Val Loss: 33.0594 | Val Acc: 0.7673
Epoch [4/30] Train Loss: 0.1238 | Train Acc: 0.9604 | Val Loss: 0.3838 | Val Acc: 0.8844
Epoch [5/30] Train Loss: 0.0797 | Train Acc: 0.9790 | Val Loss: 0.1292 | Val Acc: 0.9685
Epoch [6/30] Train Loss: 0.0657 | Train Acc: 0.9813 | Val Loss: 0.1312 | Val Acc: 0.9685
Epoch [7/30] Train Loss: 0.0486 | Train Acc: 0.9874 | Val Loss: 0.0934 | Val Acc: 0.9850
Epoch [8/30] Train Loss: 0.0377 | Train Acc: 0.9901 | Val Loss: 0.0749 | Val Acc: 0.9850
Epoch [9/30] Train Loss: 0.0334 | Train Acc: 0.9886 | Val Loss: 0.0659 | Val Acc: 0.9850
Epoch [10/30] Train Loss: 0.0281 | Train Acc: 0.9924 | Val Loss: 0.1073 | Val Acc: 0.9805
Epoch [11/30] Train Loss: 0.0211 | Train Acc: 0.9939 | Val Loss: 0.0717 | Val Acc: 0.9835
Epoch [12/30] Trai

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Loss: 0.2338 | Train Accuracy: 0.8825

===== Train Classification Report =====

{'no treatment': {'precision': 0.8970588235294118, 'recall': 0.9563939245467908, 'f1-score': 0.9257766184491345, 'support': 2041.0}, 'treatment': {'precision': 0.8176229508196722, 'recall': 0.6404494382022472, 'f1-score': 0.7182718271827183, 'support': 623.0}, 'accuracy': 0.8825075075075075, 'macro avg': {'precision': 0.857340887174542, 'recall': 0.7984216813745191, 'f1-score': 0.8220242228159265, 'support': 2664.0}, 'weighted avg': {'precision': 0.8784820409850546, 'recall': 0.8825075075075075, 'f1-score': 0.8772497847558248, 'support': 2664.0}}
Test Loss: 0.2456 | Test Accuracy: 0.8769

===== Test Classification Report =====

{'no treatment': {'precision': 0.8994413407821229, 'recall': 0.9452054794520548, 'f1-score': 0.9217557251908397, 'support': 511.0}, 'treatment': {'precision': 0.7829457364341085, 'recall': 0.6516129032258065, 'f1-score': 0.7112676056338029, 'support': 155.0}, 'accuracy': 0.8768

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Train Loss: 0.2345 | Train Accuracy: 0.8836

===== Train Classification Report =====

{'no treatment': {'precision': 0.8954337899543379, 'recall': 0.9603330068560235, 'f1-score': 0.9267485822306238, 'support': 2042.0}, 'treatment': {'precision': 0.8291139240506329, 'recall': 0.6318327974276527, 'f1-score': 0.7171532846715328, 'support': 622.0}, 'accuracy': 0.8836336336336337, 'macro avg': {'precision': 0.8622738570024854, 'recall': 0.7960829021418381, 'f1-score': 0.8219509334510784, 'support': 2664.0}, 'weighted avg': {'precision': 0.8799491966389833, 'recall': 0.8836336336336337, 'f1-score': 0.87781154203477, 'support': 2664.0}}
Test Loss: 0.2429 | Test Accuracy: 0.8919

===== Test Classification Report =====

{'no treatment': {'precision': 0.9025735294117647, 'recall': 0.9627450980392157, 'f1-score': 0.9316888045540797, 'support': 510.0}, 'treatment': {'precision': 0.8442622950819673, 'recall': 0.6602564102564102, 'f1-score': 0.7410071942446043, 'support': 156.0}, 'accuracy': 0.89189

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
torch.cuda.empty_cache()

In [13]:
import json
import os

def save_report_to_json(report_collector, filename="final_report2.json", folder="reports"):
    os.makedirs(folder, exist_ok=True)
    file_path = os.path.join(folder, filename)

    # Convert any numpy data types to Python types to make them JSON serializable
    def convert(o):
        if isinstance(o, (np.integer, np.int64)): return int(o)
        if isinstance(o, (np.floating, np.float64)): return float(o)
        if isinstance(o, (np.ndarray,)): return o.tolist()
        return o

    with open(file_path, "w") as f:
        json.dump(report_collector, f, indent=4, default=convert)

    print(f"✅ Saved final report to {file_path}")


In [14]:
save_report_to_json(report)


✅ Saved final report to reports\final_report2.json


In [15]:
import numpy as np
import json

def prepare_final_json(report_collector):
    final_json = {}

    for cpr_setting, cpr_data in report_collector.items():
        final_json[cpr_setting] = {}

        for scenario, scenario_data in cpr_data.items():
            final_json[cpr_setting][scenario] = {}

            for model_type, model_data in scenario_data.items():
                folds_data = model_data["folds"]

                # Collect all histories across folds
                train_losses_all = []
                train_accs_all = []
                val_losses_all = []
                val_accs_all = []
                test_accuracies = []

                for fold_num, fold_metrics in folds_data.items():
                    train_losses_all.append(fold_metrics["train_loss_history"])
                    train_accs_all.append(fold_metrics["train_acc_history"])
                    val_losses_all.append(fold_metrics["val_loss_history"])
                    val_accs_all.append(fold_metrics["val_acc_history"])
                    test_accuracies.append(fold_metrics["accuracy"])

                # Make sure all histories are same length
                min_train_len = min(len(seq) for seq in train_losses_all)
                min_val_len = min(len(seq) for seq in val_losses_all) if val_losses_all and all(val_losses_all) else 0

                # Trim histories to the shortest run if necessary
                train_losses_all = [seq[:min_train_len] for seq in train_losses_all]
                train_accs_all = [seq[:min_train_len] for seq in train_accs_all]
                val_losses_all = [seq[:min_val_len] for seq in val_losses_all] if min_val_len > 0 else []
                val_accs_all = [seq[:min_val_len] for seq in val_accs_all] if min_val_len > 0 else []

                # Calculate summaries
                summary = {}

                if test_accuracies:
                    summary["avg_accuracy"] = float(np.mean(test_accuracies))
                    summary["std_accuracy"] = float(np.std(test_accuracies))

                else:
                    summary["avg_accuracy"] = None
                    summary["std_accuracy"] = None

                # Training Loss
                if train_losses_all:
                    summary["train_loss_history"] = [float(x) for x in np.mean(train_losses_all, axis=0)]
                    summary["train_loss_std"] = [float(x) for x in np.std(train_losses_all, axis=0)]

                else:
                    summary["train_loss_history"] = []
                    summary["train_loss_std"] = []

                # Training Accuracy
                if train_accs_all:
                    summary["train_acc_history"] = [float(x) for x in np.mean(train_accs_all, axis=0)]
                    summary["train_acc_std"] = [float(x) for x in np.std(train_accs_all, axis=0)]
                else:
                    summary["train_acc_history"] = []
                    summary["train_acc_std"] = []

                # Validation Loss
                if val_losses_all:
                    summary["val_loss_history"] = [float(x) for x in np.mean(val_losses_all, axis=0)]
                    summary["val_loss_std"] = [float(x) for x in np.std(val_losses_all, axis=0)]
                else:
                    summary["val_loss_history"] = []
                    summary["val_loss_std"] = []

                # Validation Accuracy
                if val_accs_all:
                    summary["val_acc_history"] = [float(x) for x in np.mean(val_accs_all, axis=0)]
                    summary["val_acc_std"] = [float(x) for x in np.std(val_accs_all, axis=0)]
                else:
                    summary["val_acc_history"] = []
                    summary["val_acc_std"] = []

                # Store in final JSON structure
                final_json[cpr_setting][scenario][model_type] = {
                    "summary": summary,
                    "folds": folds_data  # Direct copy, already in correct format
                }

    return final_json

def save_json(data, filename="final_report2.json"):
    with open(filename, "w") as f:
        json.dump(data, f, indent=4)

    print(f"\n✅ JSON successfully saved to {filename}")


In [16]:
import numpy as np

def clean_json(obj):
    """
    Recursively converts NumPy types to native Python types for JSON serialization.
    """
    if isinstance(obj, dict):
        return {k: clean_json(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [clean_json(item) for item in obj]
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, (np.float32, np.float64)):
        return float(obj)
    elif isinstance(obj, (np.int32, np.int64)):
        return int(obj)
    else:
        return obj


In [17]:
final_json = prepare_final_json(report)

# After building your final_json
final_json_clean = clean_json(final_json)

# Save to file
save_json(final_json_clean)



✅ JSON successfully saved to final_report2.json


In [18]:
final_json_clean

{'semi_synthetic': {'agnostic': {'nn': {'summary': {'avg_accuracy': 0.7663663663663662,
     'std_accuracy': 0.0007355825053403044,
     'train_loss_history': [0.5931899860137847,
      0.5590149851833901,
      0.5533666837506178,
      0.5538954002101246,
      0.5511747245381519,
      0.5534228348150487,
      0.5513192086684995,
      0.5496226483728828,
      0.5501733076281663,
      0.5481136281315873,
      0.5482521234489068,
      0.5472193797913993,
      0.547727445131395,
      0.5477795506395944,
      0.5466178885320337,
      0.5456303715705871,
      0.5471868721450248,
      0.5484233846024769,
      0.5463837052263865,
      0.5460514376803143,
      0.5453607912470655,
      0.545860188617939,
      0.545261319672189,
      0.5446322548680189,
      0.5461316347122193,
      0.5465474307537079,
      0.5467766894073021,
      0.5461723881523783,
      0.5449032857650664,
      0.5455020653038491],
     'train_loss_std': [0.017847068935598367,
      0.00355217750899