In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.preprocessing import StandardScaler

# --- Synthetic cohort ------------------------------------------------------
# Seed NumPy's global generator so every draw below is repeatable.
# NOTE: the order of the np.random calls is significant -- reordering them
# changes which values each variable receives from the fixed seed.
np.random.seed(42)

n = 800  # number of simulated rows

# Covariates: one normally distributed age, three standard-normal
# biomarkers, and a symptom score drawn uniformly from [0, 10).
age = np.random.normal(loc=52, scale=12, size=n)
biomarker_a = np.random.normal(loc=0, scale=1, size=n)
biomarker_b = np.random.normal(loc=0, scale=1, size=n)
biomarker_c = np.random.normal(loc=0, scale=1, size=n)
symptom_score = np.random.uniform(low=0, high=10, size=n)

# Latent score: weighted sum of the covariates plus Gaussian noise (sd 8).
# Terms are accumulated left to right, one per statement.
latent = 0.18 * age
latent = latent + 0.9 * biomarker_a
latent = latent - 0.6 * biomarker_b
latent = latent + 1.1 * biomarker_c
latent = latent - 0.32 * symptom_score
latent = latent + np.random.normal(loc=0, scale=8, size=n)

# Squash the latent score through a logistic curve (slope 0.015) and draw a
# Bernoulli outcome per row by comparing against a uniform random number.
prob = 1 / (1 + np.exp(-0.015 * latent))
uniform_draws = np.random.rand(n)
outcome = (uniform_draws < prob).astype(int)

# Assemble everything into one table; column order follows insertion order.
df = pd.DataFrame(
    dict(
        age=age,
        biomarker_a=biomarker_a,
        biomarker_b=biomarker_b,
        biomarker_c=biomarker_c,
        symptom_score=symptom_score,
        outcome=outcome,
    )
)

# Feature matrix (every column except the label) and the label vector.
X = df.drop("outcome", axis=1)
y = df["outcome"]

# Hold out the test rows BEFORE fitting any preprocessing. Fitting the scaler
# on the full matrix would let the test rows' means/variances influence the
# features the model is trained on (data leakage) and bias the held-out
# metrics. The explicit random_state pins the split itself, so it no longer
# depends on how many draws the global NumPy generator has already consumed.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=True, random_state=42
)

# Learn the standardisation parameters from the training rows only, then
# apply that same transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling; kept so that any code
# still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Fit a logistic-regression classifier on the training split; fit() returns
# the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=500).fit(X_train, y_train)

# predict_proba yields one column per class; the second column is kept as
# the predicted probability used for scoring.
class_probabilities = model.predict_proba(X_test)
pred_prob = class_probabilities[:, 1]

# Held-out scores: ROC AUC and Brier score of the predicted probabilities.
auc = roc_auc_score(y_true=y_test, y_score=pred_prob)
brier = brier_score_loss(y_test, pred_prob)

# Scores and fitted parameters gathered in one mapping for inspection.
results = dict(
    AUC=auc,
    BrierScore=brier,
    Coefficients=model.coef_[0],
    Intercept=model.intercept_[0],
)

# Two-row metric/value table holding just the scalar scores.
metric_rows = [("AUC", auc), ("BrierScore", brier)]
results_df = pd.DataFrame(metric_rows, columns=["metric", "value"])

# Write the metrics table first, then the full dataset, as CSV files in the
# current working directory without the index column.
for frame, filename in (
    (results_df, "treatment_prediction_blinded_results.csv"),
    (df, "treatment_prediction_blinded_dataset.csv"),
):
    frame.to_csv(filename, index=False)
