<a href="https://colab.research.google.com/github/nmansour67/skills-introduction-to-github/blob/main/First_Causal_Inference_Model_step3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ==========================================
# STEP 4: THE MATCHING (Revealing the Truth)
# ==========================================

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors

# --- RE-GENERATING DATA (To ensure 'df' exists) ---
# Simulated cohort in which Age and Severity drive BOTH the chance of being
# treated and the chance of dying, i.e. they act as confounders.
def _logistic(log_odds):
    """Convert log-odds into a probability via the logistic (sigmoid) function."""
    return 1 / (1 + np.exp(-log_odds))


np.random.seed(42)  # fixed seed: the draws below are reproducible run to run
n_patients = 1000

# Patient covariates (draw order matters for reproducibility: age, then severity)
age = np.random.normal(loc=65, scale=10, size=n_patients)
severity_score = np.random.normal(loc=5, scale=2, size=n_patients)

# Treatment assignment: older / sicker patients are more likely to be treated
treatment_log_odds = 0.1 * age + 0.5 * severity_score - 10
prob_treatment = _logistic(treatment_log_odds)
treatment = np.random.binomial(1, prob_treatment)

# Outcome: risk rises with age and severity; treatment lowers the log-odds by 0.8
mortality_log_odds = 0.05 * age + 0.3 * severity_score - 0.8 * treatment - 5
mortality_risk = _logistic(mortality_log_odds)
outcome = np.random.binomial(1, mortality_risk)

# One row per patient
df = pd.DataFrame(
    dict(
        Age=age,
        Severity=severity_score,
        Treated_with_AI=treatment,
        Mortality=outcome,
    )
)

# Re-calculate Propensity Score needed for matching
# The score is the model's fitted probability of being treated given Age and
# Severity; it is stored on df as a new 'Propensity_Score' column.
ps_features = df[['Age', 'Severity']]
ps_model = LogisticRegression().fit(ps_features, df['Treated_with_AI'])
# Column index 1 of predict_proba is taken as P(Treated_with_AI == 1)
# (labels here are 0/1, so the second class column is the treated class).
df['Propensity_Score'] = ps_model.predict_proba(ps_features)[:, 1]
# --------------------------------------------------

# Separate the groups
is_treated = df['Treated_with_AI'].eq(1)
treated_group = df.loc[is_treated]
control_group = df.loc[df['Treated_with_AI'].eq(0)]

# 1. Find the "Twin" for every treated patient
# A 1-nearest-neighbour index is built over the controls' propensity scores and
# then queried once per treated patient. Each query is independent, so the same
# control row may be returned for several treated patients.
nbrs = NearestNeighbors(n_neighbors=1)
nbrs.fit(control_group[['Propensity_Score']])
distances, indices = nbrs.kneighbors(treated_group[['Propensity_Score']])

# 2. Build the "Matched" Control Group
# `indices` holds row POSITIONS within control_group (shape: n_treated x 1),
# so it is flattened and used with positional .iloc rather than label-based .loc.
matched_control = control_group.iloc[indices.ravel()]

# ==========================================
# THE VERDICT
# ==========================================
# Prints the naive (unmatched) mortality comparison next to the
# propensity-matched comparison. The interpretive sentences are selected from
# the computed numbers instead of being hard-coded, so the printed story can
# never contradict the figures if the seed or simulation settings change.

print("-" * 40)
print("THE FINAL VERDICT (Apples-to-Apples)")
print("-" * 40)

# Calculate Mortality in the Matched Groups
mortality_treated_adj = treated_group['Mortality'].mean()
mortality_control_adj = matched_control['Mortality'].mean()

# Recalculate naive mortality for display
mortality_treated = df[df['Treated_with_AI']==1]['Mortality'].mean()
mortality_untreated = df[df['Treated_with_AI']==0]['Mortality'].mean()

# Describe what the naive comparison *appears* to show, based on its sign.
if mortality_treated > mortality_untreated:
    naive_reading = "(Looks like the AI hurts patients)"
elif mortality_treated < mortality_untreated:
    naive_reading = "(Looks like the AI helps patients)"
else:
    naive_reading = "(Looks like the AI makes no difference)"

print("Unadjusted Mortality (The Lie):")
print(f"   Treated: {mortality_treated*100:.1f}% vs Untreated: {mortality_untreated*100:.1f}%")
print(f"   {naive_reading}\n")

print("Adjusted Mortality (The Truth):")
print(f"   Treated: {mortality_treated_adj*100:.1f}%")
print(f"   Matched Control: {mortality_control_adj*100:.1f}%")

# Calculate the Benefit
# Positive benefit => matched controls died more often than treated patients.
benefit = mortality_control_adj - mortality_treated_adj

if benefit > 0:
    matched_reading = "When we compare 'Twins', the AI Protocol SAVES lives."
elif benefit < 0:
    matched_reading = "When we compare 'Twins', the AI Protocol does NOT save lives."
else:
    matched_reading = "When we compare 'Twins', the AI Protocol shows no difference in mortality."

print("\nCONCLUSION:")
print(matched_reading)
print(f"Absolute Risk Reduction: {benefit*100:.1f}%")
print("-" * 40)

----------------------------------------
THE FINAL VERDICT (Apples-to-Apples)
----------------------------------------
Unadjusted Mortality (The Lie):
   Treated: 42.2% vs Untreated: 38.6%
   (Looks like the AI hurts patients)

Adjusted Mortality (The Truth):
   Treated: 42.2%
   Matched Control: 62.0%

CONCLUSION:
When we compare 'Twins', the AI Protocol SAVES lives.
Absolute Risk Reduction: 19.8%
----------------------------------------
