<a href="https://colab.research.google.com/github/shrutimalik123/python-collab-3/blob/main/Predictive_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Day 3: Predictive Model Training (Logistic Regression)
# Objective: Demonstrate proficiency in machine learning frameworks (scikit-learn)
# by training a simple, interpretable model to predict a binary outcome (Risk).

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import numpy as np

print("--- Starting Predictive Model Training: Patient Risk Classifier ---")

# --- 1. Simulate Cleaned and Prepared Data ---

# Ten synthetic patient records: three numeric predictors (Age, MAP,
# Medication_Count) plus the binary label the classifier will learn.
data = dict(
    Patient_ID=[1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010],
    Age=[55.0, 32.0, 65.0, 78.0, 41.0, 60.0, 58.0, 78.0, 45.0, 62.0],
    # MAP is the feature engineered in the Day 1/2 exercises.
    MAP=[108.3, 93.3, 100.0, 116.6, 86.6, 103.3, 111.3, 98.0, 95.0, 105.0],
    Medication_Count=[4, 1, 2, 6, 1, 3, 5, 2, 3, 4],
    # Label: 0 = Low Risk, 1 = High Risk (adverse event / readmission).
    High_Risk=[1, 0, 0, 1, 0, 1, 1, 0, 0, 1],
)

df = pd.DataFrame(data)

# Show only the first five rows as a quick sanity check of columns and values.
snapshot = df.head()
print("\n[STEP 1] Data Snapshot (Features and Target):\n", snapshot)

# --- 2. Prepare Data for Modeling ---

# Define Features (X) and Target (y)
features = ['Age', 'MAP', 'Medication_Count']
X = df[features]
y = df['High_Risk']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/std used to transform the training rows
# (data leakage), which makes the held-out evaluation optimistic.
# stratify=y keeps the class ratio similar in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize features. Logistic Regression is not distance-based, but
# scikit-learn's default L2 penalty shrinks coefficients by magnitude, so
# features on very different scales would be regularized unevenly; scaling
# also makes the fitted coefficients comparable across features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn mean/std from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Kept so any code that still refers to X_scaled continues to work: all rows,
# expressed on the training-set scale (no refit on the full data).
X_scaled = scaler.transform(X)

print(f"\n[STEP 2] Training Set Size: {X_train.shape[0]} samples. Test Set Size: {X_test.shape[0]} samples.")

# --- 3. Model Training ---

# Build the classifier and fit it in one expression; fit() returns the fitted
# estimator itself, so `model` is the trained LogisticRegression.
# random_state is pinned so repeated runs give the same result.
model = LogisticRegression(random_state=42).fit(X_train, y_train)
print("\n[STEP 3] Logistic Regression Model Training Complete.")

# --- 4. Model Evaluation ---

# Score the held-out rows: hard class predictions first, then the fraction
# of them that match the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the headline accuracy followed by per-class precision/recall/F1.
# NOTE: the test partition is tiny (30% of 10 rows), so these numbers are
# illustrative only and should not be read as a real performance estimate.
print("\n[STEP 4] Model Performance Evaluation:")
print(f"Accuracy Score: {accuracy:.4f}")
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

# --- 5. Interpreting Coefficients (Crucial for Healthcare ML) ---

# Logistic Regression coefficients show the relationship between each feature
# and the log-odds of the outcome. model.coef_ has one row per fitted
# decision function; for this binary target row 0 holds one weight per entry
# in `features`. Rows are listed from the most positive coefficient downward.
print("\n[STEP 5] Feature Importance (Model Interpretation):")
coef_df = pd.DataFrame({
    'Feature': features,
    'Coefficient': model.coef_[0]
}).sort_values(by='Coefficient', ascending=False)

# DataFrame.to_markdown depends on the optional `tabulate` package and raises
# ImportError when it is not installed; fall back to a plain-text table so the
# notebook still runs to completion in a minimal environment.
try:
    coef_table = coef_df.to_markdown(index=False)
except ImportError:
    coef_table = coef_df.to_string(index=False)
print(coef_table)

print("\n--- Predictive Modeling Exercise Complete ---")
# A positive coefficient means higher values of that feature increase the
# predicted log-odds of High_Risk; a negative one decreases them. Read the
# ranking from the table printed above rather than assuming a particular
# feature dominates, and treat it as illustrative given the tiny training set.

--- Starting Predictive Model Training: Patient Risk Classifier ---

[STEP 1] Data Snapshot (Features and Target):
    Patient_ID   Age    MAP  Medication_Count  High_Risk
0        1001  55.0  108.3                 4          1
1        1002  32.0   93.3                 1          0
2        1003  65.0  100.0                 2          0
3        1004  78.0  116.6                 6          1
4        1005  41.0   86.6                 1          0

[STEP 2] Training Set Size: 7 samples. Test Set Size: 3 samples.

[STEP 3] Logistic Regression Model Training Complete.

[STEP 4] Model Performance Evaluation:
Accuracy Score: 1.0000

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


[STEP 5] Fe