# Health Risk Prediction (Mini Version)

This mini project trains a simple regression model to **predict BMI** from lifestyle features using **PyTorch** if available, or a **NumPy fallback** if PyTorch is not installed.

**Steps covered:**
1. Dataset Generation (12 synthetic rows)
2. Normalization (custom StandardScaler)
3. Training (PyTorch Linear Regression or NumPy GD)
4. Evaluation (MSE and R², computed on the training data)
5. Prediction for a new sample (7h sleep, 7000 steps, 2200 calories)

> You can run all cells in order. If `torch` isn’t available in your environment, the notebook will automatically use the NumPy implementation.


In [None]:

import numpy as np

# ---------- Helper: simple StandardScaler (no sklearn needed) ----------
class StandardScalerLite:
    """Minimal column-wise standardizer (zero mean, unit spread).

    Statistics are computed along axis 0 using NumPy's default population
    standard deviation. Inputs may be any array-like (nested lists included);
    they are converted with ``np.asarray`` before use.

    Attributes set by ``fit``:
        mean_: mean of the fitted data along axis 0.
        std_: standard deviation along axis 0, with zeros replaced by 1.0.
    """

    def fit(self, X):
        """Compute ``mean_`` and ``std_`` from ``X`` and return ``self``."""
        X = np.asarray(X)
        self.mean_ = X.mean(axis=0)
        spread = X.std(axis=0)
        # A constant column has zero spread; dividing by 1.0 instead keeps the
        # transform finite. np.where is used rather than in-place boolean
        # assignment so that 1-D input (scalar statistics) also works.
        self.std_ = np.where(spread == 0, 1.0, spread)
        return self

    def transform(self, X):
        """Return ``X`` shifted by ``mean_`` and divided by ``std_``."""
        return (np.asarray(X) - self.mean_) / self.std_

    def fit_transform(self, X):
        """Fit on ``X`` and return the transformed ``X``."""
        return self.fit(X).transform(X)

    def inverse_transform(self, X_scaled):
        """Undo ``transform``: multiply by ``std_`` and add ``mean_``."""
        return (np.asarray(X_scaled) * self.std_) + self.mean_

# ---------- Step 1: create dummy dataset ----------
# Seeded so every run produces the same synthetic rows. The draw order
# (sleep, steps, cal, noise) must not change, or the data changes with it.
n_rows = 12
np.random.seed(42)
sleep = np.random.randint(5, 9, size=n_rows)
steps = np.random.randint(3500, 12000, size=n_rows)
cal = np.random.randint(1600, 3200, size=n_rows)
noise = np.random.normal(0, 0.7, size=n_rows)

# Linear ground truth around a baseline of 28.0, plus Gaussian noise.
bmi = 28.0 - 0.22 * (sleep - 6.5) - 0.0006 * (steps - 7000) + 0.0020 * (cal - 2200) + noise

# Feature matrix with one column per feature; target as a column vector.
X = np.column_stack((sleep, steps, cal)).astype(float)
y = bmi.astype(float)[:, None]

preview = np.hstack([X, y])[:5]
print("Sample rows:\n", preview)

# ---------- Step 2: normalization ----------
# Standardize the features column-wise; the fitted scaler is reused later
# to transform new samples.
scaler = StandardScalerLite()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Standardize the target with scalar statistics. A zero spread falls back
# to 1.0 so the division below stays defined.
y_mean = y.mean()
y_spread = y.std()
y_std = y_spread if y_spread != 0 else 1.0
y_scaled = (y - y_mean) / y_std

# ---------- Metrics ----------
def mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Both inputs are converted to arrays and flattened before subtraction, so
    a column vector of shape (n, 1) and a flat vector of shape (n,) are
    compared element-wise instead of broadcasting into an (n, n) matrix.

    Args:
        y_true: array-like of target values.
        y_pred: array-like of predicted values.

    Returns:
        The mean of the squared differences as a Python float.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    return float(np.mean((y_true - y_pred) ** 2))

def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) as a Python float.

    Both inputs are reshaped to column vectors so differently shaped
    targets and predictions are compared element-wise.

    Args:
        y_true: array-like of target values.
        y_pred: array-like of predicted values.

    Returns:
        ``1 - ss_res / ss_tot``. When the targets are constant
        (``ss_tot == 0``) the ratio is undefined; the result is then 1.0 for
        a perfect prediction and 0.0 otherwise, so a wrong prediction of a
        constant target is never reported as a perfect fit.
    """
    y_true = np.asarray(y_true).reshape(-1, 1)
    y_pred = np.asarray(y_pred).reshape(-1, 1)
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1.0 - ss_res / ss_tot

# ---------- Step 3: Train (PyTorch if available else NumPy) ----------
# Both back-ends fit a linear model on the standardized features and target
# for 800 full-batch epochs, then map the predictions back to original units.
try:
    import torch
except Exception:
    # Kept broad on purpose: any import-time failure, not only a missing
    # package, selects the NumPy fallback instead of aborting the run.
    use_torch = False
else:
    use_torch = True

if use_torch:
    torch.manual_seed(42)

    X_t = torch.tensor(X_scaled, dtype=torch.float32)
    y_t = torch.tensor(y_scaled, dtype=torch.float32)

    # Input width follows the feature matrix rather than a hard-coded 3.
    model = torch.nn.Sequential(torch.nn.Linear(X_scaled.shape[1], 1))
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
    loss_fn = torch.nn.MSELoss()

    epochs = 800
    losses = []
    for ep in range(epochs):
        optimizer.zero_grad()
        preds = model(X_t)
        loss = loss_fn(preds, y_t)
        loss.backward()
        optimizer.step()
        # Loss is recorded before this epoch's parameter update takes effect.
        losses.append(float(loss.item()))

    # Inference only: no gradient tracking needed for the final predictions.
    with torch.no_grad():
        y_pred_scaled = model(X_t).numpy()

    # Undo the target standardization to get predictions in original units.
    y_pred = (y_pred_scaled * y_std) + y_mean
    framework = "PyTorch"
    final_train_loss = losses[-1]
else:
    np.random.seed(42)
    n, d = X_scaled.shape
    W = np.random.randn(d, 1) * 0.01  # small random initial weights
    b = np.zeros((1,))
    lr = 0.05
    epochs = 800
    losses = []

    # Plain full-batch gradient descent on the mean squared error.
    for ep in range(epochs):
        y_hat = X_scaled @ W + b
        loss = np.mean((y_hat - y_scaled)**2)
        losses.append(float(loss))
        # Gradients of the MSE with respect to W and b.
        dW = (2.0/n) * (X_scaled.T @ (y_hat - y_scaled))
        db = (2.0/n) * np.sum(y_hat - y_scaled, axis=0)
        W -= lr * dW
        b -= lr * db

    y_pred_scaled = X_scaled @ W + b
    # Undo the target standardization to get predictions in original units.
    y_pred = (y_pred_scaled * y_std) + y_mean
    framework = "NumPy (fallback)"
    final_train_loss = losses[-1]

# ---------- Step 4: Evaluation ----------
# Metrics compare the original-unit targets against the de-standardized
# predictions; the reported train loss is on the standardized target.
final_mse, final_r2 = mse(y, y_pred), r2_score(y, y_pred)

print(
    f"Framework used     : {framework}",
    f"Final train loss   : {final_train_loss:.6f} (on scaled y)",
    f"MSE (original y)   : {final_mse:.6f}",
    f"R^2 (original y)   : {final_r2:.6f}",
    sep="\n",
)

# ---------- Step 5: Predict on a new sample ----------
# One row in the same column order as X: sleep hours, steps, calories.
new_sample = np.array([[7.0, 7000.0, 2200.0]])
# Reuse the scaler fitted on the training features.
new_sample_scaled = scaler.transform(new_sample)

if use_torch:
    with torch.no_grad():
        pred_scaled = model(torch.tensor(new_sample_scaled, dtype=torch.float32)).numpy()
else:
    pred_scaled = new_sample_scaled @ W + b

# The prediction is a (1, 1) array. Extract the single element with .item();
# calling float() directly on an array with ndim > 0 is deprecated in NumPy.
pred_bmi = float(((pred_scaled * y_std) + y_mean).item())
print(f"New sample (7h, 7000 steps, 2200 cal) -> Predicted BMI: {pred_bmi:.3f}")
