# Q7 – The "Z‑Day" Survival Algorithm (Logistic Regression from Scratch)

Requirements:
- **No sklearn/scipy**. Only `numpy` and `matplotlib` are used.
- Read `zday_data.csv`, which has three columns `Speed,Ammo,Label`: two features (Speed, Ammo) and a binary Label (0/1).
- Normalize the features (z-score), implement logistic regression with gradient descent.
- Report the prediction for **Speed=25 km/h, Ammo=1**.
- Plot the **loss curve** and the **decision boundary**.

If the CSV is missing, the notebook uses a reasonable **fallback dataset** embedded below so it still runs.

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Location of the training CSV; a relative path, so it is resolved against the
# current working directory when the loader checks for it.
CSV_PATH = Path('zday_data.csv')  # expected columns: Speed,Ammo,Label

def load_or_fallback(csv_path: Path):
    """Load the Z-Day dataset from ``csv_path`` or use the embedded fallback.

    When the file exists, its first row is treated as a header and skipped.
    Every following row with at least three fields contributes one sample:
    the first two fields are the features and the third is the label.
    Shorter rows (including blank lines) are ignored.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a float array of shape ``(n, 2)``
        and ``y`` is an int array of shape ``(n,)``. ``X`` keeps its two
        columns even when the file contains no data rows.

    Raises:
        ValueError: If a field of a data row cannot be parsed as a number.
    """
    if csv_path.exists():
        features, labels = [], []
        with open(csv_path, 'r', newline='') as f:
            rdr = csv.reader(f)
            next(rdr, None)  # discard the header row, if any
            for row in rdr:
                if len(row) < 3:
                    continue
                features.append([float(row[0]), float(row[1])])
                # Go through float first so labels written as "1.0" parse too.
                labels.append(int(float(row[2])))
        # reshape keeps X two-dimensional for a header-only/empty file, so
        # callers can always unpack ``X.shape`` into (n, d).
        X = np.array(features, dtype=float).reshape(-1, 2)
        y = np.array(labels, dtype=int)
        return X, y

    # Fallback dataset (synthetic but realistic):
    data = [
        # speed, ammo, label (1=survive)
        (18, 0, 0), (20, 0, 0), (15, 1, 0), (16, 0, 0), (17, 1, 0),
        (19, 1, 0), (21, 0, 0), (22, 0, 0), (23, 0, 0), (17, 2, 0),
        (20, 2, 1), (21, 1, 1), (22, 1, 1), (23, 1, 1), (24, 1, 1),
        (24, 2, 1), (25, 1, 1), (26, 1, 1), (27, 1, 1), (28, 2, 1),
        (19, 3, 1), (21, 3, 1), (18, 2, 0), (16, 3, 0), (26, 0, 0),
    ]
    X = np.array([[speed, ammo] for speed, ammo, _ in data], dtype=float)
    y = np.array([label for _, _, label in data], dtype=int)
    return X, y

# Build the feature matrix X and label vector y from CSV_PATH; the loader
# substitutes its embedded dataset when that file does not exist.
X, y = load_or_fallback(CSV_PATH)
# Notebook display: first five samples/labels and the array shapes.
X[:5], y[:5], X.shape, y.shape

## Normalize features (z-score)

In [None]:
# Per-feature mean and population standard deviation (ddof=0) of the data.
mu = X.mean(axis=0)
sigma = X.std(axis=0, ddof=0)
# A constant feature column has zero spread; dividing by it would turn Xn
# (and every later prediction) into NaN/inf. Use a unit scale for such
# columns so they simply normalize to all zeros.
sigma = np.where(sigma == 0, 1.0, sigma)
# Z-score: every feature column ends up with mean 0 and, unless constant, std 1.
Xn = (X - mu) / sigma

def normalize_point(p):
    """Z-score ``p`` using the module-level training statistics.

    ``p`` is converted to a float array, shifted by ``mu`` and divided by
    ``sigma``, so a raw sample is mapped into the same space as ``Xn``.
    """
    centered = np.asarray(p, dtype=float) - mu
    return centered / sigma

# Notebook display: first three normalized rows plus the statistics used.
Xn[:3], mu, sigma

## Logistic regression from scratch

In [None]:
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so
    large-magnitude inputs of either sign cannot overflow.

    Args:
        z: A scalar or array-like of real values.

    Returns:
        A NumPy float scalar for scalar input, otherwise a float array with
        the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function,
    # written so the exponent is always non-positive.
    out = np.where(z >= 0, 1.0, decay) / (1.0 + decay)
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return out[()]

def loss(X, y, w, b):
    """Mean binary cross-entropy of the model ``sigmoid(X @ w + b)`` on ``(X, y)``.

    A tiny constant is added inside each logarithm so that predicted
    probabilities of exactly 0 or 1 do not produce ``log(0)``.
    """
    tiny = 1e-12
    probs = sigmoid(X @ w + b)
    log_likelihood = y * np.log(probs + tiny) + (1 - y) * np.log(1 - probs + tiny)
    return -np.mean(log_likelihood)

def train_logreg_gd(X, y, lr=0.1, steps=3000):
    """Fit logistic regression with full-batch gradient descent.

    Weights start at zero and the bias at 0.0; every iteration uses all
    rows of ``X`` to compute the gradient of the mean cross-entropy.

    Args:
        X: 2-D feature matrix, one row per sample.
        y: Labels aligned with the rows of ``X``.
        lr: Step size applied to both the weight and bias gradients.
        steps: Number of gradient updates to perform.

    Returns:
        ``(w, b, history)``: the weight vector, the bias, and an array of
        loss values sampled right after the update on every 10th iteration
        and on the final iteration.
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    b = 0.0
    last_step = steps - 1
    history = []
    for step in range(steps):
        # Residual between predicted probability and label drives both gradients.
        residual = sigmoid(X @ w + b) - y
        scale = 1.0 / n_samples
        grad_w = scale * (X.T @ residual)
        grad_b = scale * np.sum(residual)
        w -= lr * grad_w
        b -= lr * grad_b
        # Only sample the loss every 10th step and at the very end.
        if step % 10 != 0 and step != last_step:
            continue
        history.append(loss(X, y, w, b))
    return w, b, np.array(history)

# Train on the normalized features; lr and steps override the function defaults.
w,b,history = train_logreg_gd(Xn, y, lr=0.2, steps=4000)
# Report the fitted parameters (these apply to normalized inputs) and the last
# recorded loss value.
print('Weights:', w)
print('Bias:', b)
print('Final loss:', history[-1])

## 1) Test Prediction – Runner: 25 km/h, 1 Ammo Clip

In [None]:
# Runner under evaluation in raw units: [speed in km/h, ammo clips].
test_raw = np.array([25.0, 1.0])
# Scale with the training statistics so the point lives in the model's input space.
test_norm = normalize_point(test_raw)
logit = test_norm @ w + b
prob = sigmoid(logit)
# Classify as survivor (1) when the probability reaches the 0.5 threshold.
pred = int(prob >= 0.5)
print(f"Probability of survival for (25 km/h, 1 ammo): {prob:.4f}")
print(f"Predicted class: {pred}")

## 2) Visualizing the Cost (Loss) dropping

In [None]:
# Draw the recorded training loss and write it to zday_cost.png.
plt.figure()
# The trainer samples the loss every 10 steps, so index*10 recovers the step.
# NOTE: the trainer also records the very last step, so the final point's
# x position is only approximate.
plt.plot(np.arange(history.size)*10, history)
plt.xlabel('Training step')
plt.ylabel('Loss (cross-entropy)')
plt.title('Logistic Regression – Loss Curve')
plt.tight_layout()
plt.savefig('zday_cost.png', dpi=160)
# Close the figure after saving so it is released instead of staying open.
plt.close()
print('Saved zday_cost.png')

## 3) Plot the decision boundary

In [None]:
# Grid in normalized feature space, padded by 0.5 beyond the data range on
# every side, on which the model probability is evaluated.
x_min, x_max = Xn[:,0].min()-0.5, Xn[:,0].max()+0.5
y_min, y_max = Xn[:,1].min()-0.5, Xn[:,1].max()+0.5
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                     np.linspace(y_min, y_max, 200))
# Predicted survival probability at every grid node (200 x 200).
zz = sigmoid(w[0]*xx + w[1]*yy + b)

plt.figure()
# Shade the two predicted-class regions (probability below / above 0.5).
cs = plt.contourf(xx, yy, zz, levels=[0,0.5,1], alpha=0.2)
# The 0.5 probability contour is the decision boundary itself.
plt.contour(xx, yy, zz, levels=[0.5], linewidths=2)
# Training samples, split by label, in normalized coordinates.
plt.scatter(Xn[y==0,0], Xn[y==0,1], marker='o', label='Infected (0)')
plt.scatter(Xn[y==1,0], Xn[y==1,1], marker='^', label='Survive (1)')
# Highlight the normalized test runner computed in the prediction cell.
plt.scatter([test_norm[0]], [test_norm[1]], c='red', edgecolors='k', s=80, label='Test (25,1)')
plt.xlabel('Speed (normalized)')
plt.ylabel('Ammo (normalized)')
plt.legend()
plt.title('Decision Boundary (normalized feature space)')
plt.tight_layout()
plt.savefig('zday_boundary.png', dpi=160)
# Close the figure after saving so it is released instead of staying open.
plt.close()
print('Saved zday_boundary.png')