# 💨 Air Purifier Impact LSTM — Training Notebook

Train an LSTM model to predict the air-quality impact of an **air purifier**,
based on a 24-step time-series of environmental conditions.

**Inputs (per time-step):** `current_aqi`, `current_pm25`, `room_size_sqft`, `ventilation_rate`

**Outputs:** `pm25_reduction_percent`, `cadr_m3_per_hr`, `effective_coverage_sqft`

Saves:
- `models/purifier_lstm.pth`
- `models/purifier_scaler.json`

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import json
import os
import matplotlib.pyplot as plt

# Report the installed PyTorch build, then select the compute backend:
# CUDA when torch reports it as available, otherwise the CPU.
print(f"PyTorch {torch.__version__}")
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(f"Using device: {device}")

In [None]:
# ======================== CONFIG ========================
# --- data layout ---
SEQ_LEN = 24          # time-steps per sequence (hours)
INPUT_DIM = 4         # features per step: aqi, pm25, room_size, ventilation_rate
OUTPUT_DIM = 3        # targets: pm25_red_pct, cadr, coverage
NUM_SAMPLES = 5_000   # synthetic sequences to generate

# --- network ---
HIDDEN_DIM = 64       # LSTM hidden-state width
NUM_LAYERS = 2        # stacked LSTM layers
DROPOUT = 0.2         # dropout passed to the LSTM

# --- optimisation ---
EPOCHS = 100
BATCH_SIZE = 32
LR = 1e-3             # Adam learning rate

In [None]:
# ===================== DATA GENERATION =====================
def generate_purifier_data(num_samples: int, seq_len: "int | None" = None,
                           seed: "int | None" = 42):
    """
    Generate synthetic time-series data for air purifier impact prediction.

    Each sample is a ``seq_len``-step sequence of environmental readings
    ``[aqi, pm25, room_size_sqft, ventilation_rate]``; the target is the
    expected purifier impact ``[pm25_reduction_pct, cadr, coverage_sqft]``.
    The target formula is intended to mirror
    calculations.ts -> estimatePurifierImpact().

    Args:
        num_samples: Number of sequences to generate (>= 0).
        seq_len: Time-steps per sequence. ``None`` (default) uses the
            module-level ``SEQ_LEN``.
        seed: Seed for a private random generator. The default (42)
            reproduces the values the legacy ``np.random.seed(42)`` based
            implementation produced. ``None`` draws a fresh, unseeded stream.

    Returns:
        ``(X, y)`` as float32 arrays of shape ``(num_samples, seq_len, 4)``
        and ``(num_samples, 3)``.

    Raises:
        ValueError: If ``num_samples`` is negative or ``seq_len`` is < 1.
    """
    if seq_len is None:
        seq_len = SEQ_LEN
    if num_samples < 0:
        raise ValueError(f"num_samples must be >= 0, got {num_samples}")
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, without reseeding the process-wide RNG.
    rng = np.random.RandomState(seed)
    X, y = [], []

    # NOTE: the order of the rng calls below defines the generated data;
    # reordering them changes every sample.
    for _ in range(num_samples):
        base_aqi        = rng.uniform(50, 400)
        base_pm25       = rng.uniform(20, 200)
        room_size_sqft  = rng.uniform(100, 800)   # Room size (sq ft)
        ventilation     = rng.uniform(0.5, 8.0)   # Air changes per hour

        seq = []
        for t in range(seq_len):
            # Per-step jitter plus a random slope that grows with t.
            aqi  = base_aqi  + rng.normal(0, 8) + t * rng.normal(0, 0.3)
            pm25 = base_pm25 + rng.normal(0, 4) + t * rng.normal(0, 0.15)
            # Ventilation is floored at 0.1 so it never becomes non-positive.
            vent = max(0.1, ventilation + rng.normal(0, 0.2))
            seq.append([aqi, pm25, room_size_sqft, vent])

        X.append(seq)

        # ---------- Target calculation (mirrors calculations.ts) ----------
        # Base purifier specs, drawn before the room-condition adjustments
        coverage_sqft        = 300 + rng.random_sample() * 300     # 300 – 600 sqft
        cadr                 = 250 + rng.random_sample() * 200     # 250 – 450 m³/hr
        pm25_reduction_pct   = 60  + rng.random_sample() * 30      # 60 – 90 %

        # Adjustments based on room conditions
        # Smaller rooms -> better coverage efficiency
        room_factor = np.clip(500 / max(room_size_sqft, 100), 0.5, 1.5)
        # Higher AQI -> slightly lower effective reduction (filter saturation)
        aqi_factor  = np.clip(1.2 - (base_aqi / 500) * 0.4, 0.6, 1.2)
        # Better ventilation -> slightly lower efficiency (more outdoor air influx)
        vent_factor = np.clip(1.1 - ventilation * 0.03, 0.7, 1.1)

        pm25_reduction_pct *= room_factor * aqi_factor * vent_factor
        pm25_reduction_pct  = np.clip(pm25_reduction_pct, 30, 99)

        cadr           *= room_factor
        coverage_sqft  *= room_factor

        y.append([pm25_reduction_pct, cadr, coverage_sqft])

    # Explicit reshape keeps the rank stable even when num_samples == 0.
    features = np.array(X, dtype=np.float32).reshape(num_samples, seq_len, 4)
    targets  = np.array(y, dtype=np.float32).reshape(num_samples, 3)
    return features, targets


# Build the synthetic dataset once: X holds the input sequences, y the
# per-sequence targets. The prints are a quick sanity check of the shapes
# and of the first target row.
X, y = generate_purifier_data(NUM_SAMPLES)
print(f"X shape: {X.shape}  |  y shape: {y.shape}")
print(f"Sample target: {y[0]}")

In [None]:
# =================== NORMALISATION ===================
# Train / Validation split (80 / 20), taken from the actual number of
# generated samples so it stays correct if X was built with another size.
split = int(0.8 * len(X))
if split == 0:
    raise ValueError("Need at least 2 samples to build a non-empty training split.")

# Fit the min-max statistics on the TRAINING portion only, so no information
# from the validation samples leaks into the scaler that is later saved and
# reused for inference. Validation values may fall slightly outside [0, 1].
x_flat = X[:split].reshape(-1, INPUT_DIM)
x_min  = x_flat.min(axis=0)
x_max  = x_flat.max(axis=0)
y_min  = y[:split].min(axis=0)
y_max  = y[:split].max(axis=0)

# The small epsilon guards against division by zero for a constant feature.
X_norm = (X - x_min) / (x_max - x_min + 1e-8)
y_norm = (y - y_min) / (y_max - y_min + 1e-8)

X_train, X_val = X_norm[:split], X_norm[split:]
y_train, y_val = y_norm[:split], y_norm[split:]

# torch.as_tensor replaces the legacy torch.FloatTensor constructor; the
# explicit dtype keeps the tensors float32 whatever the array dtype is.
train_ds = TensorDataset(
    torch.as_tensor(X_train, dtype=torch.float32),
    torch.as_tensor(y_train, dtype=torch.float32),
)
val_ds = TensorDataset(
    torch.as_tensor(X_val, dtype=torch.float32),
    torch.as_tensor(y_val, dtype=torch.float32),
)

# Only the training loader shuffles; validation order is irrelevant.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE)

print(f"Train: {len(train_ds)} | Val: {len(val_ds)}")

In [None]:
# ==================== LSTM MODEL ====================
class PurifierLSTM(nn.Module):
    """Stacked LSTM followed by a small MLP head.

    The network reads a batch-first sequence and regresses ``output_dim``
    values from the LSTM output at the last time-step.

    Args:
        input_dim: Number of features per time-step.
        hidden_dim: Width of the LSTM hidden state.
        output_dim: Number of regression targets.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers. It is only applied when
            ``num_layers > 1``, because inter-layer dropout has no effect on
            a single layer and PyTorch warns if it is requested there.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout=0.2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        # Attribute names and layer order are kept so existing state_dict
        # checkpoints (lstm.*, fc.0.*, fc.2.*) still load.
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
        )

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        # nn.LSTM initialises h0/c0 to zeros when no state is given, and does
        # so on the input's device and dtype. Hand-built float32 zeros would
        # break the model under .double() / .half().
        out, _ = self.lstm(x)
        # Only the final time-step feeds the regression head.
        return self.fc(out[:, -1, :])


# Instantiate the network from the CONFIG constants and move it to the
# selected device before the optimizer captures its parameters.
model = PurifierLSTM(
    input_dim=INPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
)
model = model.to(device)

# Mean-squared-error regression objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# Halve the learning rate once the monitored loss has stopped improving
# for 10 consecutive scheduler steps ("min" is the scheduler's default mode).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=10
)

# Count every parameter element (trainable or not) for the summary line.
total_params = 0
for _param in model.parameters():
    total_params += _param.numel()
print(f"PurifierLSTM  |  Parameters: {total_params:,}")
print(model)

In [None]:
# ==================== TRAINING LOOP ====================
# Per-epoch mean losses, consumed by the loss-curve plot.
train_losses = []
val_losses   = []

# Sample counts used to turn summed losses into true per-sample means.
n_train = len(train_loader.dataset)
n_val   = len(val_loader.dataset)

for epoch in range(EPOCHS):
    # --- train ---
    model.train()
    epoch_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean; weight it by the batch size so a
        # shorter final batch does not count as much as a full one.
        epoch_loss += loss.item() * xb.size(0)
    avg_train = epoch_loss / n_train
    train_losses.append(avg_train)

    # --- validate ---
    model.eval()
    v_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            v_loss += criterion(model(xb), yb).item() * xb.size(0)
    avg_val = v_loss / n_val
    val_losses.append(avg_val)
    # The plateau scheduler is driven by the validation loss.
    scheduler.step(avg_val)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:3d}/{EPOCHS}  Train: {avg_train:.6f}  Val: {avg_val:.6f}")

print("\n✅ Training complete.")

In [None]:
# ==================== LOSS CURVES ====================
# Plot the per-epoch training and validation losses on one set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_losses, label="Train Loss")
ax.plot(val_losses,   label="Val Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE Loss")
# Title uses a real em dash (the original string was mis-encoded).
ax.set_title("Purifier LSTM — Training Progress")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# ==================== SAVE MODEL & SCALER ====================
# Persist the trained weights and the min/max constants used for
# normalisation, so inference code can rescale inputs and outputs.
os.makedirs("models", exist_ok=True)

torch.save(model.state_dict(), "models/purifier_lstm.pth")

# NumPy arrays are converted to plain lists so they are JSON-serialisable.
scaler_data = {
    key: bounds.tolist()
    for key, bounds in (
        ("x_min", x_min),
        ("x_max", x_max),
        ("y_min", y_min),
        ("y_max", y_max),
    )
}
with open("models/purifier_scaler.json", "w") as f:
    f.write(json.dumps(scaler_data, indent=2))

print("Saved  models/purifier_lstm.pth")
print("Saved  models/purifier_scaler.json")
size_kb = os.path.getsize('models/purifier_lstm.pth') / 1024
print(f"Model size: {size_kb:.1f} KB")

In [None]:
# ==================== QUICK TEST ====================
# Run the trained model on the first normalised sample and compare the
# de-normalised prediction with the stored target for that sample.
model.eval()
test_x = torch.FloatTensor(X_norm[:1]).to(device)

with torch.no_grad():
    pred_norm = model(test_x).cpu().numpy()[0]

# Undo the min-max scaling applied to the targets.
pred_actual = y_min + pred_norm * (y_max - y_min)
actual      = y[0]

labels = ["pm25_reduction_pct", "cadr_m3_per_hr", "coverage_sqft"]

# Fixed-width comparison table.
rule_heavy = "=" * 55
rule_light = "-" * 55
print(rule_heavy)
print(f"{'Metric':<22} {'Predicted':>12} {'Actual':>12}")
print(rule_light)
for idx, lbl in enumerate(labels):
    p, a = pred_actual[idx], actual[idx]
    print(f"{lbl:<22} {p:>12.4f} {a:>12.4f}")
print(rule_heavy)

# JSON output example
predictions = {}
for lbl, p in zip(labels, pred_actual):
    predictions[lbl] = round(float(p), 4)
result_json = {
    "type": "air_purifier",
    "predictions": predictions,
}
print("\nJSON output:")
print(json.dumps(result_json, indent=2))