# 💧 Water Harvesting LSTM — Training Notebook

Train an LSTM model to predict **rainwater harvesting potential** for building
rooftops, based on a 24-step time-series of rain and building conditions.

**Inputs (per time-step):**
- `rain_intensity` — rainfall intensity multiplier (0.1–2.0)
- `rain_angle` — rain tilt angle (−1.5 to 1.5, 0 = vertical)
- `rain_size` — droplet size factor (0.1–2.0)
- `rain_speed` — droplet speed (10–100)
- `roof_area_m2` — building roof area (m²)
- `roof_angle` — roof slope angle (0–45°)

**Outputs:**
- `collection_efficiency_pct` — overall collection efficiency (%)
- `liters_per_hour` — water collected per hour (L)
- `liters_per_day` — water collected per day (L)
- `harvesting_potential_pct` — harvesting potential score (%)

Saves:
- `models/water_lstm.pth`
- `models/water_scaler.json`

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import json
import os
import matplotlib.pyplot as plt

# Report the installed PyTorch build, then pick CUDA when it is available
# and fall back to the CPU otherwise.
print(f"PyTorch {torch.__version__}")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# ======================== CONFIG ========================
# --- Tensor shapes ---
SEQ_LEN = 24      # 24 time-steps (hourly readings) per sample
INPUT_DIM = 6     # intensity, angle, size, speed, roof_area, roof_angle
OUTPUT_DIM = 4    # efficiency, liters/hr, liters/day, potential

# --- Network architecture ---
HIDDEN_DIM = 64
NUM_LAYERS = 2
DROPOUT = 0.2

# --- Optimisation ---
EPOCHS = 100
BATCH_SIZE = 32
LR = 0.001

# --- Synthetic dataset size ---
NUM_SAMPLES = 5000

# --- Constants from calculations.ts ---
BASE_RAINFALL_RATE = 5    # mm/hour for moderate rain
RAIN_HOURS_PER_DAY = 6    # assumed rain hours per rainy day

In [None]:
# ===================== DATA GENERATION =====================
def calculate_water_harvesting_potential(area, roof_angle=0, rain_angle=0):
    """Score the rainwater harvesting potential of a roof, in percent.

    Mirrors calculateWaterHarvestingPotential() in calculations.ts.

    Each argument may be a scalar or an array-like; array-likes are combined
    element-wise with NumPy broadcasting, so a whole batch can be scored in
    one call. Scalar inputs produce the same value as before.

    Args:
        area: Roof area. Contributes ``log(area + 1) * 1.5`` to the score.
        roof_angle: Roof slope. Contributes ``20 - roof_angle / 1.5``,
            floored at 0 so steep roofs never subtract from the score.
        rain_angle: Rain tilt. Its magnitude is penalised at 5 points per
            unit, regardless of sign.

    Returns:
        The score clipped to the range [40, 95]: a NumPy scalar for scalar
        inputs, an ndarray for array-like inputs.
    """
    area = np.asarray(area)
    roof_angle = np.asarray(roof_angle)
    rain_angle = np.asarray(rain_angle)

    base_potential = 60
    area_effect = np.log(area + 1) * 1.5
    # np.maximum (not the built-in max) so the floor is applied per element;
    # built-in max() raises on arrays with more than one element.
    roof_angle_effect = np.maximum(0, 20 - roof_angle / 1.5)
    rain_angle_effect = np.abs(rain_angle) * 5

    total = base_potential + area_effect + roof_angle_effect - rain_angle_effect
    return np.clip(total, 40, 95)


def calculate_collection_efficiency(rain_angle, rain_size, rain_speed, roof_angle):
    """Return the fraction of rainfall a roof collects, clipped to [0.3, 0.95].

    Mirrors the efficiency calc in calculateAdvancedWaterCollection().
    Four multiplicative factors are combined:

    * tilt: loses 0.2 per unit of ``|rain_angle|``
    * droplet size: ``0.8 + 0.1 * rain_size``
    * speed: loses 0.15 per 100 units of distance from the optimum of 50
    * roof slope: loses ``roof_angle / 100``
    """
    optimal_speed = 50

    # Accumulate the product factor by factor, in the order tilt -> droplet
    # size -> speed -> roof slope.
    efficiency = 1 - abs(rain_angle) * 0.2
    efficiency = efficiency * (0.8 + rain_size * 0.1)
    efficiency = efficiency * (1 - (abs(rain_speed - optimal_speed) / 100) * 0.15)
    efficiency = efficiency * (1 - (roof_angle / 100))

    return np.clip(efficiency, 0.3, 0.95)


def generate_water_data(num_samples: int):
    """Build a synthetic dataset for water harvesting prediction.

    Each sample is a SEQ_LEN-step sequence of
    ``[intensity, angle, size, speed, roof_area, roof_angle]`` rows. The rain
    features fluctuate from step to step while the two roof features stay
    constant within a sample. The target row is
    ``[efficiency %, liters/hour, liters/day, potential %]``, computed from
    the sequence-averaged rain features; the formula mirrors
    calculations.ts -> calculateAdvancedWaterCollection().

    NumPy's global RNG is re-seeded with 42 on every call, so repeated calls
    return identical data.

    Returns:
        A ``(features, targets)`` pair of float32 arrays.
    """
    np.random.seed(42)
    features, targets = [], []

    for _sample in range(num_samples):
        # Per-sample baselines. The draws are kept in this order because the
        # seeded random stream (and hence the dataset) depends on it.
        intensity0 = np.random.uniform(0.1, 2.0)
        tilt0 = np.random.uniform(-1.5, 1.5)
        droplet0 = np.random.uniform(0.1, 2.0)
        velocity0 = np.random.uniform(10, 100)
        roof_area = np.random.uniform(50, 600)   # m²
        roof_angle = np.random.uniform(0, 40)    # degrees

        steps = []
        for hour in range(SEQ_LEN):
            # Rain varies over time: a sinusoidal storm surge scales the
            # intensity, and slower sinusoids shift the angle and the speed.
            surge = 1.0 + 0.3 * np.sin(hour * np.pi / 6)

            step_intensity = np.clip(
                intensity0 * surge + np.random.normal(0, 0.1), 0.05, 3.0
            )
            step_tilt = np.clip(
                tilt0 + np.random.normal(0, 0.15) + 0.1 * np.sin(hour * np.pi / 8),
                -2.0,
                2.0,
            )
            step_droplet = np.clip(droplet0 + np.random.normal(0, 0.08), 0.05, 3.0)
            step_velocity = np.clip(
                velocity0 + np.random.normal(0, 3) + 5 * np.sin(hour * np.pi / 10),
                5,
                120,
            )

            steps.append(
                [step_intensity, step_tilt, step_droplet, step_velocity,
                 roof_area, roof_angle]
            )

        features.append(steps)

        # ---------- Target: mirrors calculateAdvancedWaterCollection() ----------
        # Average each of the four rain columns over the sequence.
        mean_intensity, mean_tilt, mean_droplet, mean_velocity = (
            np.mean([row[col] for row in steps]) for col in range(4)
        )

        efficiency = calculate_collection_efficiency(
            mean_tilt, mean_droplet, mean_velocity, roof_angle
        )
        potential = calculate_water_harvesting_potential(
            roof_area, roof_angle, mean_tilt
        )

        # Liters per hour = roof area (m²) × rainfall rate (mm/hr) × efficiency,
        # using 1 L per m² per mm of rain.
        hourly_liters = roof_area * (BASE_RAINFALL_RATE * mean_intensity) * efficiency
        # A rainy day is assumed to have RAIN_HOURS_PER_DAY hours of rain.
        daily_liters = hourly_liters * RAIN_HOURS_PER_DAY

        targets.append([efficiency * 100, hourly_liters, daily_liters, potential])

    return np.array(features, dtype=np.float32), np.array(targets, dtype=np.float32)


# Generate the dataset once, then print its shapes plus one example row and
# target as a quick sanity check.
X, y = generate_water_data(num_samples=NUM_SAMPLES)
for summary_line in (
    f"X shape: {X.shape}  |  y shape: {y.shape}",
    f"Sample input (last step): {X[0][-1]}",
    f"Sample target: {y[0]}",
):
    print(summary_line)

In [None]:
# =================== NORMALISATION ===================
# Min-max scale every feature and target column. Feature statistics are taken
# over all time-steps of all samples by flattening to (rows, INPUT_DIM).
x_flat = X.reshape(-1, INPUT_DIM)
x_min, x_max = x_flat.min(axis=0), x_flat.max(axis=0)
y_min, y_max = y.min(axis=0), y.max(axis=0)

# The small epsilon keeps the division defined for a constant column.
X_norm = (X - x_min) / (x_max - x_min + 1e-8)
y_norm = (y - y_min) / (y_max - y_min + 1e-8)

# Train / Validation split (80 / 20), taken in generation order.
split = int(0.8 * NUM_SAMPLES)
X_train, y_train = X_norm[:split], y_norm[:split]
X_val, y_val = X_norm[split:], y_norm[split:]

train_ds = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
val_ds = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

print(f"Train: {len(train_ds)} | Val: {len(val_ds)}")

feature_names = ["intensity", "angle", "size", "speed", "roof_area", "roof_angle"]
target_names = ["efficiency%", "liters/hr", "liters/day", "potential%"]

# Print the raw (un-normalised) range of every column, features first.
for heading, names, lows, highs in (
    ("\nFeature ranges:", feature_names, x_min, x_max),
    ("\nTarget ranges:", target_names, y_min, y_max),
):
    print(heading)
    for name, low, high in zip(names, lows, highs):
        print(f"  {name:>12}: [{low:.2f}, {high:.2f}]")

In [None]:
# ==================== LSTM MODEL ====================
class WaterLSTM(nn.Module):
    """Stacked LSTM regressor mapping a feature sequence to one output vector.

    The LSTM output at the final time-step is passed through a small MLP head
    (``hidden_dim -> 32 -> output_dim``).

    Args:
        input_dim: Number of features per time-step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of regression targets.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout=0.2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            # nn.LSTM applies dropout only between stacked layers and warns
            # when it is non-zero for a single layer, so disable it there.
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim),
        )

    def forward(self, x):
        """Predict the targets for a batch of sequences.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        # With no explicit state, nn.LSTM starts from all-zero hidden and cell
        # states that match the input's device and dtype. The zeros are the
        # same values as before, but no CPU tensor is copied across on every
        # call and non-float32 models (e.g. after ``.double()``) no longer
        # hit a dtype mismatch.
        out, _ = self.lstm(x)
        # Only the last time-step feeds the regression head.
        return self.fc(out[:, -1, :])


# Build the network on the selected device, then its loss, optimiser and
# learning-rate schedule.
model = WaterLSTM(
    input_dim=INPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# Halve the learning rate once the monitored loss has stopped improving for
# 10 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.5, patience=10
)

total_params = sum(param.numel() for param in model.parameters())
print(f"WaterLSTM  |  Parameters: {total_params:,}")
print(model)

In [None]:
# ==================== TRAINING LOOP ====================
# Per-epoch mean losses, kept for plotting afterwards.
train_losses = []
val_losses = []

for epoch in range(EPOCHS):
    # --- train ---
    model.train()
    epoch_loss = 0.0
    train_seen = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # criterion yields a per-batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        epoch_loss += loss.item() * xb.size(0)
        train_seen += xb.size(0)
    avg_train = epoch_loss / train_seen
    train_losses.append(avg_train)

    # --- validate ---
    model.eval()
    v_loss = 0.0
    val_seen = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            v_loss += criterion(model(xb), yb).item() * xb.size(0)
            val_seen += xb.size(0)
    avg_val = v_loss / val_seen
    val_losses.append(avg_val)
    # The plateau scheduler is driven by the validation loss.
    scheduler.step(avg_val)

    # Log every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:3d}/{EPOCHS}  Train: {avg_train:.6f}  Val: {avg_val:.6f}")

print("\n✅ Training complete.")

In [None]:
# ==================== LOSS CURVES ====================
# Plot the per-epoch training and validation losses on one figure.
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label="Train Loss")
plt.plot(val_losses,   label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
# The title previously contained a mis-encoded em dash; it is restored here.
plt.title("Water LSTM — Training Progress")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# ==================== SAVE MODEL & SCALER ====================
# Write the trained weights and the min/max scaling bounds under models/.
os.makedirs("models", exist_ok=True)
torch.save(model.state_dict(), "models/water_lstm.pth")

# The bounds are NumPy arrays; convert them to plain lists for JSON.
scaler_data = {
    key: bounds.tolist()
    for key, bounds in (
        ("x_min", x_min),
        ("x_max", x_max),
        ("y_min", y_min),
        ("y_max", y_max),
    )
}
with open("models/water_scaler.json", "w") as f:
    f.write(json.dumps(scaler_data, indent=2))

print("Saved  models/water_lstm.pth")
print("Saved  models/water_scaler.json")
model_size_kb = os.path.getsize('models/water_lstm.pth') / 1024
print(f"Model size: {model_size_kb:.1f} KB")

In [None]:
# ==================== QUICK TEST ====================
# Run the first normalised sample through the model and compare the
# de-normalised prediction with its stored target.
model.eval()
test_x = torch.FloatTensor(X_norm[:1]).to(device)

with torch.no_grad():
    pred_norm = model(test_x).cpu().numpy()[0]

# Undo the min-max scaling of the targets.
y_span = y_max - y_min
pred_actual = pred_norm * y_span + y_min
actual = y[0]

labels = [
    "collection_efficiency_pct",
    "liters_per_hour",
    "liters_per_day",
    "harvesting_potential_pct",
]

rule = "=" * 65
print(rule)
print(f"{'Metric':<30} {'Predicted':>14} {'Actual':>14}")
print("-" * 65)
for metric, predicted, expected in zip(labels, pred_actual, actual):
    print(f"{metric:<30} {predicted:>14.2f} {expected:>14.2f}")
print(rule)

# JSON output example
result_json = {
    "type": "water_harvesting",
    "predictions": dict(
        zip(labels, (round(float(value), 2) for value in pred_actual))
    ),
}
print("\nJSON output:")
print(json.dumps(result_json, indent=2))