In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from torch.utils.data import DataLoader, Dataset
from itertools import product

In [None]:
# Read the dataset CSV and separate the model inputs from the label.
df = pd.read_csv("/content/drive/MyDrive/Kathmandu-Precipitation/data/Outlier-removed-dataset.csv")
target = ["precipitation"]
# Every column other than the label and the timestamp is used as an input.
non_feature_columns = ("precipitation", "datetime")
features = [column for column in df.columns if column not in non_feature_columns]
X, y = df[features], df[target]

(1707, 7)
(1707, 1)


In [None]:
# Fit the scalers on the leading (training) portion of the series only.
# Fitting on every row would let the mean/variance of the validation and test
# periods leak into preprocessing and bias the metrics reported later.
SCALER_FIT_FRACTION = 0.7  # keep <= the training share of the chronological split
n_fit_rows = int(len(X) * SCALER_FIT_FRACTION)

scaler_features = StandardScaler().fit(X.iloc[:n_fit_rows])
scaler_target = StandardScaler().fit(y.iloc[:n_fit_rows])

# Apply the training-derived statistics to the whole series.
features_scaled = scaler_features.transform(X)
# The target scaler returns an (n, 1) column; flatten it to a 1-D vector.
target_scaled = scaler_target.transform(y).flatten()

In [None]:
# Create sequences for GRU
def create_sequences(data, target, seq_length):
    """Build sliding-window samples for one-step-ahead prediction.

    Sample ``i`` pairs the window ``data[i : i + seq_length]`` with the label
    ``target[i + seq_length]``, i.e. the value immediately after the window.

    Args:
        data: Array-like indexed by time step along its first axis.
        target: Array-like indexed by time step; it must provide a value at
            every position ``seq_length .. len(data) - 1``.
        seq_length: Number of consecutive time steps per window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with
        ``n = max(len(data) - seq_length, 0)`` samples each. ``xs`` has shape
        ``(n, seq_length, *data.shape[1:])`` and ``ys`` has shape
        ``(n, *target.shape[1:])``. The trailing dimensions are preserved even
        when ``n`` is 0, so downstream code can still rely on the rank.

    Raises:
        ValueError: If ``seq_length`` is negative.
        IndexError: If ``target`` is too short to label every window.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    data = np.asarray(data)
    target = np.asarray(target)

    n_sequences = max(len(data) - seq_length, 0)
    starts = np.arange(n_sequences)
    # Row i of window_index holds the positions i, i + 1, ..., i + seq_length - 1.
    window_index = starts[:, None] + np.arange(seq_length)
    return data[window_index], target[starts + seq_length]

# Window length: every sample spans this many consecutive time steps.
seq_length = 10
X, y = create_sequences(
    data=features_scaled,
    target=target_scaled,
    seq_length=seq_length,
)

In [None]:
# Chronological 70 / 15 / 15 partition: the earliest samples train the model,
# the following slice validates it, and the remaining samples are held out.
n_samples = len(X)
train_size = int(n_samples * 0.7)
val_size = int(n_samples * 0.15)
test_size = n_samples - train_size - val_size
val_end = train_size + val_size

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:val_end], y[train_size:val_end]
X_test, y_test = X[val_end:], y[val_end:]

# Turn every partition into a float32 PyTorch tensor.
X_train, X_val, X_test = (
    torch.tensor(part, dtype=torch.float32) for part in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.tensor(part, dtype=torch.float32) for part in (y_train, y_val, y_test)
)

In [None]:
seq_length = 10  # Number of time steps in each sequence
X, y = create_sequences(features_scaled, target_scaled, seq_length)

# Hold-out boundary. It must coincide with the end of the validation slice of
# the 70/15/15 chronological split: the final model is fitted on
# train + validation (the first 85% of the samples), so a test set that starts
# at 80% would contain samples the model was trained on and would inflate the
# reported test metrics.
TRAIN_FRACTION, VAL_FRACTION = 0.7, 0.15
train_size = int(len(X) * TRAIN_FRACTION) + int(len(X) * VAL_FRACTION)
test_size = len(X) - train_size

X_train_full, X_test = X[:train_size], X[train_size:]
y_train_full, y_test = y[:train_size], y[train_size:]

# Convert to PyTorch tensors
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [None]:
class GRUModel(nn.Module):
    """GRU encoder followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # GRU layer; batch_first=True means inputs are (batch, seq, feature)
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)

        # Fully connected layer mapping the final hidden output to the prediction
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape (batch, output_size) for a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).
        """
        # No explicit h0: nn.GRU starts from a zero hidden state by default and
        # creates it with the input's dtype and device. A hand-built float32
        # state would break models cast with .double() / .half().
        out, _ = self.gru(x)

        # Decode the output of the last time step
        return self.fc(out[:, -1, :])

In [None]:
# Hyperparameter grid
param_grid = {
    'hidden_size': [32, 64, 128],
    'num_layers': [1, 2],
    'learning_rate': [0.001, 0.01],
    'batch_size': [16, 32]
}

SEED = 42        # same seed before every trial: runs are repeatable and comparable
num_epochs = 20  # epochs per trial; the retraining cell reuses this value
input_size = X_train.shape[-1]  # features per time step, read from the data

best_rmse = float('inf')
best_params = None

# These objects do not depend on the trial, so build them once.
criterion = nn.MSELoss()
train_data = torch.utils.data.TensorDataset(X_train, y_train)
# Validation targets back in original units.
y_val_rescaled = scaler_target.inverse_transform(y_val.numpy().reshape(-1, 1)).flatten()

# Exhaustive search: one (hidden_size, num_layers, learning_rate, batch_size)
# tuple per combination, in the key order of param_grid.
for params in product(*param_grid.values()):
    hidden_size, num_layers, learning_rate, batch_size = params

    # Re-seed so every configuration starts from a reproducible initialisation.
    torch.manual_seed(SEED)
    model = GRUModel(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        output_size=1
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # shuffle=False keeps the samples in their original (chronological) order.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        for inputs, targets in train_loader:
            outputs = model(inputs)
            # squeeze(-1) removes only the output dimension, (batch, 1) -> (batch,),
            # so a batch holding a single sample still matches its target shape.
            loss = criterion(outputs.squeeze(-1), targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate on validation set, in original units
    model.eval()
    with torch.no_grad():
        val_predictions = model(X_val).squeeze(-1).numpy()
    val_predictions_rescaled = scaler_target.inverse_transform(val_predictions.reshape(-1, 1)).flatten()
    val_rmse = np.sqrt(mean_squared_error(y_val_rescaled, val_predictions_rescaled))

    print(f"Params: {params}, Validation RMSE: {val_rmse:.4f}")

    # Save best parameters
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        best_params = params

print(f"Best Params: {best_params}, Best Validation RMSE: {best_rmse:.4f}")

Params: (32, 1, 0.001, 16), Validation RMSE: 4.7928
Params: (32, 1, 0.001, 32), Validation RMSE: 4.7304
Params: (32, 1, 0.01, 16), Validation RMSE: 5.1396
Params: (32, 1, 0.01, 32), Validation RMSE: 5.4496
Params: (32, 2, 0.001, 16), Validation RMSE: 4.9356
Params: (32, 2, 0.001, 32), Validation RMSE: 4.8307
Params: (32, 2, 0.01, 16), Validation RMSE: 5.4276
Params: (32, 2, 0.01, 32), Validation RMSE: 5.3561
Params: (64, 1, 0.001, 16), Validation RMSE: 5.0024
Params: (64, 1, 0.001, 32), Validation RMSE: 4.9233
Params: (64, 1, 0.01, 16), Validation RMSE: 5.3268
Params: (64, 1, 0.01, 32), Validation RMSE: 5.3675
Params: (64, 2, 0.001, 16), Validation RMSE: 5.1317
Params: (64, 2, 0.001, 32), Validation RMSE: 4.9618
Params: (64, 2, 0.01, 16), Validation RMSE: 5.1444
Params: (64, 2, 0.01, 32), Validation RMSE: 5.1062
Params: (128, 1, 0.001, 16), Validation RMSE: 5.0865
Params: (128, 1, 0.001, 32), Validation RMSE: 5.0622
Params: (128, 1, 0.01, 16), Validation RMSE: 5.1563
Params: (128, 1, 0

In [None]:
# Retrain with best parameters
hidden_size, num_layers, learning_rate, batch_size = best_params

# Combine training and validation sets
X_train_full = torch.cat((X_train, X_val), dim=0)
y_train_full = torch.cat((y_train, y_val), dim=0)
train_data = torch.utils.data.TensorDataset(X_train_full, y_train_full)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)

# Seed the weight initialisation so the final model is reproducible.
torch.manual_seed(42)
model = GRUModel(
    input_size=X_train_full.shape[-1],  # features per time step, read from the data
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=1
)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model (num_epochs is the value set in the hyperparameter-search cell)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        outputs = model(inputs)
        # squeeze(-1) removes only the output dimension, (batch, 1) -> (batch,).
        # A plain squeeze() would turn a final batch of one sample into a 0-d
        # tensor that no longer matches the (1,) target.
        loss = criterion(outputs.squeeze(-1), targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {running_loss/len(train_loader):.4f}")

# Evaluate on test set. NOTE: the metrics are only meaningful if X_test shares
# no samples with X_train_full.
model.eval()
with torch.no_grad():
    test_predictions = model(X_test).squeeze(-1).numpy()

# Report the errors in original units by undoing the target scaling.
test_predictions_rescaled = scaler_target.inverse_transform(test_predictions.reshape(-1, 1)).flatten()
y_test_rescaled = scaler_target.inverse_transform(y_test.numpy().reshape(-1, 1)).flatten()

test_mse = mean_squared_error(y_test_rescaled, test_predictions_rescaled)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test_rescaled, test_predictions_rescaled)

print("Test Metrics:")
print(f"  Test RMSE: {test_rmse:.4f}")
print(f"  Test MAE: {test_mae:.4f}")

Epoch [1/20], Training Loss: 0.9413
Epoch [2/20], Training Loss: 0.9202
Epoch [3/20], Training Loss: 0.9145
Epoch [4/20], Training Loss: 0.9122
Epoch [5/20], Training Loss: 0.9101
Epoch [6/20], Training Loss: 0.9082
Epoch [7/20], Training Loss: 0.9065
Epoch [8/20], Training Loss: 0.9048
Epoch [9/20], Training Loss: 0.9033
Epoch [10/20], Training Loss: 0.9018
Epoch [11/20], Training Loss: 0.9004
Epoch [12/20], Training Loss: 0.8990
Epoch [13/20], Training Loss: 0.8977
Epoch [14/20], Training Loss: 0.8964
Epoch [15/20], Training Loss: 0.8950
Epoch [16/20], Training Loss: 0.8937
Epoch [17/20], Training Loss: 0.8924
Epoch [18/20], Training Loss: 0.8910
Epoch [19/20], Training Loss: 0.8896
Epoch [20/20], Training Loss: 0.8882
Test Metrics:
  Test RMSE: 4.0655
  Test MAE: 2.8418
