In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import DataLoader, TensorDataset

# Load and preprocess data
file_path = '/content/data.xlsx'  # Update with your file path
data = pd.read_excel(file_path)

# Normalize SPI, VCI, and TCI
scaler = MinMaxScaler()
data[['SPI', 'VCI', 'TCI']] = scaler.fit_transform(data[['SPI', 'VCI', 'TCI']])

# Create sequences
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length])
    return np.array(sequences)

data['location'] = list(zip(data['X'], data['Y']))
data_pivoted = data.pivot_table(index=['Year', 'Month'], columns='location', values=['SPI', 'VCI', 'TCI'])
data_pivoted = data_pivoted.fillna(method='ffill').fillna(method='bfill')

sequence_length = 12
spi_sequences = create_sequences(data_pivoted['SPI'].values, sequence_length)
vci_sequences = create_sequences(data_pivoted['VCI'].values, sequence_length)
tci_sequences = create_sequences(data_pivoted['TCI'].values, sequence_length)
combined_sequences = np.stack((spi_sequences, vci_sequences, tci_sequences), axis=-1)

split_ratio = 0.8
split_index = int(len(combined_sequences) * split_ratio)
train_sequences = combined_sequences[:split_index]
test_sequences = combined_sequences[split_index:]

# Convert sequences to PyTorch tensors
train_sequences_tensor = torch.tensor(train_sequences, dtype=torch.float32)
test_sequences_tensor = torch.tensor(test_sequences, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_sequences_tensor[:, :-1], train_sequences_tensor[:, -1]), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(test_sequences_tensor[:, :-1], test_sequences_tensor[:, -1]), batch_size=32, shuffle=False)


In [15]:
import torch.nn as nn
import torch.optim as optim

class LSTMELM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_nodes):
        super(LSTMELM, self).__init__()
        self.num_nodes = num_nodes
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc_elm = nn.Linear(hidden_dim, hidden_dim)  # FC layer for ELM
        self.fc_output = nn.Linear(hidden_dim, output_dim)  # Output layer

    def forward(self, x):
        batch_size, seq_len, num_nodes, num_features = x.size()
        x = x.view(batch_size * num_nodes, seq_len, num_features)  # Reshape to (batch_size * num_nodes, seq_len, num_features)
        lstm_out, _ = self.lstm(x)  # lstm_out: (batch_size * num_nodes, seq_len, hidden_dim)
        lstm_out = lstm_out[:, -1, :]  # Take the output from the last timestep, shape: (batch_size * num_nodes, hidden_dim)
        elm_out = self.fc_elm(lstm_out)
        output = self.fc_output(elm_out)
        output = output.view(batch_size, num_nodes, -1)  # Reshape to (batch_size, num_nodes, output_dim)
        return output

input_dim = 3
hidden_dim = 64
output_dim = 3
num_nodes = train_sequences.shape[2]  # number of nodes

model = LSTMELM(input_dim, hidden_dim, output_dim, num_nodes)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [16]:
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}')


Epoch 1/100, Loss: 0.4840
Epoch 2/100, Loss: 0.3678
Epoch 3/100, Loss: 0.2483
Epoch 4/100, Loss: 0.1199
Epoch 5/100, Loss: 0.0530
Epoch 6/100, Loss: 0.0583
Epoch 7/100, Loss: 0.0322
Epoch 8/100, Loss: 0.0309
Epoch 9/100, Loss: 0.0347
Epoch 10/100, Loss: 0.0306
Epoch 11/100, Loss: 0.0272
Epoch 12/100, Loss: 0.0261
Epoch 13/100, Loss: 0.0269
Epoch 14/100, Loss: 0.0262
Epoch 15/100, Loss: 0.0262
Epoch 16/100, Loss: 0.0255
Epoch 17/100, Loss: 0.0253
Epoch 18/100, Loss: 0.0256
Epoch 19/100, Loss: 0.0254
Epoch 20/100, Loss: 0.0252
Epoch 21/100, Loss: 0.0253
Epoch 22/100, Loss: 0.0252
Epoch 23/100, Loss: 0.0263
Epoch 24/100, Loss: 0.0251
Epoch 25/100, Loss: 0.0252
Epoch 26/100, Loss: 0.0255
Epoch 27/100, Loss: 0.0239
Epoch 28/100, Loss: 0.0255
Epoch 29/100, Loss: 0.0254
Epoch 30/100, Loss: 0.0254
Epoch 31/100, Loss: 0.0246
Epoch 32/100, Loss: 0.0246
Epoch 33/100, Loss: 0.0252
Epoch 34/100, Loss: 0.0259
Epoch 35/100, Loss: 0.0243
Epoch 36/100, Loss: 0.0251
Epoch 37/100, Loss: 0.0256
Epoch 38/1

In [18]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Function to flatten the 3D arrays into 2D arrays
def flatten_predictions(predictions, targets):
    num_batches, num_nodes, num_features = predictions.shape
    predictions_flat = predictions.reshape(num_batches * num_nodes, num_features)
    targets_flat = targets.reshape(num_batches * num_nodes, num_features)
    return predictions_flat, targets_flat

# Evaluation with scaled scores
model.eval()
train_predictions = []
train_targets = []
test_predictions = []
test_targets = []

# Collect predictions and targets for train set
with torch.no_grad():
    for inputs, targets in train_loader:
        outputs = model(inputs)
        train_predictions.append(outputs.numpy())
        train_targets.append(targets.numpy())

# Collect predictions and targets for test set
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        test_predictions.append(outputs.numpy())
        test_targets.append(targets.numpy())

# Concatenate all predictions and targets
train_predictions = np.concatenate(train_predictions, axis=0)
train_targets = np.concatenate(train_targets, axis=0)
test_predictions = np.concatenate(test_predictions, axis=0)
test_targets = np.concatenate(test_targets, axis=0)

# Flatten the predictions and targets
train_predictions_flat, train_targets_flat = flatten_predictions(train_predictions, train_targets)
test_predictions_flat, test_targets_flat = flatten_predictions(test_predictions, test_targets)

# Calculate evaluation metrics for train set
train_mae = mean_absolute_error(train_targets_flat, train_predictions_flat)
train_rmse = np.sqrt(mean_squared_error(train_targets_flat, train_predictions_flat))
train_r2 = r2_score(train_targets_flat, train_predictions_flat)

# Calculate evaluation metrics for test set
test_mae = mean_absolute_error(test_targets_flat, test_predictions_flat)
test_rmse = np.sqrt(mean_squared_error(test_targets_flat, test_predictions_flat))
test_r2 = r2_score(test_targets_flat, test_predictions_flat)

print(f'Train MAE: {train_mae:.4f}, RMSE: {train_rmse:.4f}, R2: {train_r2:.4f}')
print(f'Test MAE: {test_mae:.4f}, RMSE: {test_rmse:.4f}, R2: {test_r2:.4f}')

# Unscale predictions and targets
train_predictions_unscaled = scaler.inverse_transform(train_predictions_flat)
train_targets_unscaled = scaler.inverse_transform(train_targets_flat)
test_predictions_unscaled = scaler.inverse_transform(test_predictions_flat)
test_targets_unscaled = scaler.inverse_transform(test_targets_flat)

# Calculate unscaled RMSE and MAE
train_rmse_unscaled = np.sqrt(mean_squared_error(train_targets_unscaled, train_predictions_unscaled))
train_mae_unscaled = mean_absolute_error(train_targets_unscaled, train_predictions_unscaled)
test_rmse_unscaled = np.sqrt(mean_squared_error(test_targets_unscaled, test_predictions_unscaled))
test_mae_unscaled = mean_absolute_error(test_targets_unscaled, test_predictions_unscaled)

print(f'Unscaled Train RMSE: {train_rmse_unscaled:.4f}, MAE: {train_mae_unscaled:.4f}')
print(f'Unscaled Test RMSE: {test_rmse_unscaled:.4f}, MAE: {test_mae_unscaled:.4f}')


Train MAE: 0.0955, RMSE: 0.1582, R2: -0.0217
Test MAE: 0.0942, RMSE: 0.1550, R2: -0.0809
Unscaled Train RMSE: 181.3720, MAE: 31.7369
Unscaled Test RMSE: 435.8298, MAE: 60.5315
