In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Data Preprocessing
df = pd.read_csv('./processedData.csv')
df['Date'] = pd.to_datetime(df['Full date'], format="%Y-%m-%d")

df['day_of_year'] = df['Date'].dt.dayofyear
df['sine_day'] = np.sin(2 * np.pi * df['day_of_year'] / 365)
df['cosine_day'] = np.cos(2 * np.pi * df['day_of_year'] / 365)

df = df.drop(columns=['Date', 'day_of_year'])

features = ['Rain', 'sine_day', 'cosine_day']
target = ['Temp Max', 'Temp Min']

scaler = MinMaxScaler()
df[features + target] = scaler.fit_transform(df[features + target])




Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [6]:

sequence_length = 50
X, y = [], []
for i in range(sequence_length, len(df)):
    X.append(df[features].iloc[i-sequence_length:i].values)
    y.append(df[target].iloc[i].values)
X, y = np.array(X), np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


In [7]:

# Define the PyTorch model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc1 = nn.Linear(hidden_size, 32)
        self.fc2 = nn.Linear(32, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # Take the last output from the LSTM for the dense layers
        x = self.relu(self.fc1(lstm_out))
        x = self.fc2(x)
        return x

In [8]:

# Instantiate the model
input_size = X_train.shape[2]
hidden_size = 64
output_size = 2  # 'Temp Max' and 'Temp Min'
model = LSTMModel(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training the model
epochs = 15
batch_size = 16

for epoch in range(epochs):
    model.train()
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    # Print progress
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

Epoch 1/15, Loss: 0.024192549288272858
Epoch 2/15, Loss: 0.0036964823957532644
Epoch 3/15, Loss: 0.004264137241989374
Epoch 4/15, Loss: 0.004433068446815014
Epoch 5/15, Loss: 0.0038162937853485346
Epoch 6/15, Loss: 0.0030033160001039505
Epoch 7/15, Loss: 0.0034567592665553093
Epoch 8/15, Loss: 0.002903485205024481
Epoch 9/15, Loss: 0.0027908538468182087
Epoch 10/15, Loss: 0.002624724991619587
Epoch 11/15, Loss: 0.0029571279883384705
Epoch 12/15, Loss: 0.0027860081754624844
Epoch 13/15, Loss: 0.0033301643561571836
Epoch 14/15, Loss: 0.0030648699030280113
Epoch 15/15, Loss: 0.0037758233956992626


In [9]:
from sklearn.metrics import r2_score

# Evaluating the model with test data
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_loss = criterion(test_outputs, y_test)
    print("Test Loss (MSE):", test_loss.item())

    # Convert predictions and true values back to numpy for R^2 calculation
    test_outputs = test_outputs.numpy()
    y_test_np = y_test.numpy()

    # Calculate R² score for 'Temp Max' and 'Temp Min'
    r2_temp_max = r2_score(y_test_np[:, 0], test_outputs[:, 0])
    r2_temp_min = r2_score(y_test_np[:, 1], test_outputs[:, 1])

    print(f"R² score for 'Temp Max': {r2_temp_max}")
    print(f"R² score for 'Temp Min': {r2_temp_min}")


Test Loss (MSE): 0.005617884919047356
R² score for 'Temp Max': 0.7963274717330933
R² score for 'Temp Min': 0.8146586418151855
