In [2]:
import pandas as pd 
import numpy as np

In [3]:
# Read the pre-processed daily weather table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='./processedData.csv')
df.head(n=5)

Unnamed: 0,id,Date,Rain,Temp Max,Temp Min,Full date,Year,Month
0,0,1,0.0,28.940001,15.58,1951-01-01,1951,1
1,1,2,0.0,27.889999,13.86,1951-01-02,1951,1
2,2,3,0.0,27.32,12.38,1951-01-03,1951,1
3,3,4,0.0,27.67,13.06,1951-01-04,1951,1
4,4,5,0.0,28.32,14.79,1951-01-05,1951,1


In [5]:
from sklearn.preprocessing import MinMaxScaler


# Isolate the 'Temp Max' column as an (n_samples, 1) array: scikit-learn
# scalers operate on 2-D input, one column per feature.
trainSet = df['Temp Max'].to_numpy().reshape(-1, 1)

# Min-max scale into [0, 1]. The fitted scaler is kept in `scaler` so that
# scaled values can be mapped back with `scaler.inverse_transform`.
# NOTE(review): the scaler is fit on every row of `df`; if a hold-out split is
# made elsewhere, fit on the training portion only to avoid leakage.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(trainSet)
trainSetScaled = scaler.transform(trainSet)

# Display the unscaled column as the cell output.
trainSet

array([[28.94000053],
       [27.88999939],
       [27.31999969],
       ...,
       [28.23999977],
       [28.12000084],
       [29.55999947]])

In [41]:
sequenceLength = 365  # look-back window length, in rows (the data is one row per day)

# Flatten the (n, 1) scaled column into a 1-D series.
scaledSeries = trainSetScaled[:, 0]

if len(scaledSeries) <= sequenceLength:
    # Without at least one value after a full window there is nothing to predict.
    raise ValueError(
        f"Need more than {sequenceLength} rows to build training windows, "
        f"got {len(scaledSeries)}"
    )

# Row j of `windows` is scaledSeries[j : j + sequenceLength]. The final window
# has no following value to predict, so it is dropped. `sliding_window_view`
# returns a read-only view, hence the explicit copies.
windows = np.lib.stride_tricks.sliding_window_view(scaledSeries, sequenceLength)
xTrain = windows[:-1].copy()                    # (n - sequenceLength, sequenceLength)
yTrain = scaledSeries[sequenceLength:].copy()   # value that follows each window

xTrain, yTrain

(array([[0.37515034, 0.33306613, 0.31022045, ..., 0.36833671, 0.37474953,
         0.37555115],
        [0.33306613, 0.31022045, 0.32424852, ..., 0.37474953, 0.37555115,
         0.37314629],
        [0.31022045, 0.32424852, 0.35030061, ..., 0.37555115, 0.37314629,
         0.36873752],
        ...,
        [0.4593187 , 0.43326653, 0.44088178, ..., 0.38356714, 0.37194393,
         0.36072146],
        [0.43326653, 0.44088178, 0.44008016, ..., 0.37194393, 0.36072146,
         0.3470942 ],
        [0.44088178, 0.44008016, 0.40961923, ..., 0.36072146, 0.3470942 ,
         0.34228462]]),
 array([0.37314629, 0.36873752, 0.36152308, ..., 0.3470942 , 0.34228462,
        0.4       ]))

In [42]:
# The LSTM layers (batch_first=True) consume input laid out as
# (samples, timesteps, features), so give the 2-D window matrix a trailing
# feature axis of size 1.
xTrain = xTrain.reshape(xTrain.shape[0], xTrain.shape[1], 1)

In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Define the LSTM model class
class LSTMModel(nn.Module):
    """Four stacked single-layer LSTMs, each followed by dropout, with a linear head.

    The network reads a batch of sequences shaped
    ``(batch, timesteps, input_size)`` and returns one value per sequence,
    computed from the top layer's output at the last time step.

    Args:
        input_size: Number of features per time step. Defaults to 1.
        hidden_size: Hidden width shared by all four LSTM layers. Defaults to 50.
        dropout: Drop probability applied after every LSTM layer. Defaults to 0.2.
    """

    def __init__(self, input_size=1, hidden_size=50, dropout=0.2):
        super().__init__()
        # Attribute names and creation order are kept as-is so state_dict keys
        # and seeded weight initialisation stay unchanged for the defaults.
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.dropout1 = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.dropout2 = nn.Dropout(dropout)
        self.lstm3 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.dropout3 = nn.Dropout(dropout)
        self.lstm4 = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.dropout4 = nn.Dropout(dropout)
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)  # Output layer

    def forward(self, x):
        """Map ``(batch, timesteps, input_size)`` input to ``(batch, 1)`` predictions."""
        stages = (
            (self.lstm1, self.dropout1),
            (self.lstm2, self.dropout2),
            (self.lstm3, self.dropout3),
            (self.lstm4, self.dropout4),
        )
        for lstm, drop in stages:
            x, _ = lstm(x)  # full output sequence; the (h, c) state is not needed
            x = drop(x)
        x = x[:, -1, :]  # keep only the last time step
        return self.fc(x)

In [44]:

# Seed PyTorch's global RNG so weight initialisation, dropout masks and batch
# shuffling are repeatable across runs (some CUDA kernels may still be
# non-deterministic).
torch.manual_seed(42)

model = LSTMModel().to(device)

# Targets are reshaped to a (samples, 1) column so they line up with the
# model's (batch, 1) output inside MSELoss.
xTrain_tensor = torch.tensor(xTrain, dtype=torch.float32).to(device)
yTrain_tensor = torch.tensor(yTrain, dtype=torch.float32).view(-1, 1).to(device)

dataset = TensorDataset(xTrain_tensor, yTrain_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)  # reshuffled every epoch

criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Training mode keeps the dropout layers active.
model.train()

# Training loop
epochs = 3
for epoch in range(epochs):
    # Accumulate each batch's mean loss weighted by its size, so a smaller
    # final batch does not skew the reported per-sample epoch mean.
    epoch_loss = 0.0

    for inputs, targets in dataloader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * inputs.size(0)

    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(dataset):.4f}')

Epoch [1/3], Loss: 0.0214
Epoch [2/3], Loss: 0.0079
Epoch [3/3], Loss: 0.0066


[[30.01000023]
 [30.64999962]
 [29.81999969]
 [29.98999977]
 [29.29000092]
 [30.64999962]
 [31.25      ]
 [30.75      ]
 [30.86000061]
 [31.29000092]
 [31.60000038]
 [32.38000107]
 [32.52999878]
 [33.61000061]
 [33.72999954]
 [34.06999969]
 [33.93000031]
 [34.06000137]
 [34.49000168]
 [34.45999908]
 [34.70000076]
 [34.93000031]
 [34.81000137]
 [34.41999817]
 [34.20000076]
 [34.25      ]
 [34.06999969]
 [33.75999832]
 [33.90000153]
 [34.15000153]
 [33.61000061]
 [32.63000107]
 [33.34000015]
 [34.43000031]
 [34.56999969]
 [34.61000061]
 [33.84999847]
 [32.86999893]
 [34.27000046]
 [34.20999908]
 [34.47999954]
 [35.04999924]
 [35.79999924]
 [36.54000092]
 [37.16999817]
 [38.22000122]
 [38.43999863]
 [38.68000031]
 [38.15000153]
 [38.18000031]
 [38.29999924]
 [37.70999908]
 [37.95999908]
 [38.13999939]
 [38.97999954]
 [38.36999893]
 [36.86000061]
 [36.47999954]
 [34.11000061]
 [35.54999924]
 [36.86000061]
 [36.06999969]
 [34.15000153]
 [33.75999832]
 [35.84999847]
 [36.15000153]
 [36.83000