In [1]:
# Creating a Recurrent Neural Network

In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn.functional as F

from tqdm.notebook import trange, tqdm

In [3]:
class WeatherDataset(Dataset):
    """Sliding-window dataset over a weather CSV file.

    The CSV must contain a ``Date`` column, which becomes the index. Every
    remaining column is z-score normalised (pandas' default sample standard
    deviation). Each item is a window of ``day_range`` consecutive rows taken
    from the selected split.

    NOTE: the normalisation statistics are computed over the *whole* file,
    i.e. before the train/test split, so both splits share the same
    ``mean``/``std`` and the training data is scaled with statistics that
    include the test rows.

    Args:
        dataset_file: Path (or buffer) accepted by ``pandas.read_csv``.
        day_range: Number of consecutive rows returned per item.
        split_date: Rows dated strictly before this value form the "train"
            split; rows dated on or after it form the "test" split.
        train_test: Which split to expose, either "train" or "test".

    Raises:
        ValueError: If ``train_test`` is neither "train" nor "test".

    Attributes:
        mean: Tensor of shape (1, n_columns) with the per-column mean used
            for normalisation.
        std: Tensor of shape (1, n_columns) with the per-column standard
            deviation used for normalisation.
        dataset: Normalised DataFrame holding the rows of the chosen split.
        day_range: Window length, as passed to the constructor.
    """

    def __init__(self, dataset_file, day_range, split_date, train_test="train"):
        # Fail fast on a bad split name, before doing any file I/O.
        if train_test not in ("train", "test"):
            raise ValueError("train_test should be train or test")

        df = pd.read_csv(dataset_file)
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df = df.set_index('Date')

        # Calculate the mean and std to normalise the data
        mean = df.mean()
        std = df.std()
        df = (df - mean) / std

        # Kept as (1, n_columns) tensors so they can be used to undo the
        # normalisation on batched outputs.
        self.mean = torch.tensor(mean.to_numpy()).reshape(1, -1)
        self.std = torch.tensor(std.to_numpy()).reshape(1, -1)

        # Split the dataset to test/train set based on a split date
        if train_test == "train":
            self.dataset = df[df.index < split_date]
        else:
            self.dataset = df[df.index >= split_date]

        self.day_range = day_range

    def __getitem__(self, index):
        """Return the window of ``day_range`` rows starting at ``index``.

        Returns:
            A tuple ``(day_tensor, month_tensor, data_values)`` where the
            first two are int64 tensors of length ``day_range`` holding the
            day-of-month and month of each row, and ``data_values`` is a
            float32 tensor of shape (day_range, n_columns).

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``. Without
                this check ``iloc`` slicing would silently return a short or
                empty window, and plain iteration would never terminate.
        """
        if not 0 <= index < len(self):
            raise IndexError(
                f"index {index} out of range for dataset of length {len(self)}"
            )

        # Index a range of days
        end_index = index + self.day_range
        current_series = self.dataset.iloc[index:end_index]

        day_tensor = torch.tensor(current_series.index.day.to_numpy(), dtype=torch.long)
        month_tensor = torch.tensor(current_series.index.month.to_numpy(), dtype=torch.long)
        data_values = torch.tensor(current_series.values, dtype=torch.float32)

        return day_tensor, month_tensor, data_values

    def __len__(self):
        """Number of windows exposed; 0 when the split is shorter than ``day_range``."""
        # Clamp at zero: a negative value would make len() itself raise.
        return max(0, len(self.dataset) - self.day_range)

In [None]:
# ---- Optimisation hyperparameters ----
learning_rate = 1e-4
nepochs = 512
batch_size = 32

# ---- Sequence lengths ----
# Length (in days) of every sequence drawn from the dataset
day_range = 30

# Number of days the model will take as an input
days_in = 14

# Each sequence must be strictly longer than the model's input window
assert days_in < day_range

# ---- Data source and split ----
# CSV file holding the weather records
dataset_file = "../datasets/weather.csv"

# Rows dated before this timestamp go to the training set, the rest to the test set
split_date = pd.to_datetime('2023-01-01')

dataset_train = WeatherDataset(
    dataset_file=dataset_file,
    day_range=day_range,
    split_date=split_date,
    train_test="train",
)
dataset_test = WeatherDataset(
    dataset_file=dataset_file,
    day_range=day_range,
    split_date=split_date,
    train_test="test",
)

In [None]:
# Report how many examples each split exposes (the value returned by len()).
print(f"Number of training examples: {len(dataset_train)}")
print(f"Number of testing examples: {len(dataset_test)}")

In [None]:
# Training batches are shuffled; test batches are served in dataset order.
# drop_last=True discards a final batch smaller than batch_size in both loaders.
data_loader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
data_loader_test = DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)
