In [None]:
import pandas as pd

import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged columns.

    Every output row holds the observations at t-n_in ... t-1 followed by the
    observations at t ... t+n_out-1. The columns are produced by shifting the
    input frame, so rows of ``data`` are treated as consecutive time steps.

    Args:
        data: Anything ``pd.DataFrame`` accepts: a flat list, a list of rows,
            a 1-D or 2-D array, a Series or a DataFrame.
        n_in: Number of lagged time steps placed before t.
        n_out: Number of time steps, starting at t, placed after the lags.
        dropnan: When True, drop every row that contains a NaN. This removes
            the edge rows introduced by shifting, and also any row where
            ``data`` itself already had a NaN.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, with ``j`` counting variables from 1.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself. Inspecting `data` directly broke
    # for 1-D arrays / Series (no shape[1]) and for lists of rows (which were
    # always counted as a single variable).
    n_vars = df.shape[1]

    cols, names = [], []

    # Input sequence (t-n_in, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence (t, t+1, ... t+n_out-1)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
# Define MLP Model with adjusted input dimension
class MLP(nn.Module):
    """Fully connected network: input_dim -> 128 -> 64 -> output_dim.

    A ReLU follows each of the two hidden layers; the output layer is linear.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()

        # Assemble Linear/ReLU pairs for the hidden widths, then the output
        # layer, keeping the same module order as a hand-written Sequential.
        hidden_widths = (128, 64)
        layers = []
        in_features = input_dim
        for width in hidden_widths:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the stacked layers and return the raw outputs."""
        out = self.network(x)
        return out

In [None]:
# Evaluate the model
def get_predictions_and_targets(loader, model):
    """Run the model over every batch of a loader and stack the results.

    The model is switched to evaluation mode and gradients are disabled while
    the batches are processed.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        model: Module called on each ``inputs`` batch.

    Returns:
        ``(predictions, targets)`` as NumPy arrays, with the batches stacked
        row-wise in the order the loader produced them.
    """
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():
        batch_results = [
            (model(batch_inputs).numpy(), batch_labels.numpy())
            for batch_inputs, batch_labels in loader
        ]

    stacked_predictions = np.vstack([pred for pred, _ in batch_results])
    stacked_targets = np.vstack([target for _, target in batch_results])
    return stacked_predictions, stacked_targets

In [None]:
def evaluate_model(model, test_X, test_y, train_loader, test_loader, criterion):
    """Print loss, MAE and MSE for a model and return everything computed.

    The test loss is computed in one pass over the full ``test_X`` /
    ``test_y`` tensors; MAE and MSE are computed from predictions gathered
    batch by batch from the two loaders. The model is left in evaluation mode.

    Args:
        model: Module to evaluate.
        test_X: Tensor of test inputs passed to the model in a single call.
        test_y: Tensor of test targets compared against that output.
        train_loader: Loader of ``(inputs, labels)`` batches for training data.
        test_loader: Loader of ``(inputs, labels)`` batches for test data.
        criterion: Callable returning a scalar loss tensor from
            ``(predictions, targets)``.

    Returns:
        dict with the scalar metrics ``test_loss``, ``train_mae``,
        ``train_mse``, ``test_mae``, ``test_mse`` and the NumPy arrays
        ``train_predictions``, ``train_targets``, ``test_predictions``,
        ``test_targets``. Earlier versions returned None, which left later
        cells with no way to reach the predictions.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(test_X)
        test_loss = criterion(predictions, test_y)
    print(f'Test Loss: {test_loss.item()}')

    train_predictions, train_targets = get_predictions_and_targets(train_loader, model)
    test_predictions, test_targets = get_predictions_and_targets(test_loader, model)

    train_mae = mean_absolute_error(train_targets, train_predictions)
    train_mse = mean_squared_error(train_targets, train_predictions)
    test_mae = mean_absolute_error(test_targets, test_predictions)
    test_mse = mean_squared_error(test_targets, test_predictions)

    print(f"Training MAE: {train_mae}, Training MSE: {train_mse}")
    print(f"Testing MAE: {test_mae}, Testing MSE: {test_mse}")

    # Hand the results back so callers do not have to recompute them.
    return {
        'test_loss': test_loss.item(),
        'train_mae': train_mae,
        'train_mse': train_mse,
        'test_mae': test_mae,
        'test_mse': test_mse,
        'train_predictions': train_predictions,
        'train_targets': train_targets,
        'test_predictions': test_predictions,
        'test_targets': test_targets,
    }


In [None]:
if __name__ == "__main__":
  # Seed torch so weight initialisation and batch shuffling repeat between runs
  torch.manual_seed(42)

  # Load the dataset (once; the parsed frame is reused below)
  url = 'https://raw.githubusercontent.com/noobstang/NNtraining/master/Weather49Sets/weatherstats_ottawa_daily.csv'
  data = pd.read_csv(url)

  data['date'] = pd.to_datetime(data['date'])

  # Keep May 1st - November 30th of 2010-2023. The month mask used to be
  # stored in an unused variable, so only the year filter took effect; the
  # year filter is now applied on top of the month-filtered rows.
  filtered_data = data[(data['date'].dt.month >= 5) & (data['date'].dt.month <= 11)]
  data_filtered = filtered_data[
      (filtered_data['date'].dt.year >= 2010) & (filtered_data['date'].dt.year <= 2023)
  ]

  # Select the required columns and preprocess.
  # 'date' is kept next to the features so later cells can still look it up;
  # rows are dropped only when one of the feature columns is missing.
  # The lag windows below treat consecutive rows as consecutive time steps,
  # so the rows are sorted by date instead of relying on the CSV's order.
  # NOTE: with the seasonal filter, the first windows of each May reach back
  # into the previous November.
  columns_required = ['avg_hourly_temperature', 'precipitation', 'avg_hourly_pressure_station', 'solar_radiation']
  data_filtered = (
      data_filtered[['date'] + columns_required]
      .dropna(subset=columns_required)
      .sort_values('date')
  )

  n_lag_days = 3                        # window size: 3 days of history
  n_features = len(columns_required)
  feature_values = data_filtered[columns_required].to_numpy()

  # 80/20 split counted in supervised rows: each row needs n_lag_days of
  # history, so there are len(feature_values) - n_lag_days rows in total.
  n_windows = len(feature_values) - n_lag_days
  n_train_days = int(n_windows * 0.8)

  # Scale the data. The scaler is fitted only on the rows that feed training
  # windows, so min/max of the test period do not leak into training.
  scaler = MinMaxScaler(feature_range=(0, 1))
  scaler.fit(feature_values[:n_train_days + n_lag_days])
  data_scaled = scaler.transform(feature_values)

  # Convert to supervised learning problem with a window size of 3 days
  data_supervised = series_to_supervised(data_scaled, n_lag_days, 1)

  # Split the dataset
  n_obs = n_lag_days * n_features
  values = data_supervised.values
  assert len(values) == n_windows, "unexpected number of supervised rows"
  train = values[:n_train_days, :]
  test = values[n_train_days:, :]
  train_X, train_y = train[:, :n_obs], train[:, -n_features:]
  test_X, test_y = test[:, :n_obs], test[:, -n_features:]

  # Convert to tensors
  train_X = torch.tensor(train_X, dtype=torch.float32)
  train_y = torch.tensor(train_y, dtype=torch.float32)
  test_X = torch.tensor(test_X, dtype=torch.float32)
  test_y = torch.tensor(test_y, dtype=torch.float32)

  # DataLoader (the test loader is not shuffled, so its batches stay in row order)
  train_dataset = TensorDataset(train_X, train_y)
  train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
  test_dataset = TensorDataset(test_X, test_y)
  test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

  # Initialize the model; dimensions follow from the window size and feature count
  model = MLP(n_obs, n_features)
  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)

  # Training loop
  epochs = 50
  for epoch in range(epochs):
      model.train()
      running_loss = 0.0
      for inputs, targets in train_loader:
          optimizer.zero_grad()
          output = model(inputs)
          loss = criterion(output, targets)
          loss.backward()
          optimizer.step()
          # Weight by batch size so the short final batch is not over-counted
          running_loss += loss.item() * inputs.size(0)
      # Report the mean loss over the epoch rather than the last batch's loss
      epoch_loss = running_loss / len(train_dataset)
      print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}')

  evaluate_model(model, test_X, test_y, train_loader, test_loader, criterion)

  # Keep the test-set predictions at notebook scope for the cells that follow
  test_predictions, test_targets = get_predictions_and_targets(test_loader, model)


Epoch 1/50, Loss: 0.020672952756285667
Epoch 2/50, Loss: 0.012685954570770264
Epoch 3/50, Loss: 0.016097551211714745
Epoch 4/50, Loss: 0.015308748930692673
Epoch 5/50, Loss: 0.011971816420555115
Epoch 6/50, Loss: 0.013332940638065338
Epoch 7/50, Loss: 0.013219349086284637
Epoch 8/50, Loss: 0.011462919414043427
Epoch 9/50, Loss: 0.012313046492636204
Epoch 10/50, Loss: 0.011954618617892265
Epoch 11/50, Loss: 0.011617901735007763
Epoch 12/50, Loss: 0.01180655974894762
Epoch 13/50, Loss: 0.012854933738708496
Epoch 14/50, Loss: 0.011891594156622887
Epoch 15/50, Loss: 0.011092856526374817
Epoch 16/50, Loss: 0.010265654884278774
Epoch 17/50, Loss: 0.012778714299201965
Epoch 18/50, Loss: 0.011400456540286541
Epoch 19/50, Loss: 0.011212590150535107
Epoch 20/50, Loss: 0.013361241668462753
Epoch 21/50, Loss: 0.011505961418151855
Epoch 22/50, Loss: 0.013025465421378613
Epoch 23/50, Loss: 0.012058861553668976
Epoch 24/50, Loss: 0.008756631053984165
Epoch 25/50, Loss: 0.007730954326689243
Epoch 26/5



```
Epoch 50/50, Loss: 0.009859252721071243
Test Loss: 0.01037069596350193
Training MAE: 0.07271228730678558, Training MSE: 0.011210594326257706
Testing MAE: 0.07027967274188995, Testing MSE: 0.01037069596350193
```



In [None]:
import pandas as pd

# Compare test-set predictions with the actual values in original units.
#
# The predictions are gathered here because evaluate_model() keeps its
# predictions in local variables; referencing them directly is what raised
# the NameError. test_loader is built with shuffle=False, so the rows come
# back in the same order as the test split.
test_predictions, test_targets = get_predictions_and_targets(test_loader, model)

# Convert predictions and actual values back to the original scale
# (the *_scaled names are kept from the earlier version of this cell)
test_predictions_scaled = scaler.inverse_transform(test_predictions)
test_targets_scaled = scaler.inverse_transform(test_targets)

# Find the source row behind every test target. The test split is the last
# 20% of the supervised rows, not "the year 2023", so filtering by year does
# not line up with the predictions. data_supervised keeps the row position of
# each target in its index, which maps straight back onto data_filtered.
test_positions = data_supervised.index[n_train_days:].to_numpy()
test_rows = data_filtered.iloc[test_positions]
assert len(test_rows) == len(test_targets_scaled), "test rows and predictions are misaligned"

# Create the DataFrame for comparison
columns_required = ['avg_hourly_temperature', 'precipitation', 'avg_hourly_pressure_station', 'solar_radiation']
comparison_df = test_rows[columns_required].reset_index(drop=True)

# Dates are looked up through the original row labels, which data_filtered
# inherits from `data`.
comparison_df.insert(0, 'date', pd.to_datetime(data.loc[test_rows.index, 'date']).to_numpy())

# Update the DataFrame with predicted and actual values
for i, col in enumerate(columns_required):
    comparison_df[f'{col}_actual'] = test_targets_scaled[:, i]
    comparison_df[f'{col}_predicted'] = test_predictions_scaled[:, i]

# Display the DataFrame for one feature as an example (e.g., 'avg_hourly_temperature')
comparison_df_temperature = comparison_df[['date', 'avg_hourly_temperature_actual', 'avg_hourly_temperature_predicted']]
print(comparison_df_temperature.head())


NameError: name 'test_predictions' is not defined