In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
import os
# Seed the global PyTorch and NumPy random number generators with a fixed value
# so that repeated runs from a fresh kernel start from the same random state.
torch.manual_seed(42)
np.random.seed(42)

import matplotlib as mpl
import matplotlib.pyplot as plt
# Select the PGF backend; figures are produced through LaTeX rather than shown inline.
mpl.use("pgf")

# rcParams overrides applied in one go below.
_pgf_rc = {
    # --- TeX toolchain ---
    "pgf.texsystem": "pdflatex",  # or any other engine you want to use
    "text.usetex": True,          # use TeX for all texts
    # --- font families ---
    "font.family": "serif",
    "font.serif": [],             # empty entries should cause the usage of the document fonts
    "font.sans-serif": [],
    "font.monospace": [],
    # --- font sizes of the individual figure elements ---
    "font.size": 10,
    "axes.labelsize": 10,
    "legend.fontsize": 9,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
}
mpl.rcParams.update(_pgf_rc)

from utilities import SimpleTemporalFusionTransformer, GreenhouseDatasetHandler, train, test, plot_loss, learn, transfer_learning_with_noise, device

Using device cpu
Using 0 workers


In [3]:
# Hyperparameters
learning_rate = 1e-3
hidden_size = 128
attention_head_size = 8
num_epochs = 200
batch_size = 16
weight_decay = 1e-3
num_lstm_layers = 1
dropout_prob = 0.5
stride = 10

# Window lengths, expressed in hours and converted to timesteps.
# NOTE(review): 120 is presumably the number of raw samples per hour, so
# int(120/stride) is the number of strided steps per hour -- confirm against the logger.
hours_m = 1
hours_p = 12
steps_per_hour = int(120 / stride)
N_m = hours_m * steps_per_hour  # Past timesteps
N_p = hours_p * steps_per_hour  # Future timesteps

# Identifier of this configuration; it names the output directory.
# weight_decay is intentionally NOT part of the string: changing the format
# would point save_dir at a different folder than existing checkpoints.
config_string = f"{N_m}_Nm_{N_p}_Np_lr_{learning_rate}_hs_{hidden_size}_ahs_{attention_head_size}_nll_{num_lstm_layers}_bs_{batch_size}_do_{dropout_prob}_e_{num_epochs}_stride_{stride}"

# Output directory below the current working directory. exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of exists()+makedirs().
save_dir = os.path.join(os.getcwd(), f"transformer_no_tanh_with_past_{config_string}")
os.makedirs(save_dir, exist_ok=True)


In [9]:
# Load and preprocess datasets
# data_N25 is split into train/validation below; data_N25_test comes from a
# separate CSV file and is used as the test set.
data_N25 = pd.read_csv('GH_Data_2025-11-19_2025-11-21.csv', sep=',')
data_N25_test = pd.read_csv('GH_Data_2025-11-25_2025-11-26.csv', sep=',')

# Column groups passed to the dataset handler.
state_features = ['Temperature_inside', 'Humidity_inside']
disturbance_features = [
    'Temperature_outside',
    'Humidity_outside',
    'Radiation_inside',
    'Radiation_outside',
    'Wind_speed_outside',
]

# Every vent column available in the recordings.
vent_features_all = [
    'Vent_S1_Roof_1', 'Vent_S1_Roof_2', 'Vent_S1_Roof_3',
    'Vent_S1_Side_N', 'Vent_S1_Side_NW', 'Vent_S1_Side_S', 'Vent_S1_Side_SW',
    'Vent_S2_Roof_1', 'Vent_S2_Roof_2', 'Vent_S2_Roof_3',
    'Vent_S2_Side_E', 'Vent_S2_Side_N', 'Vent_S2_Side_S',
]
# Only one control input is used: index 4 is 'Vent_S1_Side_NW'.
control_features_W24 = [vent_features_all[4]]

# Row boundaries: [0, split_index_train) trains, [split_index_train, split_index_val) validates.
n_rows_N25 = len(data_N25)
split_index_train = int(22/36 * n_rows_N25)
split_index_val = int(36/36 * n_rows_N25)  # 36/36 == 1.0, so validation runs to the last row

dataset_N25 = GreenhouseDatasetHandler(
    data=data_N25,
    train_data=data_N25.iloc[:split_index_train],
    val_data=data_N25.iloc[split_index_train:split_index_val],
    test_data=data_N25_test,  # separate file rather than a tail slice of data_N25
    state_features=state_features,
    control_features=control_features_W24,  # only one control
    disturbance_features=disturbance_features,
    seq_len=N_m,
    pred_len=N_p,
    stride=stride,
    batch_size=batch_size,
)


In [10]:
# Initialize models
def _build_tft():
    """Return a new SimpleTemporalFusionTransformer on `device`.

    Input sizes are the lengths of the state, control and disturbance feature
    lists; the remaining arguments are the notebook-level hyperparameters.
    """
    network = SimpleTemporalFusionTransformer(
        len(state_features),
        len(control_features_W24),
        len(disturbance_features),
        hidden_dim=hidden_size,
        num_heads=attention_head_size,
        num_layers=num_lstm_layers,
        dropout_prob=dropout_prob,
    )
    return network.to(device)


# Construction order is kept as before (A25_to_J25 first, then N25).
# NOTE(review): weight initialisation presumably consumes the seeded torch RNG,
# so reordering these two lines could change the initial weights -- confirm.
model_A25_to_J25 = _build_tft()
model_N25 = _build_tft()

# Learn from 2-day Dataset (November 2025, GH with 13 vents)

In [11]:
learning_rate = 1e-3

# Start from a freshly constructed network so re-running this cell does not
# continue from weights left over from an earlier run.
feature_dims = (len(state_features), len(control_features_W24), len(disturbance_features))
model_N25 = SimpleTemporalFusionTransformer(
    *feature_dims,
    hidden_dim=hidden_size,
    num_heads=attention_head_size,
    num_layers=num_lstm_layers,
    dropout_prob=dropout_prob,
).to(device)

# The run name is reused for the checkpoint file inside save_dir.
model_N25_name = f'model_N25_{learning_rate}lr_{num_epochs}e'
model_N25_path = f"{save_dir}/{model_N25_name}.pth"

# Per the recorded output, learn() logs the losses per epoch, saves the state
# dict to model_N25_path and then reports test-set RMS errors.
learn(
    model_N25,
    model_N25_name,
    model_N25_path,
    dataset_N25,
    num_epochs,
    learning_rate,
    weight_decay,
    save_dir,
    config_string,
)

Epoch [1/200], Training Loss: 0.759663, Validation Loss: 1.141201, LR: 9.955000e-04
Epoch [6/200], Training Loss: 0.394152, Validation Loss: 1.115318, LR: 9.730000e-04
Epoch [11/200], Training Loss: 0.237200, Validation Loss: 1.082060, LR: 9.505000e-04
Epoch [16/200], Training Loss: 0.186924, Validation Loss: 1.004755, LR: 9.280000e-04
Epoch [21/200], Training Loss: 0.160558, Validation Loss: 0.907565, LR: 9.055000e-04
Epoch [26/200], Training Loss: 0.140607, Validation Loss: 0.812376, LR: 8.830000e-04
Epoch [31/200], Training Loss: 0.124070, Validation Loss: 0.727005, LR: 8.605000e-04
Epoch [36/200], Training Loss: 0.110974, Validation Loss: 0.653016, LR: 8.380000e-04
Epoch [41/200], Training Loss: 0.100375, Validation Loss: 0.589938, LR: 8.155000e-04
Epoch [46/200], Training Loss: 0.092009, Validation Loss: 0.536822, LR: 7.930000e-04
Epoch [51/200], Training Loss: 0.085477, Validation Loss: 0.492234, LR: 7.705000e-04
Epoch [56/200], Training Loss: 0.079751, Validation Loss: 0.455043,

findfont: Font family ['serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'serif' not found because none of the following families were found: 
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: 
findfont: Font family ['monospace'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'monospace' not found because none of the following families were found: 


Epoch [200/200], Training Loss: 0.045279, Validation Loss: 0.236919, LR: 1.000000e-04
Saving model_N25_0.001lr_200e state dict to /Users/paulloer/Desktop/online_optimization_and_learning/transformer_no_tanh_with_past_12_Nm_144_Np_lr_0.001_hs_128_ahs_8_nll_1_bs_16_do_0.5_e_200_stride_10/model_N25_0.001lr_200e.pth


Inference: 100%|██████████| 3012/3012 [00:14<00:00, 208.74it/s]
  axs[0].legend()
  axs[1].legend()


Predictions: 3012
RMS Temperature Error of 2.1283 with standard deviation 1.9162
RMS Humidity Error of 23.1871 with standard deviation 11.1744


(2.128341, 23.18712, 1.9162217, 11.174354)

## Transfer Learning: Add noise to model A25 to J25 before training on dataset N25

In [None]:
learning_rate = 1e-3
noise = 0.01

# Source checkpoint for the transfer. It is not written by any cell visible in
# this notebook, so it must already exist inside save_dir.
model_A25_to_J25_path = f"{save_dir}/model_transfer_A25_to_J25_0.01n_0.001lr_200e.pth"
# Name and checkpoint path of the model produced by this transfer run.
model_name = f'model_transfer_A25_to_J25_to_N25_{noise}n_{learning_rate}lr_{num_epochs}e'
model_A25_to_J25_to_N25_path = f"{save_dir}/{model_name}.pth"

# Holder for the unmodified source model; the test cell further down loads the
# source checkpoint into this instance.
model_transfer_A25_to_J25 = SimpleTemporalFusionTransformer(
    len(state_features), len(control_features_W24), len(disturbance_features),
    hidden_dim=hidden_size,
    num_heads=attention_head_size,
    num_layers=num_lstm_layers,
    dropout_prob=dropout_prob
).to(device)

# Dedicated network for the transfer run. Previously model_N25 was passed to
# transfer_learning_with_noise, which would replace the N25-only model trained
# in the preceding cell with the transfer result (hidden state on re-runs).
model_transfer_A25_to_J25_to_N25 = SimpleTemporalFusionTransformer(
    len(state_features), len(control_features_W24), len(disturbance_features),
    hidden_dim=hidden_size,
    num_heads=attention_head_size,
    num_layers=num_lstm_layers,
    dropout_prob=dropout_prob
).to(device)

# NOTE(review): presumably loads model_path_source into `model`, perturbs the
# weights by `noise`, trains on `dataset` and saves to model_path_target --
# confirm against utilities.transfer_learning_with_noise.
transfer_learning_with_noise(model=model_transfer_A25_to_J25_to_N25,
                             model_name=model_name,
                             model_path_source=model_A25_to_J25_path,
                             model_path_target=model_A25_to_J25_to_N25_path,
                             dataset=dataset_N25,
                             num_epochs=num_epochs, learning_rate=learning_rate, weight_decay=weight_decay, save_dir=save_dir, config_string=config_string, noise=noise
                            )

## Test model transfer A25 to J25 on dataset A25

In [None]:
# Load the source checkpoint (weights only) into model_transfer_A25_to_J25,
# mapped onto the current device type, then evaluate it with test().
# NOTE(review): dataset_A25 is not defined in any cell visible in this notebook;
# on a fresh kernel this cell raises NameError unless it is created elsewhere.
# The run name uses the current learning_rate and num_epochs globals, not
# necessarily the values the checkpoint was trained with.
model_transfer_A25_to_J25.load_state_dict(torch.load(model_A25_to_J25_path, map_location=torch.device(device.type), weights_only=True))
test(model_transfer_A25_to_J25, dataset_A25, save_dir, f'model_transfer_A25_to_J25_on_A25_{learning_rate}lr_{num_epochs}e')

## Transfer Learning: Add noise to model W20 before training on two days from dataset W24

In [None]:
# Transfer run from a W20 checkpoint onto a slice of data_W24.
# NOTE(review): control_features_W20, data_W24 and model_W20_path are not defined
# in any cell visible in this notebook; this cell fails on a fresh kernel unless
# they are created elsewhere.
learning_rate = 1e-4
noise = 0.01
# NOTE(review): the run name contains a literal "_path_" segment and "2d" twice;
# looks like a leftover from the variable name -- confirm before relying on it.
model_name = f'model_transfer_W20_to_W24_2d_path_{noise}noise_2d_{learning_rate}lr_{num_epochs}e'
model_transfer_W20_to_W24_2d_path = f"{save_dir}/{model_name}.pth"

# NOTE(review): the network is sized with len(control_features_W20) while the
# dataset below is built with control_features_W24 -- verify both have the same length.
model_transfer_W20_to_W24_2d = SimpleTemporalFusionTransformer(
    len(state_features), len(control_features_W20), len(disturbance_features),
    hidden_dim=hidden_size,
    num_heads=attention_head_size,
    num_layers=num_lstm_layers, 
    dropout_prob=dropout_prob
).to(device)

# Row boundaries as fractions of data_W24: 36/48, 38/48 and 43/48.
# The numeric suffixes of the variable names (36, 80, 90) do not correspond to
# these fractions; read the right-hand sides, not the names.
split_index_36 = int(36/48 * len(data_W24))
split_index_80 = int(38/48 * len(data_W24))
split_index_90 = int(43/48 * len(data_W24))
# Train on rows [36/48, 38/48), validate on [38/48, 43/48), test on the remainder.
dataset_W24_2d = GreenhouseDatasetHandler(data=data_W24, 
                                       train_data=data_W24.iloc[split_index_36:split_index_80],  # pd.concat([data_W24.iloc[:split_index_45], data_W24.iloc[split_index_50:split_index_95]]), 
                                       val_data=data_W24.iloc[split_index_80:split_index_90], 
                                       test_data=data_W24.iloc[split_index_90:], 
                                       state_features=state_features, 
                                       control_features=control_features_W24,  # only one control
                                       disturbance_features=disturbance_features, 
                                       seq_len=N_m, pred_len=N_p, stride=stride, batch_size=batch_size)

# Positional arguments follow the keyword order used in the earlier transfer cell:
# model, model_name, model_path_source, model_path_target, dataset, num_epochs,
# learning_rate, weight_decay, save_dir, config_string, noise.
transfer_learning_with_noise(model_transfer_W20_to_W24_2d, model_name, model_W20_path, model_transfer_W20_to_W24_2d_path, dataset_W24_2d, num_epochs, learning_rate, weight_decay, save_dir, config_string, noise)


## Test model transfer W20 to W24 2d directly on dataset A25

In [None]:
# Reload the checkpoint saved at model_transfer_W20_to_W24_2d_path (weights only,
# mapped onto the current device type) and evaluate it with test().
# NOTE(review): dataset_A25 is not defined in any cell visible in this notebook;
# this cell raises NameError on a fresh kernel unless it is created elsewhere.
model_transfer_W20_to_W24_2d.load_state_dict(torch.load(model_transfer_W20_to_W24_2d_path, map_location=torch.device(device.type), weights_only=True))
test(model_transfer_W20_to_W24_2d, dataset_A25, save_dir, f'model_transfer_W20_to_W24_2d_on_A25')
