In [2]:
### Imports and setup
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
import glob
import seaborn as sns
from datetime import datetime
import torch
import joblib
import tabulate

from models.lstm import LSTMModel
from models.transformer import TransformerModel
from models.hybrid import HybridLSTMTransformerModel
from models.randomforest import RandomForestModel
from data.dataset_loader import load_lab42_from_influxdb, add_contextual_features, Lab42Dataset, UciOccupancyDataset
from torch.utils.data import DataLoader
from utils.train import evaluate_model

# Functions to run training scripts and load metrics

In [3]:
### Experiment parameters
models = ['randomforest', 'lstm', 'transformer', 'hybrid']
datasets = ["uci", "lab42"]
metrics_dir = "metrics"
features = ['airquality', 'light', 'hour_of_day', 'is_weekend', 'airquality_delta', 'airquality_trend']
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Auto-run Training via Subprocess
def run_training_script(script_name, args_list):
    """
    Run a python training script (train_model.py) with given list of CLI arguments.

    :param script_name: path to the script to run
    :param args_list: list of arguments to pass to the script [e.g. ["--model", "logreg", "--dataset", "uci"]]
    :return:
    """

    full_command = ["python", script_name] + args_list
    print(f"Running command: {' '.join(full_command)}")
    result = subprocess.run(full_command, capture_output=True, text=True)

    print(f"Return code: {result.returncode}")
    print(f"stdout: {result.stdout}")
    if result.stderr:
        print("Error output:")
        print(f"stderr: {result.stderr}")


### Load latest saved metrics
def load_latest_metrics(model, pattern):
    """
    Load the latest saved metrics for a given model and pattern.
    :param model: Name of model
    :param pattern: Beginning of the filename to search for (e.g. "hybrid_track1")
    :return: Metrics dictionary
    """
    files = sorted(
        glob.glob(f"{metrics_dir}/{model}_{pattern}_*.json"),
        key=os.path.getmtime,
        reverse=True
    )

    if not files:
        print(f"No metrics files found for {model} with pattern {pattern}.")
        return None
    with open(files[0], 'r') as f:
        metrics = json.load(f)
        return metrics

### View results as a table
def view_results_as_table(results):
    table = []
    for model, datasets in results.items():
        for dataset, metrics in datasets.items():
            if metrics:
                row = {
                    "Model": model,
                    "Dataset": dataset,
                    "Accuracy": metrics["accuracy"],
                    "F1 Score": metrics["f1"],
                    "Precision": metrics["precision"],
                    "Recall": metrics["recall"],
                    "Loss": metrics["loss"],
                }
                table.append(row)

    # Convert to DataFrame for better visualization and sort by F1 Score
    df = pd.DataFrame(table).sort_values(by="F1 Score", ascending=False).reset_index(drop=True)

    return df


### Plot Confusion Matrix
def plot_confusion_matrix(cm, title, ax):
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")

start_time = datetime.now()
print(f"[Start] Notebook started at: {start_time}")

[Start] Notebook started at: 2025-06-23 00:23:50.748221


# Track 1: UCI-Only Supervised Baseline

In [None]:
### Train models
track1_uci_models = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track1_uci_models:
    train_model_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track1",
    ]

    run_training_script("train_model.py", train_model_args)

In [None]:
### Train models
track1_uci_models = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track1_uci_models:
    train_model_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track1",
    ]

    run_training_script("train_model.py", train_model_args)

In [None]:
### Train models
track1_uci_models = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track1_uci_models:
    train_model_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track1",
    ]

    run_training_script("train_model.py", train_model_args)

In [None]:
### Train models
track1_uci_models = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track1_uci_models:
    train_model_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track1_uci",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track1",
        "--lstm_path", f"checkpoints_and_metrics/lstm_track1_uci",
        "--transformer_path", f"checkpoints_and_metrics/transformer_track1_uci",
    ]

    run_training_script("train_model.py", train_model_args)

# Track 2: UCI Pretraining with LAB42 Fine-Tuning

## Using All Rooms

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_randomforest:
    track2_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track2",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track2_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_lstm:
    track2_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track2",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track2_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_transformer:
    track2_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track2",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]


    # Run the training script
    run_training_script("train_model.py", track2_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_hybrid:
    track2_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",
        "--save_path", f"checkpoints_and_metrics/{model}_track2",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2",
        "--lstm_path", f"checkpoints_and_metrics/lstm_track2",
        "--transformer_path", f"checkpoints_and_metrics/transformer_track2",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track2_args)

## Using Singular Rooms

### Small-Sized Room (Room_11 Capacity 18)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room11",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

### Medium-Sized Room (Room_02 Capacity 40)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room02",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

### Large-Sized Room (Room_06 Capacity 160)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Pretrain on UCI and Fine-tune on LAB42
track2_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track2_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track2_room06",
        "--uci_train_csv", "data/uci_occupancy_dataset/datatraining.txt",
        "--uci_val_csv", "data/uci_occupancy_dataset/datatest.txt",
        "--uci_test_csv", "data/uci_occupancy_dataset/datatest2.txt",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

# Track 3: LAB42-Only Supervised Training with Capacity

## Using All Rooms

In [None]:
### Training only on LAB42
track3_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--include_capacity",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--include_capacity",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--include_capacity",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_hybrid = [
    "hybrid",
]

timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--include_capacity",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3",
        "--lstm_path", f"lstm_track3",
        "--transformer_path", f"transformer_track3",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2024-11-30T23:59:59Z",
        "--lab42_val_start", "2025-01-01T00:00:00Z",
        "--lab42_val_stop", "2025-01-31T23:59:59Z",
        "--lab42_test_start", "2025-02-01T00:00:00Z",
        "--lab42_test_stop", "2025-02-28T23:59:59Z",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

## Using Singular Rooms

#### Small-Sized Room (Room_11 Capacity 18)

In [None]:
### Training only on LAB42
track3_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room11",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_11",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room11",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_11",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room11",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_11",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room11",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room11",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_11",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

#### Medium-Sized Room (Room_02 Capacity 40)

In [None]:
### Training only on LAB42
track3_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room02",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room02",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room02",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_009",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room02",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room02",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_02",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

#### Large-Sized Room (Room_06 Capacity 160)

In [None]:
### Training only on LAB42
track3_randomforest = [
    "randomforest",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_randomforest:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room06",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_lstm = [
    "lstm",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_lstm:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room06",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_transformer = [
    "transformer",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_transformer:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room06",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

In [None]:
### Training only on LAB42
track3_hybrid = [
    "hybrid",
]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

for model in track3_hybrid:
    track3_args = [
        "--model", model,
        "--features", *features,
        "--seq_len", "30",
        "--epochs", "30",
        "--batch_size", "64",
        "--learning_rate", "0.001",
        "--weight_decay", "1e-5",
        "--save_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--pretrain_path", f"checkpoints_and_metrics/{model}_track3_room06",
        "--metrics_path", f"checkpoints_and_metrics/{model}_track3_room06",

        ## Lab42 specific
        "--influx_url", "http://localhost:8086",
        "--influx_token", "VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==",
        "--influx_org", "miguel_master_thesis",
        "--influx_bucket", "lab42_sensor_data",
        "--lab42_train_start", "2024-10-01T00:00:00Z",
        "--lab42_train_stop", "2025-01-31T23:59:59Z",
        "--lab42_val_start", "2025-02-01T00:00:00Z",
        "--lab42_val_stop", "2025-02-28T23:59:59Z",
        "--lab42_test_start", "2025-03-01T00:00:00Z",
        "--lab42_test_stop", "2025-03-31T23:59:59Z",
        "--room_filter", "Room_06",
    ]

    # Run the training script
    run_training_script("train_model.py", track3_args)

# Sensor Delay Simulation

In [4]:
lab42_test_df = load_lab42_from_influxdb(
    url="",
    token='',
    org="",
    bucket='',
    start='',
    stop=''
) # InfluxDB credentials removed as no longer required

lab42_test_df.dropna(inplace=True) # Filter out rows with NaN values
lab42_test_df = add_contextual_features(lab42_test_df, normalize=True) # Add contextual features to the DataFrame

lab42_test_dataset = Lab42Dataset(
    lab42_test_df,
    features=features,
    label_col='Occupancy',
    include_capacity=False
) # Create a dataset from the DataFrame with specified features and label column

lab42_test_loader = DataLoader(lab42_test_dataset, batch_size=64, shuffle=False) # Create a DataLoader for the dataset with a batch size of 64 and no shuffling

Dropped 0 rows due to NaNs.
Dropped 0 rows due to NaNs.


## All Rooms

In [9]:
torch_models = ['lstm', 'transformer', 'hybrid']
results = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model_name in torch_models:
    if model_name == "lstm":
        model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        model.load_state_dict(torch.load("checkpoints_and_metrics/lstm_track2.pt", map_location=device))

    elif model_name == "transformer":
        model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2.pt", map_location=device))

    elif model_name == "hybrid":
        lstm_model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        transformer_model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        lstm_model.load_state_dict(torch.load(f"checkpoints_and_metrics/lstm_track2.pt", map_location=device))
        transformer_model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2.pt", map_location=device))
        model = HybridLSTMTransformerModel(lstm_model, transformer_model)
        model.load_state_dict(torch.load("checkpoints_and_metrics/hybrid_track2.pt", map_location=device))

    model = model.to(device)
    model.eval()

    metrics = evaluate_model(
        model=model,
        dataloader=lab42_test_loader,
        loss_fn=torch.nn.BCEWithLogitsLoss(),
        device=device,
        delay_steps=30,
        split_name="test",
        strategy='shift',
        simulate_random=True
    )

    metrics["simulate_delay"] = 30
    metrics["simulate_strategy"] = "shift"
    metrics["simulate_random"] = True

    results[model_name] = metrics

# Display results
from pprint import pprint
from tabulate import tabulate
print("Delay Evaluation Results:")
headers = ["model_all_rooms", "accuracy", "f1", "precision", "recall", "aur_roc", "loss"]
rows = []

for model, metrics in results.items():
    rows.append([
        model,
        round(metrics["accuracy"], 4),
        round(metrics["f1"], 4),
        round(metrics["precision"], 4),
        round(metrics["recall"], 4),
        round(float(metrics["aur_roc"]), 4),
        round(metrics["loss"], 4)
    ])

print(tabulate(rows, headers=headers, tablefmt="github"))

Delay Evaluation Results:
| model_all_rooms   |   accuracy |     f1 |   precision |   recall |   aur_roc |   loss |
|-------------------|------------|--------|-------------|----------|-----------|--------|
| lstm              |     0.8263 | 0      |      0      |   0      |    0.4185 | 1.9423 |
| transformer       |     0.8992 | 0.731  |      0.6812 |   0.7887 |    0.9359 | 0.3597 |
| hybrid            |     0.8417 | 0.1734 |      0.9332 |   0.0956 |    0.9296 | 1.1542 |


## Singular Rooms

### Room_11

In [10]:
lab42_test_df = load_lab42_from_influxdb(
    url="http://localhost:8086",
    token='VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==',
    org="miguel_master_thesis",
    bucket='lab42_sensor_data',
    start='2025-02-01T00:00:00Z',
    stop='2025-02-28T23:59:59Z'
)

lab42_test_df.dropna(inplace=True)
lab42_test_df = lab42_test_df[lab42_test_df['room_number'] == 'Room_11']
lab42_test_df = add_contextual_features(lab42_test_df, normalize=True)

lab42_test_dataset = Lab42Dataset(
    lab42_test_df,
    features=features,
    label_col='Occupancy',
    include_capacity=False
)

lab42_test_loader = DataLoader(lab42_test_dataset, batch_size=64, shuffle=False)

torch_models = ['lstm', 'transformer', 'hybrid']
results = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model_name in torch_models:
    if model_name == "lstm":
        model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        model.load_state_dict(torch.load("checkpoints_and_metrics/lstm_track2_room11.pt", map_location=device))

    elif model_name == "transformer":
        model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room11.pt", map_location=device))

    elif model_name == "hybrid":
        lstm_model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        transformer_model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        lstm_model.load_state_dict(torch.load(f"checkpoints_and_metrics/lstm_track2_room11.pt", map_location=device))
        transformer_model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room11.pt", map_location=device))
        model = HybridLSTMTransformerModel(lstm_model, transformer_model)
        model.load_state_dict(torch.load("checkpoints_and_metrics/hybrid_track2_room11.pt", map_location=device))

    model = model.to(device)
    model.eval()

    metrics = evaluate_model(
        model=model,
        dataloader=lab42_test_loader,
        loss_fn=torch.nn.BCEWithLogitsLoss(),
        device=device,
        delay_steps=30,
        split_name="test",
        strategy='shift',
        simulate_random=True
    )

    metrics["simulate_delay"] = 30
    metrics["simulate_strategy"] = "shift"
    metrics["simulate_random"] = True

    results[model_name] = metrics

# Display results
from pprint import pprint
print("Delay Evaluation Results:")
pprint(results)
headers = ["model_room11", "accuracy", "f1", "precision", "recall", "aur_roc", "loss"]
rows = []

for model, metrics in results.items():
    rows.append([
        model,
        round(metrics["accuracy"], 4),
        round(metrics["f1"], 4),
        round(metrics["precision"], 4),
        round(metrics["recall"], 4),
        round(float(metrics["aur_roc"]), 4),
        round(metrics["loss"], 4)
    ])

print(tabulate(rows, headers=headers, tablefmt="github"))

Dropped 0 rows due to NaNs.
Delay Evaluation Results:
{'hybrid': {'accuracy': 0.8670937153233128,
            'aur_roc': np.float64(0.9319976137006641),
            'confusion_matrix': [[34380, 0], [5270, 2]],
            'f1': 0.0007584376185058779,
            'loss': 1.4315637954060108,
            'precision': 1.0,
            'recall': 0.00037936267071320183,
            'simulate_delay': 30,
            'simulate_random': True,
            'simulate_strategy': 'shift'},
 'lstm': {'accuracy': 0.8670432765055988,
          'aur_roc': np.float64(0.5827178455378211),
          'confusion_matrix': [[34380, 0], [5272, 0]],
          'f1': 0.0,
          'loss': 1.3575851902766274,
          'precision': 0.0,
          'recall': 0.0,
          'simulate_delay': 30,
          'simulate_random': True,
          'simulate_strategy': 'shift'},
 'transformer': {'accuracy': 0.8937254110763644,
                 'aur_roc': np.float64(0.9296281942381012),
                 'confusion_matrix': [[3

### Room_02

In [11]:
lab42_test_df = load_lab42_from_influxdb(
    url="http://localhost:8086",
    token='VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==',
    org="miguel_master_thesis",
    bucket='lab42_sensor_data',
    start='2025-02-01T00:00:00Z',
    stop='2025-02-28T23:59:59Z'
)

lab42_test_df.dropna(inplace=True)
lab42_test_df = lab42_test_df[lab42_test_df['room_number'] == 'Room_02']
lab42_test_df = add_contextual_features(lab42_test_df, normalize=True)

lab42_test_dataset = Lab42Dataset(
    lab42_test_df,
    features=features,
    label_col='Occupancy',
    include_capacity=False
)

lab42_test_loader = DataLoader(lab42_test_dataset, batch_size=64, shuffle=False)

torch_models = ['lstm', 'transformer', 'hybrid']
results = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model_name in torch_models:
    if model_name == "lstm":
        model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        model.load_state_dict(torch.load("checkpoints_and_metrics/lstm_track2_room02.pt", map_location=device))

    elif model_name == "transformer":
        model = TransformerModel(input_dim=(len(features) ), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room02.pt", map_location=device))

    elif model_name == "hybrid":
        lstm_model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        transformer_model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        lstm_model.load_state_dict(torch.load(f"checkpoints_and_metrics/lstm_track2_room02.pt", map_location=device))
        transformer_model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room02.pt", map_location=device))
        model = HybridLSTMTransformerModel(lstm_model, transformer_model)
        model.load_state_dict(torch.load("checkpoints_and_metrics/hybrid_track2_room02.pt", map_location=device))

    model = model.to(device)
    model.eval()

    metrics = evaluate_model(
        model=model,
        dataloader=lab42_test_loader,
        loss_fn=torch.nn.BCEWithLogitsLoss(),
        device=device,
        delay_steps=30,
        split_name="test",
        strategy='shift',
        simulate_random=True
    )

    metrics["simulate_delay"] = 30
    metrics["simulate_strategy"] = "shift"
    metrics["simulate_random"] = True

    results[model_name] = metrics

# Display results
print("Delay Evaluation Results:")

headers = ["model_room02", "accuracy", "f1", "precision", "recall", "aur_roc", "loss"]
rows = []

for model, metrics in results.items():
    rows.append([
        model,
        round(metrics["accuracy"], 4),
        round(metrics["f1"], 4),
        round(metrics["precision"], 4),
        round(metrics["recall"], 4),
        round(float(metrics["aur_roc"]), 4),
        round(metrics["loss"], 4)
    ])

print(results)

Dropped 0 rows due to NaNs.
Delay Evaluation Results:
{'lstm': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.6683759394716337, 'aur_roc': np.float64(0.4765268611595485), 'confusion_matrix': [[26590, 0], [13193, 0]], 'loss': 4.5571588777056045, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': True}, 'transformer': {'precision': 0.8892584780139542, 'recall': 0.830819373910407, 'f1': 0.85904620086994, 'accuracy': 0.9095844958902044, 'aur_roc': np.float64(0.9710468048531213), 'confusion_matrix': [[25225, 1365], [2232, 10961]], 'loss': 0.49933627554968074, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': True}, 'hybrid': {'precision': 0.8882695507487521, 'recall': 0.8092928067914803, 'f1': 0.8469440368064094, 'accuracy': 0.9029987683181259, 'aur_roc': np.float64(0.9522810582509154), 'confusion_matrix': [[25247, 1343], [2516, 10677]], 'loss': 0.8403770729509795, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': Tru

### Room_06

In [15]:
lab42_test_df = load_lab42_from_influxdb(
    url="http://localhost:8086",
    token='VvreOjZoLYmSZKbpufKm0boJlNfifTSToscteblxZwEetIRMP3IGdUu-IMqRkHNhKy9_o5hfDX56IXEtcRifhw==',
    org="miguel_master_thesis",
    bucket='lab42_sensor_data',
    start='2025-02-01T00:00:00Z',
    stop='2025-02-28T23:59:59Z'
)

lab42_test_df.dropna(inplace=True)
lab42_test_df = lab42_test_df[lab42_test_df['room_number'] == 'Room_06']
lab42_test_df = add_contextual_features(lab42_test_df, normalize=True)

lab42_test_dataset = Lab42Dataset(
    lab42_test_df,
    features=features,
    label_col='Occupancy',
    include_capacity=False
)

lab42_test_loader = DataLoader(lab42_test_dataset, batch_size=64, shuffle=False)
torch_models = ['lstm', 'transformer', 'hybrid']
results = {}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model_name in torch_models:
    if model_name == "lstm":
        model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        model.load_state_dict(torch.load("checkpoints_and_metrics/lstm_track2_room06.pt", map_location=device))

    elif model_name == "transformer":
        model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room06.pt", map_location=device))

    elif model_name == "hybrid":
        lstm_model = LSTMModel(input_dim=(len(features)), hidden_dim=64, num_layers=2, dropout=0.2)
        transformer_model = TransformerModel(input_dim=(len(features)), d_model=64, nhead=4, num_encoder_layers=2, dropout=0.2)
        lstm_model.load_state_dict(torch.load(f"checkpoints_and_metrics/lstm_track2_room06.pt", map_location=device))
        transformer_model.load_state_dict(torch.load(f"checkpoints_and_metrics/transformer_track2_room06.pt", map_location=device))
        model = HybridLSTMTransformerModel(lstm_model, transformer_model)
        model.load_state_dict(torch.load("checkpoints_and_metrics/hybrid_track2_room06.pt", map_location=device))

    model = model.to(device)
    model.eval()

    metrics = evaluate_model(
        model=model,
        dataloader=lab42_test_loader,
        loss_fn=torch.nn.BCEWithLogitsLoss(),
        device=device,
        delay_steps=30,
        split_name="test",
        strategy='shift',
        simulate_random=True
    )

    metrics["simulate_delay"] = 30
    metrics["simulate_strategy"] = "shift"
    metrics["simulate_random"] = True

    results[model_name] = metrics

# Display results
from pprint import pprint
print("Delay Evaluation Results:")
headers = ["model_room06", "accuracy", "f1", "precision", "recall", "aur_roc", "loss"]
rows = []

for model, metrics in results.items():
    rows.append([
        model,
        round(metrics["accuracy"], 4),
        round(metrics["f1"], 4),
        round(metrics["precision"], 4),
        round(metrics["recall"], 4),
        round(float(metrics["aur_roc"]), 4),
        round(metrics["loss"], 4)
    ])

print(results)

Dropped 0 rows due to NaNs.
Delay Evaluation Results:
{'lstm': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8040778358809332, 'aur_roc': np.float64(0.8670963657032945), 'confusion_matrix': [[31983, 0], [7793, 0]], 'loss': 2.16529219095682, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': True}, 'transformer': {'precision': 0.8273147569661377, 'recall': 0.8496086231233158, 'f1': 0.8383134970878704, 'accuracy': 0.9357904263877715, 'aur_roc': np.float64(0.9649147326474715), 'confusion_matrix': [[30601, 1382], [1172, 6621]], 'loss': 0.22332209645575093, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': True}, 'hybrid': {'precision': 1.0, 'recall': 0.00012832028743744385, 'f1': 0.00025660764690787786, 'accuracy': 0.8041029766693484, 'aur_roc': np.float64(0.9404219212616718), 'confusion_matrix': [[31983, 0], [7792, 1]], 'loss': 2.468119396645444, 'simulate_delay': 30, 'simulate_strategy': 'shift', 'simulate_random': True}}
