In [1]:
import sys
sys.path.insert(0, "../")

In [2]:
import os
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
from pathlib import Path
from torch.utils.data import DataLoader

from src.data.feature_dataloader import Features
from src.models.hydramr import HydraMRRegressor

In [3]:
def predict(model: torch.nn.Module, dataloader: torch.utils.data.DataLoader):
    """Run prediction for a given model and dataloader.
    
    Parameters:
    ----------
        model: model to use for prediction
        dataloader: dataloader to use for prediction
    
    Returns:
    -------
        all_predictions (torch.Tensor), all_targets (torch.Tensor), test_losses (ndarray): predictions, targets and losses for the test set

    """
    model.eval()
    
    all_predictions = torch.tensor([])
    all_targets = torch.tensor([])
    losses = np.array([])
    
    iterator = tqdm(dataloader, unit="batch", position=0, leave=False)
    kpi_means = torch.tensor(dataloader.dataset.kpi_means)
    kpi_stds = torch.tensor(dataloader.dataset.kpi_stds)
    
    for data, targets in iterator:
        output = model(data)
        
        # Convert back from standardized to original scale
        output = ((output * kpi_stds) + kpi_means)
        targets = ((targets * kpi_stds) + kpi_means)
        
        loss_fn = nn.MSELoss()
        all_predictions = torch.cat((all_predictions, output), dim=0)
        all_targets = torch.cat((all_targets, targets), dim=0)
        
        loss = loss_fn(output, targets).item()
        losses = np.append(losses, loss)
        
        iterator.set_description(f'Overall RMSE (loss): {np.sqrt(losses.mean()):.2f} Batch RMSE (loss): {np.sqrt(loss):.2f}')
    
    return all_predictions, all_targets, losses

In [4]:
fold = 3
feature_extractors = ['HydraMV_8_64']

batch_size = 32

feature_path = Path(f"../data/processed/features.hdf5")

datasets = {
    "train": Features(feature_path, data_type="train", feature_extractors=feature_extractors, fold=fold),
    "val": Features(feature_path, data_type="val", feature_extractors=feature_extractors, fold=fold),
    "test": Features(feature_path, data_type="test", feature_extractors=feature_extractors, fold=fold)

}

dataloaders = {
    "train": DataLoader(datasets["train"], batch_size=batch_size, shuffle=False, num_workers=0),
    "val": DataLoader(datasets["val"], batch_size=batch_size, shuffle=False, num_workers=0),
    "test": DataLoader(datasets["test"], batch_size=batch_size, shuffle=False, num_workers=0)
}

Arguments: 
                     Data Path:             ../data/processed/features.hdf5 
                     Data Type:             train 
                     Data length:           6551 
                     Features selected:                      
                         - Names:           ['HydraMV_8_64'] 
                         - KPI Window Size: 1 
                     
Arguments: 
                     Data Path:             ../data/processed/features.hdf5 
                     Data Type:             val 
                     Data length:           1637 
                     Features selected:                      
                         - Names:           ['HydraMV_8_64'] 
                         - KPI Window Size: 1 
                     
Arguments: 
                     Data Path:             ../data/processed/features.hdf5 
                     Data Type:             test 
                     Data length:           1446 
                     Features selected:        

In [5]:
# Load the model
input_shape, target_shape = datasets["train"].get_data_shape()

model_path = Path("../models/best_HydraMRRegressor_3.pt")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

hidden_dim = 256
model_depth = 1
batch_norm = True

model = HydraMRRegressor(in_features=input_shape[0], out_features=target_shape[0], hidden_dim=hidden_dim, model_depth=model_depth, batch_norm=batch_norm)
model.load_state_dict(torch.load(model_path, map_location=device))

<All keys matched successfully>

In [6]:
all_predictions = torch.tensor([])
all_targets = torch.tensor([])

for dataset_type in ["train", "val", "test"]:
    predictions, targets, losses = predict(model, dataloaders[dataset_type])
    all_predictions = torch.cat((all_predictions, predictions), dim=0)
    all_targets = torch.cat((all_targets, targets), dim=0)
    print(f"{dataset_type} RMSE: {np.mean(np.sqrt(losses)):.2f}")


                                                                                                       

train RMSE: 0.49


                                                                                                     

val RMSE: 0.66


                                                                                                     

test RMSE: 0.69




In [33]:
data_seg_sec_list = datasets["train"].indices + datasets["val"].indices + datasets["test"].indices
data_seg_sec_list

# Initialize the nested dictionary
road_predictions = {}

# Iterate through data_seg_sec_list and all_predictions to populate the dictionary
for idx, (outer_key, inner_key) in enumerate(data_seg_sec_list):
    outer_key, inner_key = int(outer_key), int(inner_key)
    if outer_key not in road_predictions:
        road_predictions[outer_key] = {}
    road_predictions[outer_key][inner_key] = all_predictions[idx].tolist()

print(road_predictions[0][11])


[0.7721508128171716, 2.756725926569798, 0.04157282451083849, 3.799090905791356]
