# Model Evaluation

This notebook evaluates a selected checkpoint (chosen from the checkpoints saved at different numbers of training iterations during RL model training) on the training dataset.

This notebook is used to evaluate trained DDQL-MI models.

In [1]:
import numpy as np
import pandas as pd

from pathlib import Path
from datetime import datetime
import sys

from utils import print_log

In [2]:
# Enable IPython's autoreload extension in mode 2, so that edited project
# modules are re-imported automatically before each cell execution.
%load_ext autoreload
%autoreload 2

In [3]:
# Location of the previously created (split) dataset on disk
dataset_folder_path = Path("dataset") / "20250707_downsampled_1min" / "split"

In [4]:
from utils_data_split import load_split_data_from_folder, convert_to_naive_datetimes, convert_to_naive_datetimes_df

In [5]:
# Load the 'train' split from the dataset folder. The loader returns two objects
# (presumably the per-segment data and a combined DataFrame — confirm in utils_data_split).
aggregate_load_segments_train, aggregate_load_df_train = load_split_data_from_folder(dataset_folder_path, 'train') 

In [6]:
# Convert the timestamps of both training objects to naive datetimes
aggregate_load_segments_train = convert_to_naive_datetimes(aggregate_load_segments_train)
aggregate_load_df_train = convert_to_naive_datetimes_df(aggregate_load_df_train)

---

(DDQN) only

We have to quantize the data into steps of 0.05 kW = 50 W. This is required by the charging action and other parts of the pipeline.

In [7]:
# https://stackoverflow.com/questions/47949053/round-to-nearest-1000-in-pandas

def round_to_nearest_50(x):
    """Round value(s) to the nearest multiple of 50 (ties round up).

    Parameters
    ----------
    x : pandas.Series, numpy.ndarray or scalar number
        Value(s) to quantize. Must not contain NaN, because the result is
        cast to an integer dtype.

    Returns
    -------
    Same kind of container as ``x`` (a numpy integer for scalar input),
    holding integer multiples of 50.
    """
    # Use floor rather than integer truncation: truncation rounds toward zero,
    # which mis-rounds negative inputs (e.g. -30 would become 0 instead of -50).
    return np.floor((x + 25) / 50).astype(int) * 50


# Quantize the aggregate load to multiples of 50, then cap it at 5 kW (5000)
quantized_aggregate = round_to_nearest_50(aggregate_load_df_train["aggregate"])
aggregate_load_df_train["aggregate"] = quantized_aggregate.clip(upper=5000)

---

In [None]:
# Resolve the experiment folder that holds the artefacts of the training run to evaluate.

# Timestamp of the training run; pick the line matching the run you want
# (the trailing comment is the reward lambda used by that run).
# rl_datetime = datetime(2025, 8, 13, 23, 23, 31)     # 0.0
rl_datetime = datetime(2025, 8, 14, 2, 5, 26)     # 0.5
# rl_datetime = datetime(2025, 8, 14, 4, 46, 19)    # 1.0

# Action type is part of the folder name; leave it unchanged
ACTION_TYPE = "discrete"

# Reward lambda of the selected run (between 0 and 1); must match rl_datetime above
REWARD_LAMBDA = 0.5

run_timestamp = rl_datetime.strftime('%Y%m%d_%H%M%S')
experiment_folder = (
    Path("experiments")
    / (run_timestamp + f"_action_{ACTION_TYPE}_reward_lambda_{REWARD_LAMBDA:.1f}")
)

# Fail early when the configured run does not exist on disk
experiment_folder_found = experiment_folder.exists()
if not experiment_folder_found:
    raise FileNotFoundError(f"Experiment folder {experiment_folder} does not exist. Please check the path.")

In [9]:
# Find the selected checkpoint: a sub-folder of "selected_ckpt" whose name ends
# with "_<episode number>" (e.g. "episode_0600").

selected_ckpt_root = experiment_folder / "selected_ckpt"

# Sort the sub-folders: Path.iterdir() yields entries in arbitrary order, so
# without sorting the chosen checkpoint could differ between runs/machines.
sub_folders = sorted(f for f in selected_ckpt_root.iterdir() if f.is_dir())

if not sub_folders:
    raise FileNotFoundError(f"No checkpoint sub-folder found under {selected_ckpt_root}.")
if len(sub_folders) > 1:
    # More than one candidate is tolerated, but make the choice visible
    print_log(
        f"Found {len(sub_folders)} sub-folders under {selected_ckpt_root}; "
        f"using the first one in sorted order."
    )

selected_ckpt_folder = sub_folders[0]
# The episode number is the last "_"-separated token of the folder name
selected_ckpt_episode_number = int(selected_ckpt_folder.name.split('_')[-1])

print_log(f"Selected checkpoint folder: {selected_ckpt_folder}")

[2025-09-23 16:33:30:254] Selected checkpoint folder: experiments/20250814_020526_action_discrete_reward_lambda_0.5/selected_ckpt/episode_0600


We create a new folder under the experiment_folder specifically for storing the results of executing a subset of training episodes with the selected ckpt (for IEEE journal)

In [10]:
# Folder that receives the evaluation results of the selected checkpoint
log_folder = experiment_folder / "logs_train_selected-ckpt"

# exist_ok=True creates the folder only when missing, without a separate
# check-then-create step.
log_folder.mkdir(exist_ok=True)

Create data loader

In [None]:
# Add the project sub-folders to the import search path
for extra_import_root in (Path('rl_env'), Path('model', 'H_network')):
    sys.path.append(str(extra_import_root))

from rl_env.data_loader import SimpleSmartMeterDataLoader

# The loader keeps its episode registry in a JSON file inside the log folder
episode_registry_path = log_folder / "simple_episode_registry_train_subset.json"

sm_dl_train = SimpleSmartMeterDataLoader(
    aggregate_load_segments=aggregate_load_segments_train,
    aggregate_load_df=aggregate_load_df_train,
    registry_path=episode_registry_path,
)

# Display the number of episodes the loader produced
sm_dl_train.get_divided_segments_length()

[SimpleDataLoader] Generated 162 episodes (1 day each)


162

Create environment

In [12]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [13]:
from model.H_network.common.factories import create_h_network_module_with_defaults
from model.H_network.h_network_arch import HNetworkType

# The optimizer settings are irrelevant for the testing loop, but the factory
# needs them in order to build the module.
h_network_kwargs = dict(
    h_network_type=HNetworkType.H_NETWORK,
    optimizer_class=torch.optim.RMSprop,
    optimizer_kwargs=dict(lr=0.001),
)

h_network_rl_module = create_h_network_module_with_defaults(
    action_type=ACTION_TYPE,
    **h_network_kwargs,
    device=DEVICE,
)

In [14]:
from rl_env.base.env_module import SmartMeterEnvFactory
from rl_env.training_mode import TrainingMode
from decimal import Decimal

# Collect the environment settings first, then hand them to the factory
env_train_kwargs = dict(
    action_type=ACTION_TYPE,
    smart_meter_data_loader=sm_dl_train,
    h_network_rl_module=h_network_rl_module,
    mode=TrainingMode.TEST,  # run the environment in test mode
    reward_lambda=REWARD_LAMBDA,
    render_mode=None,  # no rendering
    aggregate_step_size=50,  # step size for the aggregate load
    battery_step_size=Decimal("0.05"),  # NOTE(review): presumably in kW (0.05 kW = 50 W) — confirm
)

env_train = SmartMeterEnvFactory.create(**env_train_kwargs)

[2025-09-23 16:33:39:449] [SmartMeterDiscreteEnv Test] Using data loader: SimpleSmartMeterDataLoader
[2025-09-23 16:33:39:450] [SmartMeterDiscreteEnv Test] Curriculum enabled: False
[2025-09-23 16:33:39:458] [SmartMeterDiscreteEnv Test]] Render mode set to 'None'. Render server at 127.0.0.1:50007. render_connected: False. render_client_socket: None


In [15]:
# Build the H-network; output_dim is the number of states in the aggregate load space
h_network = h_network_rl_module.initialize_h_network(output_dim=env_train.num_aggregate_bins)
h_network_rl_module.set_h_network(h_network)

h_network_rl_module.initialize_h_network_training()

In [16]:
# Restore the H-network weights saved with the selected checkpoint.
# File name pattern: h_network_<4-digit episode number>.pth
h_network_ckpt_path = selected_ckpt_folder / f"h_network_{selected_ckpt_episode_number:04d}.pth"

h_network_rl_module.load_h_network(h_network_ckpt_path)

In [17]:
# Display the loaded H-network (its repr lists the layers)
h_network_rl_module.h_network

HNetwork(
  (LSTM_1): LSTM(2, 44, batch_first=True, bidirectional=True)
  (ac1): Tanh()
  (LSTM_2): LSTM(88, 44, batch_first=True, bidirectional=True)
  (ac2): Tanh()
  (fc): Linear(in_features=88, out_features=101, bias=True)
)

In [18]:
import torchinfo


# Per-layer output shapes and parameter counts for a dummy input of shape (1, 1, 2)
torchinfo.summary(h_network_rl_module.h_network, input_size=(1, 1, 2))

Layer (type:depth-idx)                   Output Shape              Param #
HNetwork                                 [1, 1, 101]               --
├─LSTM: 1-1                              [1, 1, 88]                16,896
├─Tanh: 1-2                              [1, 1, 88]                --
├─LSTM: 1-3                              [1, 1, 88]                47,168
├─Tanh: 1-4                              [1, 1, 88]                --
├─Linear: 1-5                            [1, 1, 101]               8,989
Total params: 73,053
Trainable params: 73,053
Non-trainable params: 0
Total mult-adds (M): 0.07
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.29
Estimated Total Size (MB): 0.29

In [19]:
from gymnasium.utils.env_checker import check_env

# Sanity-check the environment against the Gymnasium API. This is a best-effort
# check: problems are reported, but they do not stop the notebook.
try:
    check_env(env_train)
except Exception as e:
    print(f"Environment has issues: {e}")
else:
    print("Environment passes all checks!")

[2025-09-23 16:33:51:809] [SmartMeterWorldDiscrete Test] Resetting environment with episode 40. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-05-07 00:00:04'), Timestamp('2013-05-07 23:59:04'))}
[2025-09-23 16:33:51:813] [SmartMeterWorldDiscrete Test] Resetting environment with episode 2. Episode info: {'length': 693, 'datetime_range': (Timestamp('2013-01-10 00:00:02'), Timestamp('2013-01-10 11:32:02'))}
[2025-09-23 16:33:51:817] [SmartMeterWorldDiscrete Test] Resetting environment with episode 110. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-09-26 00:00:00'), Timestamp('2013-09-26 23:59:00'))}
[2025-09-23 16:33:51:821] [SmartMeterWorldDiscrete Test] Resetting environment with episode 2. Episode info: {'length': 693, 'datetime_range': (Timestamp('2013-01-10 00:00:02'), Timestamp('2013-01-10 11:32:02'))}
[2025-09-23 16:33:51:826] [SmartMeterWorldDiscrete Test] Resetting environment with episode 110. Episode info: {'length': 1440, 'datetime_range

  logger.warn(


In [20]:
# NOTE(review): presumably clears rendering state left over from the
# check_env resets above — confirm in the environment implementation.
env_train.reset_render_window()

In [21]:
# Restore the Double-DQN agent saved with the selected checkpoint
from model.DDQN.ddqn import DoubleDQN

# File name pattern: rl_model_<4-digit episode number>
rl_model_ckpt_path = selected_ckpt_folder / f"rl_model_{selected_ckpt_episode_number:04d}"

rl_model = DoubleDQN.load(rl_model_ckpt_path, env=env_train)

2025-09-23 16:33:54.167765: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-23 16:33:54.177159: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758616434.187061  124523 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758616434.189960  124523 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758616434.198077  124523 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [22]:
# Display the buffer size stored on the loaded model
rl_model.buffer_size

150000

In [23]:
# Display the policy networks of the loaded model
rl_model.policy

MultiInputPolicy(
  (q_net): QNetwork(
    (features_extractor): CombinedExtractor(
      (extractors): ModuleDict(
        (aggregate_load): Flatten(start_dim=1, end_dim=-1)
        (battery_soc): Flatten(start_dim=1, end_dim=-1)
        (timestamp_features): Flatten(start_dim=1, end_dim=-1)
      )
    )
    (q_net): Sequential(
      (0): Linear(in_features=5, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): ReLU()
      (4): Linear(in_features=64, out_features=161, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): CombinedExtractor(
      (extractors): ModuleDict(
        (aggregate_load): Flatten(start_dim=1, end_dim=-1)
        (battery_soc): Flatten(start_dim=1, end_dim=-1)
        (timestamp_features): Flatten(start_dim=1, end_dim=-1)
      )
    )
    (q_net): Sequential(
      (0): Linear(in_features=5, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, 

We run the evaluation on all training episodes.

In [24]:
# Number of episodes available from the environment's data loader
total_episodes = env_train.smart_meter_data_loader.get_divided_segments_length()
total_episodes

162

In [25]:
# Evaluate every available episode, in index order
episode_indices = list(range(total_episodes))

In [26]:
# we test the rl model and H-network

import json
from utils import print_log

# Per-episode results collected by the evaluation loop
all_episode_rewards = []
all_episode_lengths = []
all_episode_infos = []
per_episode_reward_stats = []

for i, episode_idx in enumerate(episode_indices):
    # Start the requested episode
    obs, info = env_train.reset(episode_idx=episode_idx)

    # Initialize episode tracking variables
    episode_reward = 0.0
    episode_length = 0
    done = False
    state = None  # For recurrent policies

    while not done:
        # Get action from the RL model.
        # NOTE(review): predict() is called with its default arguments. If DoubleDQN
        # follows the Stable-Baselines3 API, the default is non-deterministic and
        # exploration may still be applied; consider deterministic=True — confirm.
        action, state = rl_model.predict(obs, state=state)

        # Step the environment
        obs, reward, terminated, truncated, info = env_train.step(action)

        # An episode ends when it is terminated OR truncated; looking only at the
        # termination flag would loop forever on a truncated episode.
        done = bool(terminated) or bool(truncated)

        # Update episode tracking variables
        episode_reward += reward
        episode_length += 1

    # Store results for this episode
    all_episode_rewards.append(episode_reward)
    all_episode_lengths.append(episode_length)
    all_episode_infos.append({
        "episode_idx": episode_idx,
        "episode_info": env_train.episode.get_episode_info() if hasattr(env_train, 'episode') else {}
    })

    # Reward statistics of the episode that has just finished
    per_episode_reward_stats.append(env_train.prev_episode_reward_stats)

    # Persist the per-episode details, keyed by the episode index
    env_train.save_episode_info(
        log_folder=log_folder,
        episode_training_idx=episode_idx
    )

    # Progress message every 10 episodes
    if (i + 1) % 10 == 0:
        print_log(f"Episode {i + 1}/{total_episodes} completed.")


# Summary statistics of the per-episode total rewards
mean_reward = np.mean(all_episode_rewards)
std_reward = np.std(all_episode_rewards)
min_reward = np.min(all_episode_rewards)
max_reward = np.max(all_episode_rewards)
median_reward = np.median(all_episode_rewards)

# Per-episode SUMS of the f_signal and g_signal reward components
f_signal_sums = np.array([item['f_signal_sum'] for item in per_episode_reward_stats])
g_signal_sums = np.array([item['g_signal_sum'] for item in per_episode_reward_stats])

# Number of steps in each episode, and in total
_episode_lengths = np.array(all_episode_lengths)
sum_episode_lengths = np.sum(_episode_lengths)

# True per-step mean over all episodes = (sum over all steps) / (number of steps).
# The stats hold per-episode sums, so they are added up directly; weighting them
# by episode length again would not produce a mean.
f_signal_mean = f_signal_sums.sum() / sum_episode_lengths if sum_episode_lengths > 0 else 0.0
g_signal_mean = g_signal_sums.sum() / sum_episode_lengths if sum_episode_lengths > 0 else 0.0

# Episode length statistics
mean_length = np.mean(all_episode_lengths)
std_length = np.std(all_episode_lengths)

# Assemble everything that is written to the stats file

# Aggregate statistics over all evaluated episodes
summary_stats = dict(
    mean_reward=mean_reward,
    std_reward=std_reward,
    min_reward=min_reward,
    max_reward=max_reward,
    median_reward=median_reward,
    mean_f_signal=f_signal_mean,
    mean_g_signal=g_signal_mean,
    mean_length=mean_length,
    std_length=std_length,
)

# Per-episode results plus the aggregate statistics
test_data = dict(
    evaluated_episodes=len(episode_indices),
    total_available_episodes=total_episodes,
    episode_indices=episode_indices,
    episode_rewards=all_episode_rewards,
    episode_lengths=all_episode_lengths,
    episode_infos=all_episode_infos,
    summary_stats=summary_stats,
)

def convert_numpy(obj):
    """Recursively convert numpy scalars/arrays into native Python objects.

    Lists, tuples and dicts are traversed (tuples become lists, which is also
    how JSON represents them); any other object is returned unchanged.
    """
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, (list, tuple)):
        return [convert_numpy(item) for item in obj]
    if isinstance(obj, dict):
        return {key: convert_numpy(value) for key, value in obj.items()}
    return obj


test_stats_file_path = log_folder / "test_stats_file.json"

# Serialize BEFORE opening the file: opening with 'w' truncates it, so a
# conversion/serialization error would otherwise leave an empty or partial file.
# default=str is the fallback for objects JSON cannot encode natively.
serialized_test_data = json.dumps(convert_numpy(test_data), indent=2, default=str)

with open(test_stats_file_path, 'w') as f:
    f.write(serialized_test_data)

# Persist the data loader's episode registry as well
sm_dl_train._save_registry()
print_log(f"Validation results saved to {test_stats_file_path}")

[2025-09-23 16:34:02:616] [SmartMeterWorldDiscrete Test] Resetting environment with episode 0. Episode info: {'length': 1440, 'datetime_range': (Timestamp('2013-01-08 00:00:05'), Timestamp('2013-01-08 23:59:05'))}
[2025-09-23 16:34:08:729] [SmartMeterDiscreteEnv Test] Episode finished. Sum of rewards: 155.71376629557187. Mean of rewards: 0.10820970555633903. Std of rewards: 0.057085488035774394
[2025-09-23 16:34:08:729] [SmartMeterDiscreteEnv Test] Episode f_signal sum: -312.7396611744771. Mean: -0.21733124473556434. Std: 0.11403694198749469
[2025-09-23 16:34:08:729] [SmartMeterDiscreteEnv Test] Episode g_signal sum: 1.3121285833333332. Mean: 0.0009118336228862635. Std: 0.0017470878125850258
[2025-09-23 16:34:08:745] [SmartMeterDiscreteEnv Test] Episode 0000 info saved to experiments/20250814_020526_action_discrete_reward_lambda_0.5/logs_train_selected-ckpt/episode_info/episode_0000_info.json
[2025-09-23 16:34:08:749] [SmartMeterWorldDiscrete Test] Resetting environment with episode 1.