# Setup Environment and Dependencies
Import the required libraries (NumPy, scikit-learn with the Intel extension, and the pi_optimal utilities), change the working directory to the parent directory, and suppress warnings.

In [1]:
# Setup Environment and Dependencies

import os
import numpy as np
from sklearnex import patch_sklearn
import warnings

# Change directory to the parent directory.
# A bare os.chdir("..") is not idempotent: every re-run of this cell in a live
# kernel would climb one directory higher. The guard performs the change only
# once per kernel; on a fresh kernel the effect is identical to os.chdir("..").
if "_NOTEBOOK_WORKDIR" not in globals():
    _NOTEBOOK_WORKDIR = os.path.abspath("..")
    os.chdir(_NOTEBOOK_WORKDIR)

# Apply Intel extension to sklearn.
# Kept ahead of the pi_optimal imports that follow in this cell.
# NOTE(review): presumably those modules import sklearn, which is why the
# patch has to be applied first - confirm before reordering.
patch_sklearn()

# Suppress warnings (blanket filter: silences every warning category for the
# rest of the session).
warnings.filterwarnings('ignore')

# Import pi_optimal utilities
from pi_optimal.utils.data_generators.gym_data_generator import GymDataGenerator
from pi_optimal.datasets.timeseries_dataset import TimeseriesDataset
from pi_optimal.models.random_forest_model import RandomForest
from pi_optimal.models.mlp import NeuralNetwork
from pi_optimal.evaluators.base_evaluator import BaseEvaluator
from pi_optimal.evaluators.plotting import plot_n_step_evaluation, plot_n_step_episode_rollout

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


# Create Gym Data Generator
Initialize GymDataGenerator with LunarLander environment and collect training and test data with specified parameters.

In [2]:
# Create Gym Data Generator

# Data source wrapping the LunarLander-v3 environment
data_collector = GymDataGenerator(env_name="LunarLander-v3")

# Train and test frames are gathered by two separate collect() calls that use
# exactly the same settings.
# NOTE(review): both seeds are None, so the collected data is presumably
# different on every run - confirm how collect() applies seeds before fixing
# them for reproducibility.
collect_settings = dict(
    n_steps=5000,
    max_steps_per_episode=200,
    env_seed=None,
    action_seed=None,
)
df_train = data_collector.collect(**collect_settings)
df_test = data_collector.collect(**collect_settings)

Collecting steps: 100%|██████████| 5000/5000 [00:00<00:00, 14776.01it/s]
Collecting steps: 100%|██████████| 5000/5000 [00:00<00:00, 15921.59it/s]


# Configure Dataset Parameters
Set up dataset configuration dictionary defining features, processors, and evaluation metrics for states, actions, and rewards.

In [8]:
# Configure Dataset Parameters

# Configuration dictionary handed to TimeseriesDataset and BaseEvaluator.
# "states" and "actions" map an integer index to a feature description
# (column name, type, optional processor, and - for states - an evaluation
# metric). Entries are listed in index order 0..9.
dataset_config = {
    "episode_column": "episode",
    "timestep_column": "step",
    "states": {
        # state_0 is the only numerical state using a StandardScaler
        0: {
            "name": "state_0",
            "type": "numerical",
            "processor": {"name": "StandardScaler"},
            "evaluation_metric": "mae",
        },
        # state_1 .. state_5 share one description: RobustScaler on the
        # 5%-95% quantile range. A fresh dict is built per index so no entry
        # shares mutable objects with another.
        **{
            idx: {
                "name": f"state_{idx}",
                "type": "numerical",
                "processor": {
                    "name": "RobustScaler",
                    "params": {"quantile_range": (5.0, 95.0)},
                },
                "evaluation_metric": "mae",
            }
            for idx in range(1, 6)
        },
        # state_6 and state_7 are binary and left unprocessed
        **{
            idx: {
                "name": f"state_{idx}",
                "type": "binary",
                "processor": None,
                "evaluation_metric": "f1_binary",
            }
            for idx in (6, 7)
        },
        # The episode-termination flag and the reward are listed as states too
        8: {
            "name": "done",
            "type": "binary",
            "processor": None,
            "evaluation_metric": "f1_binary",
        },
        9: {
            "name": "reward",
            "type": "numerical",
            "processor": {"name": "PowerTransformer"},
            "evaluation_metric": "mae",
        },
    },
    "actions": {
        # Single action feature, one-hot encoded
        0: {
            "name": "action_0",
            "type": "categorial",
            "processor": {"name": "OneHotEncoder"},
        },
    },
    # Both indices point at the "reward" entry (index 9) of "states"
    "reward_feature_idx": 9,
    "reward_vector_idx": 9,
    "reward_column": "reward",
}

# Create Training and Test Datasets
Initialize TimeseriesDataset objects with collected data, applying the configuration and setting lookback/forecast windows.

In [9]:
# Create Training and Test Datasets

# Window sizes, in timesteps, used by both datasets
LOOKBACK_TIMESTEPS = 10
FORECAST_TIMESTEPS = 1

# Arguments common to both splits. The very same dataset_config object is
# passed to the training dataset first and to the test dataset afterwards.
shared_dataset_kwargs = dict(
    dataset_config=dataset_config,
    lookback_timesteps=LOOKBACK_TIMESTEPS,
    forecast_timesteps=FORECAST_TIMESTEPS,
)

# Training split: created with train_processors=True
dataset_train = TimeseriesDataset(
    df=df_train,
    train_processors=True,
    **shared_dataset_kwargs,
)

# Test split: created with train_processors=False.
# NOTE(review): presumably this reuses the processors fitted while building
# dataset_train - confirm against TimeseriesDataset before reordering.
dataset_test = TimeseriesDataset(
    df=df_test,
    train_processors=False,
    **shared_dataset_kwargs,
)

# Train Neural Network Model
Create and train a Neural Network model with specified hyperparameters on the training dataset.

In [10]:

# Hyperparameters forwarded unchanged, as keyword arguments, to NeuralNetwork
nn_hyperparameters = {
    "alpha": 0,
    "max_iter": 1000,
    "hidden_layer_sizes": (64, 64, 64),
    "learning_rate_init": 0.001,
    "learning_rate": "constant",
    "verbose": 0,
    "batch_size": 16,
    "tol": 1e-6,
}
nn_model = NeuralNetwork(**nn_hyperparameters)

# Fit the model on the training dataset
nn_model.fit(dataset_train)

Training models...:   0%|          | 0/10 [00:00<?, ?it/s]

# Evaluate Model Performance
Use BaseEvaluator to assess model performance with one-step and n-step predictions.

In [11]:
# Evaluate Model Performance

# Horizon, in steps, of the multi-step evaluation
N_EVAL_STEPS = 20

# The evaluator is configured from the dataset_config carried by the test dataset
evaluator = BaseEvaluator(dataset_config=dataset_test.dataset_config)

# One-step evaluation of the trained model on the test dataset
# (the result is stored in `res`; this cell does not display it)
res = evaluator.evaluate_one_step(dataset_test, nn_model)

# n-step evaluation of the same model on the same test dataset
evaluations = evaluator.evaluate_dataset(dataset_test, nn_model, n_steps=N_EVAL_STEPS)

# Plot the n-step evaluation results
plot_n_step_evaluation(evaluations, dataset_test)

Performing rollouts: 100%|██████████| 161/161 [00:13<00:00, 11.73it/s]


## Visualize Predictions on a Test Trajectory
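Roll the trained model forward for a fixed number of steps from a chosen starting point inside one test episode, then plot the two state sequences returned by the evaluator (`next_states` and `next_states_hat`) together.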

In [15]:

# Roll out one episode of the test dataset for a fixed number of steps and
# plot the two returned state sequences together.
# NOTE(review): _rollout_episode_n_steps is underscore-prefixed, i.e. a
# private evaluator method - confirm there is no public equivalent.
rollout_settings = dict(
    model=nn_model,
    episode_idx=10,
    initial_state_idx=5,
    n_rollout_steps=10,
)
next_states, next_states_hat = evaluator._rollout_episode_n_steps(dataset_test, **rollout_settings)
plot_n_step_episode_rollout(next_states, next_states_hat, dataset_test)