In [17]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box, Sequence, Dict
import numpy as np
import pandas as pd
from src.commons import methods
import statistics

from src.rl_environments.env_impr import EnvImpr
from src.model_approaches.analytical.oracle_fix_n import OracleFix_n
from src.model_approaches.analytical.oracle_var_n import OracleVar_n
from src.model_approaches.analytical.pto_fix_n import PTO_Fix_n
from src.model_approaches.analytical.pto_var_n import PTO_Var_n
from src.model_approaches.rl.ppo import PPO

from src.commons import generate_data, methods
from src.commons.constants import AlphaEstimator, TravelTimeDist
import os
import pickle
from pathlib import Path

## Config

In [18]:
# Settings passed to the PPO agent (see the PPO(...) construction further down).
TRAIN_CONFIG = {
    'alpha_range': [(1 + tenth / 10) for tenth in range(1, 10)],    # 1.1, 1.2, ..., 1.9
    'beta_range': [round(half * 0.5, 1) for half in range(2, 6)],   # 1.0, 1.5, 2.0, 2.5
    'h_range': [0.5],
    'c_range': [25],
    'total': [*range(10, 40)],                                      # 10, 11, ..., 39
    'travel_time': TravelTimeDist.UNIFORM,
    'param_estimator': AlphaEstimator.MOMENTS,
}

# Settings passed to the PTO baselines. Compared with TRAIN_CONFIG only two
# entries differ: alpha_range covers 0.1..0.9 instead of 1.1..1.9, and
# travel_time is HIGH instead of UNIFORM.
# NOTE(review): presumably a deliberate train/test distribution shift - confirm.
TEST_CONFIG = {
    'alpha_range': [(0 + tenth / 10) for tenth in range(1, 10)],    # 0.1, 0.2, ..., 0.9
    'beta_range': [round(half * 0.5, 1) for half in range(2, 6)],   # 1.0, 1.5, 2.0, 2.5
    'h_range': [0.5],
    'c_range': [25],
    'total': [*range(10, 40)],                                      # 10, 11, ..., 39
    'travel_time': TravelTimeDist.HIGH,
    'param_estimator': AlphaEstimator.MOMENTS,
}

In [19]:
# Approaches to evaluate, in the order they will be run.
# PPO is configured with TRAIN_CONFIG while the PTO baselines receive TEST_CONFIG.
# The Oracle baselines are currently disabled; uncomment their entries to
# include them again.
# The list is written as one bracketed literal so that no backslash line
# continuation is needed (a trailing backslash silently depends on whatever
# line happens to follow it).
model_approaches = [
    PPO(EnvClass=EnvImpr, config=TRAIN_CONFIG, timesteps=500_000),
    # OracleVar_n(),
    PTO_Var_n(TEST_CONFIG),
    *[PTO_Fix_n(n, TEST_CONFIG) for n in [3, 5]],
    # *[OracleFix_n(n) for n in [3, 5]],
]

# Location of the pickled test frame, resolved through the project helper.
test_df_path = Path(methods.file_path('prof_data.pkl', dir_name='data/prof_data_test'))

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project / a trusted source.
with open(test_df_path, 'rb') as f:
    test_df = pickle.load(f)


[INFO] Loaded trained PPO model from:
/Users/preetkaria/PycharmProjects/when_to_leave/models/PPO_EnvImpr_9a770_500K.zip


In [20]:
from pandarallel import pandarallel

# Initialize pandarallel with progress bar enabled
pandarallel.initialize(progress_bar=True)

# Collects the prediction column names of every model evaluated in this run;
# the visualisation cell plots exactly these columns.
columns_to_visualize = []
for model in model_approaches:
    # Ask the model for its output column names once per iteration instead of
    # once per use.
    prediction_keys = model.prediction_keys()
    print(f'Evaluating {prediction_keys}')
    columns_to_visualize += prediction_keys

    # Run the model on every row in parallel. With result_type='expand' the
    # per-row result is spread over columns, which are addressed below by
    # their integer position (0, 1, ...).
    # NOTE(review): override=False presumably lets the model reuse a prediction
    # already stored in the row - confirm in the model classes.
    new_cols = test_df.parallel_apply(
        lambda row: model.predict(row, override=False),
        result_type='expand',
        axis=1,
    )

    # Copy each expanded column into test_df under the model's own key and
    # report its mean as a quick sanity check.
    for position, col_name in enumerate(prediction_keys):
        test_df[col_name] = new_cols[position]
        print(col_name, test_df[col_name].mean())




INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Evaluating ['cost_PPO_EnvImpr_9a770_500K', 'observed_n_PPO_EnvImpr_9a770_500K']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

cost_PPO_EnvImpr_9a770_500K 14.06396905841846
observed_n_PPO_EnvImpr_9a770_500K 17.1395
Evaluating ['cost_PTO_Var_n', 'observed_n_PTO_Var_n']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

cost_PTO_Var_n 14.337385218706823
observed_n_PTO_Var_n 16.1672
Evaluating ['cost_PTO_Fix_n_3']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

cost_PTO_Fix_n_3 26.405796761524012
Evaluating ['cost_PTO_Fix_n_5']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

cost_PTO_Fix_n_5 22.468632054456194


In [21]:
from pathlib import Path

# Persist the evaluated frame twice: as a CSV sitting next to the pickle, and
# as the pickle itself. The pickle is written to the same path the frame was
# loaded from, so the prediction columns added during evaluation are stored
# back into the input file.
test_df_path = Path(test_df_path)
csv_path = test_df_path.with_suffix('.csv')
test_df.to_csv(csv_path, index=False)
test_df.to_pickle(test_df_path)

## Visualize Results

In [22]:
import plotly.graph_objects as go


def make_bar_figure(values: pd.Series, title: str, yaxis_title: str) -> go.Figure:
    """Build a bar chart with one bar per entry of ``values``.

    Args:
        values: Series whose index supplies the bar labels and whose values
            supply the bar heights.
        title: Figure title.
        yaxis_title: Label of the y axis.

    Returns:
        The configured (not yet shown) plotly figure.
    """
    figure = go.Figure(data=[go.Bar(x=values.index, y=values.values)])
    figure.update_layout(title=title, yaxis_title=yaxis_title)
    return figure


# columns_to_visualize is filled by the evaluation loop. To plot every
# prediction column stored in test_df instead, use:
# columns_to_visualize = [x for x in test_df.columns if 'cost' in x or 'observe' in x]

# Columns summarised below: the evaluated prediction columns plus 'h' and 'c'.
summary_columns = columns_to_visualize + ['h', 'c']

# Calculate means and plot them
means = test_df[summary_columns].mean()
fig = make_bar_figure(means, "Mean Values of Costs, H, and C", "Mean Value")
fig.show()

# Calculate medians and plot them
medians = test_df[summary_columns].median()
fig = make_bar_figure(medians, "Median Values of Costs, H, and C", "Median Value")
fig.show()

# Create a dataframe with two rows: one for means and one for medians
summary_df = pd.DataFrame({'Mean': means, 'Median': medians}).T
summary_df.head()


Unnamed: 0,cost_PPO_EnvImpr_9a770_500K,observed_n_PPO_EnvImpr_9a770_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,h,c
Mean,14.063969,17.1395,14.337385,16.1672,26.405797,22.468632,0.5,25.0
Median,7.577279,16.0,6.814322,14.0,25.0,25.0,0.5,25.0


In [23]:
# Preview the first rows of the evaluated frame, including the prediction
# columns written by the evaluation loop.
test_df.head()

Unnamed: 0,total,travel_time,h,c,intervals,cost_PPO_EnvImpr_9e088_500K,observed_n_PPO_EnvImpr_9e088_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,...,cost_PPO_EnvImpr_f41f5_500K,observed_n_PPO_EnvImpr_f41f5_500K,cost_PPO_EnvImpr_e12a4_500K,observed_n_PPO_EnvImpr_e12a4_500K,cost_PPO_EnvImpr_3b08c_500K,observed_n_PPO_EnvImpr_3b08c_500K,cost_PPO_EnvImpr_cd5dd_500K,observed_n_PPO_EnvImpr_cd5dd_500K,cost_PPO_EnvImpr_b73d3_500K,observed_n_PPO_EnvImpr_b73d3_500K
0,43.0,3.535281,0.5,25,"[0.1306593104459917, 0.08214531495921755, 0.02...",0.050699,4.0,0.064969,3.0,0.064969,...,0.06099,3.0,0.06099,3.0,0.06099,3.0,0.06099,3.0,0.06099,3.0
1,48.0,41.459338,0.5,25,"[4.276706517433805, 3.2544721073860834, 3.7400...",62.352291,5.0,5.790341,35.0,6.522108,...,9.721972,33.0,4.997402,36.0,4.800846,36.0,8.540261,34.0,63.925812,5.0
2,31.0,114.792988,0.5,25,"[16.079087401217105, 11.608519241181346, 14.23...",49.104405,16.0,1.588985,21.0,5.063852,...,14.275825,20.0,25.0,23.0,2.589038,22.0,17.897477,20.0,25.0,23.0
3,52.0,976.197274,0.5,25,"[28.206915811705233, 6.758341759637238, 24.521...",42.585944,4.0,25.0,13.0,10.193586,...,29.287909,4.0,25.0,13.0,25.0,13.0,39.517167,4.0,25.0,13.0
4,39.0,41.909987,0.5,25,"[0.9262261308152662, 1.9254123526889755, 3.236...",18.81681,16.0,12.121427,21.0,28.006445,...,17.001209,18.0,11.164761,22.0,10.748942,22.0,17.137511,18.0,11.164761,22.0


In [24]:
# Show every column of the row at position 2 as a Series, so that values
# elided ("...") in the wide head() preview can be read in full.
test_df.iloc[2]

total                                                                             31.0
travel_time                                                                 114.792988
h                                                                                  0.5
c                                                                                   25
intervals                            [16.079087401217105, 11.608519241181346, 14.23...
cost_PPO_EnvImpr_9e088_500K                                                  49.104405
observed_n_PPO_EnvImpr_9e088_500K                                                 16.0
cost_PTO_Var_n                                                                1.588985
observed_n_PTO_Var_n                                                              21.0
cost_PTO_Fix_n_3                                                              5.063852
cost_PTO_Fix_n_5                                                                  25.0
cost_PPO_EnvImpr_9a770_500K                