In [1]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box, Sequence, Dict
import numpy as np
import pandas as pd
from src.commons import methods
import statistics

from src.rl_environments.env_impr import EnvImpr
from src.model_approaches.analytical.oracle_fix_n import OracleFix_n
from src.model_approaches.analytical.oracle_var_n import OracleVar_n
from src.model_approaches.analytical.pto_fix_n import PTO_Fix_n
from src.model_approaches.analytical.pto_var_n import PTO_Var_n
from src.model_approaches.rl.ppo import PPO

from src.commons import generate_data, methods
from src.commons.constants import AlphaEstimator, TravelTimeDist
import os
import pickle
from pathlib import Path

## Config

In [2]:
def _build_config(alpha_offset, travel_time):
    """Return a scenario config dict for the given alpha offset and travel-time distribution.

    Every call builds fresh list objects, so the returned dicts share no
    mutable state with each other.

    Args:
        alpha_offset: value added to 0.1, 0.2, ..., 0.9 to form 'alpha_range'.
        travel_time: a TravelTimeDist member stored under 'travel_time'.

    Returns:
        dict with keys 'alpha_range', 'beta_range', 'h_range', 'c_range',
        'total', 'travel_time' and 'param_estimator' (in that order).
    """
    return {
        'alpha_range': [(alpha_offset + i / 10) for i in range(1, 10)],
        # 1.0, 1.5, 2.0, 2.5
        'beta_range': [round(i * 0.5, 1) for i in range(2, 6)],
        'h_range': [0.5],
        'c_range': [25],
        # 10 .. 39 inclusive
        'total': list(range(10, 40)),
        'travel_time': travel_time,
        'param_estimator': AlphaEstimator.MOMENTS,
    }


# Training scenario: alpha in 2.1 .. 2.9, uniform travel-time distribution.
TRAIN_CONFIG = _build_config(alpha_offset=2, travel_time=TravelTimeDist.UNIFORM)

# Test scenario: alpha in 0.1 .. 0.9, "HIGH" travel-time distribution.
TEST_CONFIG = _build_config(alpha_offset=0, travel_time=TravelTimeDist.HIGH)

In [3]:
# Approaches to evaluate, in evaluation order. PPO is configured from
# TRAIN_CONFIG; the PTO baselines are configured from TEST_CONFIG.
# The commented-out Oracle entries are optional baselines; they live inside the
# list literal so toggling them never depends on a backslash line continuation.
model_approaches = [
    PPO(EnvClass=EnvImpr, config=TRAIN_CONFIG, timesteps=500_000),
    # OracleVar_n(),
    PTO_Var_n(TEST_CONFIG),
    *[PTO_Fix_n(n, TEST_CONFIG) for n in [3, 5]],
    # *[OracleFix_n(n) for n in [3, 5]],
]

# Load the pickled test frame. No empty placeholder DataFrame is created first:
# if the file is missing or unreadable the cell fails here instead of leaving an
# empty frame for later cells to evaluate.
# NOTE(review): unpickling can execute arbitrary code; only load pickles that
# this project produced itself.
test_df_path = Path(methods.file_path('bicycle_data.pkl', dir_name='data/non_gamma_bicyle_test'))
test_df = pd.read_pickle(test_df_path)


[INFO] Loaded trained PPO model from:
/Users/preetkaria/PycharmProjects/when_to_leave/models/PPO_EnvImpr_e12a4_500K.zip


In [4]:
from pandarallel import pandarallel

# Set up pandarallel (adds DataFrame.parallel_apply) with its progress bar on.
pandarallel.initialize(progress_bar=True)

# Collects every prediction column produced below; used later for the summary plots.
columns_to_visualize = []
for model in model_approaches:
    keys = model.prediction_keys()
    print(f'Evaluating {keys}')
    columns_to_visualize.extend(keys)

    # One predict() call per row; result_type='expand' spreads the returned
    # values over integer-labelled columns 0, 1, ...
    new_cols = test_df.parallel_apply(
        lambda row: model.predict(row, override=True),
        result_type='expand',
        axis=1,
    )

    # Store the k-th expanded column under the k-th prediction key and report its mean.
    for position, col_name in enumerate(keys):
        test_df[col_name] = new_cols[position]
        print(col_name, test_df[col_name].mean())




INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Evaluating ['cost_PPO_EnvImpr_e12a4_500K', 'observed_n_PPO_EnvImpr_e12a4_500K']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PPO_EnvImpr_e12a4_500K 17.392957992438443
observed_n_PPO_EnvImpr_e12a4_500K 36.333333333333336
Evaluating ['cost_PTO_Var_n', 'observed_n_PTO_Var_n']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Var_n 18.572522782000004
observed_n_PTO_Var_n 35.63333333333333
Evaluating ['cost_PTO_Fix_n_3']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Fix_n_3 33.843928248833336
Evaluating ['cost_PTO_Fix_n_5']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Fix_n_5 29.70959239833333


In [5]:
# Persist the evaluated frame: first a CSV export with the same stem as the
# pickle, then the pickle itself. The pickle write targets test_df_path, i.e.
# the same file the frame was loaded from, so that file is overwritten.
test_df_path = Path(test_df_path)
csv_path = test_df_path.with_suffix('.csv')
test_df.to_csv(csv_path, index=False)
test_df.to_pickle(test_df_path)

## Visualize Results

In [6]:
import plotly.graph_objects as go

# Alternative column selection (disabled): pick columns by name instead of
# using the list collected during evaluation.
# columns_to_visualize = [x for x in test_df.columns if 'cost' in x or 'observe' in x]

# Per-column mean and median over the prediction columns plus 'h' and 'c'.
summary_columns = columns_to_visualize + ['h', 'c']
means = test_df[summary_columns].mean()
medians = test_df[summary_columns].median()

# One bar chart per statistic, mean first and then median.
for label, stats in (('Mean', means), ('Median', medians)):
    fig = go.Figure(data=[go.Bar(x=stats.index, y=stats.values)])
    fig.update_layout(title=f"{label} Values of Costs, H, and C", yaxis_title=f"{label} Value")
    fig.show()

# Two-row table: a 'Mean' row and a 'Median' row, one column per summarised field.
summary_df = pd.DataFrame({'Mean': means, 'Median': medians}).T
summary_df.head()


Unnamed: 0,cost_PPO_EnvImpr_e12a4_500K,observed_n_PPO_EnvImpr_e12a4_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,h,c
Mean,17.392958,36.333333,18.572523,35.633333,33.843928,29.709592,0.5,25.0
Median,20.244511,37.0,22.181752,36.0,25.0,25.0,0.5,25.0


In [7]:
# Preview the first rows of the evaluated frame.
# NOTE(review): the displayed frame also carries prediction columns from other
# PPO model hashes; presumably they accumulate because results are written back
# to the pickle the frame is loaded from. Confirm this is intended.
test_df.head()

Unnamed: 0,intervals,h,c,travel_time,total,cost_PPO_EnvImpr_b4e7e_500K,observed_n_PPO_EnvImpr_b4e7e_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,cost_PPO_EnvImpr_9a770_500K,observed_n_PPO_EnvImpr_9a770_500K,cost_PPO_EnvImpr_e12a4_500K,observed_n_PPO_EnvImpr_e12a4_500K,cost_PPO_EnvImpr_9e088_500K,observed_n_PPO_EnvImpr_9e088_500K
0,"[23, 13, 15, 24, 30, 21, 60, 16, 2, 4, 10, 3, ...",0.5,25,120,24,25.0,23.0,25.0,16.0,14.134634,25.0,2.00326,17.0,1.132672,17.0,22.289265,14.0
1,"[56, 5, 15, 8, 11, 14, 7, 12, 12.01, 20, 4, 10...",0.5,25,120,43,25.0,42.0,2.549946,31.0,25.0,25.0,2.409618,31.0,0.202938,32.0,136.743333,3.0
2,"[19, 108, 13, 18, 14, 16, 5, 7, 2, 3, 2, 16, 1...",0.5,25,120,23,25.0,22.0,25.0,19.0,25.0,25.0,25.0,19.0,25.0,19.0,66.623968,9.0
3,"[60, 61, 68, 27, 26, 6, 17, 63, 75, 13, 30, 7]",0.5,25,120,12,25.0,11.0,25.0,9.0,25.0,25.0,25.0,9.0,25.0,8.0,30.450476,7.0
4,"[67, 32, 16, 26, 19, 17, 3, 6, 3, 18, 8, 3, 4,...",0.5,25,120,27,25.0,26.0,25.489672,19.0,25.0,25.0,24.859123,19.0,23.306492,19.0,29.611755,18.0


In [8]:
# Inspect one scenario in full: the row at position 2, shown with every column.
test_df.iloc[2]

intervals                            [19, 108, 13, 18, 14, 16, 5, 7, 2, 3, 2, 16, 1...
h                                                                                  0.5
c                                                                                   25
travel_time                                                                        120
total                                                                               23
cost_PPO_EnvImpr_b4e7e_500K                                                       25.0
observed_n_PPO_EnvImpr_b4e7e_500K                                                 22.0
cost_PTO_Var_n                                                                    25.0
observed_n_PTO_Var_n                                                              19.0
cost_PTO_Fix_n_3                                                                  25.0
cost_PTO_Fix_n_5                                                                  25.0
cost_PPO_EnvImpr_9a770_500K                