In [1]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box, Sequence, Dict
import numpy as np
import pandas as pd
from src.commons import methods
import statistics

from src.rl_environments.env_impr import EnvImpr
from src.model_approaches.analytical.oracle_fix_n import OracleFix_n
from src.model_approaches.analytical.oracle_var_n import OracleVar_n
from src.model_approaches.analytical.pto_fix_n import PTO_Fix_n
from src.model_approaches.analytical.pto_var_n import PTO_Var_n
from src.model_approaches.rl.ppo import PPO

from src.commons import generate_data, methods
from src.commons.constants import AlphaEstimator, TravelTimeDist
import os
import pickle
from pathlib import Path

## Config

In [2]:
# Parameter grid handed to the PPO agent (see the model list below).
# alpha_range: 6.1 through 6.9 in steps of 0.1.
# beta_range:  with a multiplier of 1 the entries stay integers (2, 3, 4, 5).
TRAIN_CONFIG = dict(
    alpha_range=[(6 + tenth / 10) for tenth in range(1, 10)],
    beta_range=[round(step * 1, 1) for step in range(2, 6)],
    h_range=[0.5],
    c_range=[25],
    total=list(range(10, 40)),
    travel_time=TravelTimeDist.UNIFORM,
    param_estimator=AlphaEstimator.MAX_LIKELI,
)

# Parameter grid handed to the PTO baselines. It differs from TRAIN_CONFIG in
# the alpha range (0.1 .. 0.9), the beta range (1.0, 1.5, 2.0, 2.5) and the
# travel-time distribution (HIGH instead of UNIFORM); everything else matches.
TEST_CONFIG = dict(
    alpha_range=[(0 + tenth / 10) for tenth in range(1, 10)],
    beta_range=[round(step * 0.5, 1) for step in range(2, 6)],
    h_range=[0.5],
    c_range=[25],
    total=list(range(10, 40)),
    travel_time=TravelTimeDist.HIGH,
    param_estimator=AlphaEstimator.MAX_LIKELI,
)

In [3]:
# Approaches to evaluate, in the order their result columns will be produced.
# The PPO agent is configured with TRAIN_CONFIG; the PTO baselines receive
# TEST_CONFIG. The Oracle baselines are kept as commented-out entries so they
# can be re-enabled by uncommenting a single line (no line-continuation
# backslashes involved, so toggling them cannot break the statement).
model_approaches = [
    PPO(EnvClass=EnvImpr, config=TRAIN_CONFIG, timesteps=500_000),
    # OracleVar_n(),
    PTO_Var_n(TEST_CONFIG),
    *[PTO_Fix_n(n, TEST_CONFIG) for n in [3, 5]],
    # *[OracleFix_n(n) for n in [3, 5]],
]

# Test set location, resolved through the project's path helper.
test_df_path = Path(methods.file_path('bicycle_data.pkl', dir_name='data/non_gamma_bicyle_test'))

# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only point this at files produced by this project.
with open(test_df_path, 'rb') as f:
    test_df = pickle.load(f)

# Fail here, with a clear message, rather than deep inside the evaluation loop.
if not isinstance(test_df, pd.DataFrame):
    raise TypeError(
        f'Expected a pandas DataFrame in {test_df_path}, got {type(test_df).__name__}'
    )


[INFO] Loaded trained PPO model from:
/Users/preetkaria/PycharmProjects/when_to_leave/models/PPO_EnvImpr_b4e7e_500K.zip


In [4]:
from pandarallel import pandarallel

# Initialize pandarallel with progress bar enabled
pandarallel.initialize(progress_bar=True)

# Run every approach over the test set, one row at a time, and store its
# predictions as new columns of test_df. The produced column names are
# collected in columns_to_visualize for the plotting cell further down.
columns_to_visualize = []
for model in model_approaches:
    # Ask the model for its output column names once and reuse the answer.
    keys = model.prediction_keys()
    print(f'Evaluating {keys}')
    columns_to_visualize += keys

    # NOTE(review): override=True presumably forces a fresh prediction even
    # when the row already carries a value for this model - confirm in predict().
    predictions = test_df.parallel_apply(
        lambda row: model.predict(row, override=True),
        result_type='expand',
        axis=1,
    )

    # result_type='expand' only yields a DataFrame when predict() returns a
    # list-like. A scalar return produces a Series, and indexing that with
    # [0] would pick row 0's value and broadcast it to every row.
    if isinstance(predictions, pd.Series):
        predictions = predictions.to_frame()
    if predictions.shape[1] < len(keys):
        raise ValueError(
            f'{type(model).__name__}.predict returned {predictions.shape[1]} value(s) '
            f'per row but prediction_keys() lists {len(keys)}: {keys}'
        )

    # Pair outputs with their names by position, not by column label.
    for position, col_name in enumerate(keys):
        test_df[col_name] = predictions.iloc[:, position]
        print(col_name, test_df[col_name].mean())




INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Evaluating ['cost_PPO_EnvImpr_b4e7e_500K', 'observed_n_PPO_EnvImpr_b4e7e_500K']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PPO_EnvImpr_b4e7e_500K 33.26944247596769
observed_n_PPO_EnvImpr_b4e7e_500K 56.67777777777778
Evaluating ['cost_PTO_Var_n', 'observed_n_PTO_Var_n']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Var_n 83.17065061211112
observed_n_PTO_Var_n 43.43333333333333
Evaluating ['cost_PTO_Fix_n_3']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Fix_n_3 84.65397975803704
Evaluating ['cost_PTO_Fix_n_5']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=12), Label(value='0 / 12'))), HBox…

cost_PTO_Fix_n_5 77.61190803994444


In [5]:
# Persist the evaluated frame twice: a CSV next to the pickle (same stem,
# '.csv' suffix, no index column) and the pickle itself, which is overwritten
# in place, so the prediction columns travel with the test set from now on.
test_df_path = Path(test_df_path)  # no-op for a Path; tolerates a plain string
csv_path = test_df_path.with_suffix('.csv')
test_df.to_csv(csv_path, index=False)
test_df.to_pickle(test_df_path)

## Visualize Results

In [6]:
import plotly.graph_objects as go


def _show_summary_bar(values, statistic):
    """Render and return a bar chart of one per-column summary statistic.

    Args:
        values: pandas Series indexed by column name (e.g. the result of
            ``DataFrame.mean()``); the index becomes the x axis.
        statistic: Label inserted into the chart title and y-axis title,
            e.g. ``"Mean"`` or ``"Median"``.

    Returns:
        The plotly Figure, after it has been shown.
    """
    figure = go.Figure(data=[go.Bar(x=values.index, y=values.values)])
    figure.update_layout(
        title=f"{statistic} Values of Costs, H, and C",
        yaxis_title=f"{statistic} Value",
    )
    figure.show()
    return figure


# Columns summarised below: everything the evaluation loop produced, plus h
# and c. Alternative that picks the columns up from the frame itself:
# columns_to_visualize = [x for x in test_df.columns if 'cost' in x or 'observe' in x]
summary_columns = columns_to_visualize + ['h', 'c']

# Calculate and plot means
means = test_df[summary_columns].mean()
fig = _show_summary_bar(means, "Mean")

# Calculate and plot medians
medians = test_df[summary_columns].median()
fig = _show_summary_bar(medians, "Median")

# Create a dataframe with two rows: one for means and one for medians
summary_df = pd.DataFrame({'Mean': means, 'Median': medians}).T
summary_df.head()


Unnamed: 0,cost_PPO_EnvImpr_b4e7e_500K,observed_n_PPO_EnvImpr_b4e7e_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,h,c
Mean,33.269442,56.677778,83.170651,43.433333,84.65398,77.611908,0.5,25.0
Median,25.0,56.5,81.809001,43.5,61.75,62.689749,0.5,25.0


In [7]:
# Preview the first rows of the evaluated frame: the input columns plus one
# column per prediction key. The frame can also hold columns that were already
# stored in the loaded pickle and were not produced by this run.
test_df.head()

Unnamed: 0,intervals,h,c,travel_time,total,cost_PPO_EnvImpr_b4e7e_500K,observed_n_PPO_EnvImpr_b4e7e_500K,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,cost_PPO_EnvImpr_6509a_500K,observed_n_PPO_EnvImpr_6509a_500K
0,"[21, 60, 16, 2, 4, 10, 3, 10, 14, 39, 2, 10, 2...",0.5,25,120,27,25.0,26.0,41.756238,19.0,25.0,25.0,42.330396,19.0
1,"[12, 12, 20, 4, 10, 3, 3, 5, 9, 2, 24, 25, 6, ...",0.5,25,120,44,25.0,43.0,34.5,31.0,25.0,25.0,37.6535,30.0
2,"[13, 18, 14, 16, 5, 7, 2, 3, 2, 16, 15, 17, 3,...",0.5,25,120,26,25.0,25.0,149.572795,12.0,86.0,116.622705,146.418854,13.0
3,"[68, 27, 26, 6, 17, 63, 75, 13, 30, 7, 23, 34,...",0.5,25,120,14,25.0,13.0,21.729591,10.0,25.0,36.422655,23.761562,11.0
4,"[19, 17, 3, 6, 3, 18, 8, 3, 4, 13, 11, 2, 18, ...",0.5,25,120,37,25.0,36.0,53.522721,26.0,53.303516,128.170664,50.263958,28.0


In [8]:
# Show every field of the row at position 2 to compare the approaches on a
# single instance.
test_df.iloc[2]

intervals                            [13, 18, 14, 16, 5, 7, 2, 3, 2, 16, 15, 17, 3,...
h                                                                                  0.5
c                                                                                   25
travel_time                                                                        120
total                                                                               26
cost_PPO_EnvImpr_b4e7e_500K                                                       25.0
observed_n_PPO_EnvImpr_b4e7e_500K                                                 25.0
cost_PTO_Var_n                                                              149.572795
observed_n_PTO_Var_n                                                              12.0
cost_PTO_Fix_n_3                                                                  86.0
cost_PTO_Fix_n_5                                                            116.622705
cost_PPO_EnvImpr_6509a_500K                