In [1]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box, Sequence, Dict
import numpy as np
import pandas as pd
from src.commons import methods
import statistics

from src.rl_environments.env_impr import EnvImpr
from src.model_approaches.analytical.oracle_fix_n import OracleFix_n
from src.model_approaches.analytical.oracle_var_n import OracleVar_n
from src.model_approaches.analytical.pto_fix_n import PTO_Fix_n
from src.model_approaches.analytical.pto_var_n import PTO_Var_n
from src.model_approaches.rl.ppo import PPO

from src.commons import generate_data
from src.commons.constants import AlphaEstimator, TravelTimeDist
import os
import pickle

## Config

In [2]:
def _build_config(alpha_base):
    """Assemble one experiment configuration dict.

    Every entry except ``alpha_range`` is the same for the training and the
    test setup. ``alpha_range`` holds the nine values ``alpha_base + i / 10``
    for ``i`` in 1..9. A fresh dict (with fresh lists) is built on every call,
    so the two configurations never share mutable state.
    """
    return {
        'alpha_range': [(alpha_base + step / 10) for step in range(1, 10)],
        'beta_range': [round(half_step * 0.5, 1) for half_step in range(2, 6)],  # 1.0, 1.5, 2.0, 2.5
        'h_range': [0.5],
        'c_range': [25],
        'total': list(range(10, 40)),  # the integers 10..39
        'travel_time': TravelTimeDist.UNIFORM,
        'param_estimator': AlphaEstimator.MAX_LIKELI,
    }


# The two configurations differ only in their alpha grid:
# training uses base 1 (roughly 1.1 .. 1.9), testing uses base 3 (roughly 3.1 .. 3.9).
TRAIN_CONFIG = _build_config(1)
TEST_CONFIG = _build_config(3)



In [3]:
# n values used by the fixed-n baselines (PTO_Fix_n and OracleFix_n).
FIXED_N_VALUES = (3, 5)

# Approaches under comparison, in the order they are evaluated later on.
# The PPO agent is configured with TRAIN_CONFIG, the PTO baselines with
# TEST_CONFIG. NOTE(review): the recorded output suggests constructing PPO
# launches (or resumes) training, so this cell can be slow - confirm in PPO.
model_approaches = [
    PPO(EnvClass=EnvImpr, config=TRAIN_CONFIG, timesteps=500_000),
    OracleVar_n(),
    PTO_Var_n(TEST_CONFIG),
    *(PTO_Fix_n(n, TEST_CONFIG) for n in FIXED_N_VALUES),
    *(OracleFix_n(n) for n in FIXED_N_VALUES),
]

# generate() is called with TEST_CONFIG and returns the path of a pickle file,
# which is loaded below as the test set.
test_df_path = generate_data.generate(config=TEST_CONFIG, row_count=10_000)

# No empty-DataFrame placeholder is assigned before loading: if the load fails,
# `test_df` must stay undefined rather than leave an empty frame in the kernel
# that later cells would silently evaluate.
# NOTE: pickle can execute arbitrary code while loading; only open files that
# were produced by this project.
with open(test_df_path, 'rb') as f:
    test_df = pickle.load(f)


[INFO] Training PPO model...
Saving to: /Users/preetkaria/PycharmProjects/when_to_leave/models/PPO_EnvImpr_b73d3_500K.zip
[INFO] Launching training with 8 parallel environments
Using cpu device
[INFO] Training PPO for 500000 timesteps...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 4.17     |
|    ep_rew_mean     | -14      |
| time/              |          |
|    fps             | 491      |
|    iterations      | 1        |
|    time_elapsed    | 33       |
|    total_timesteps | 16384    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.05        |
|    ep_rew_mean          | -13.8       |
| time/                   |             |
|    fps                  | 490         |
|    iterations           | 2           |
|    time_elapsed         | 66          |
|    total_timesteps      | 32768       |
| train/                  |             |
| 

In [4]:

from pandarallel import pandarallel

# Set up pandarallel (this is what provides DataFrame.parallel_apply) with a
# progress bar shown for every parallel call.
pandarallel.initialize(progress_bar=True)

# Names of every prediction column added to test_df below; the visualisation
# cell uses this list to pick the columns to summarise.
columns_to_visualize = []
for model in model_approaches:
    output_keys = model.prediction_keys()
    print(f'Evaluating {output_keys}')
    columns_to_visualize.extend(output_keys)

    # Run the model on every row. result_type='expand' turns each row's
    # returned values into separate columns, which are read back below by
    # integer label in the same order as prediction_keys().
    predictions = test_df.parallel_apply(
        lambda row: model.predict(row, override=False),
        axis=1,
        result_type='expand',
    )
    for position, key in enumerate(output_keys):
        test_df[key] = predictions[position]




INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Evaluating ['cost_PPO_EnvImpr_b73d3_500K', 'observed_n_PPO_EnvImpr_b73d3_500K']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_Oracle_Var_n', 'observed_n_Oracle_Var_n']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_PTO_Var_n', 'observed_n_PTO_Var_n']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_PTO_Fix_n_3']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_PTO_Fix_n_5']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_Oracle_Fix_n_3']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

Evaluating ['cost_Oracle_Fix_n_5']


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1250), Label(value='0 / 1250'))), …

In [5]:
from pathlib import Path

# Persist the evaluated test set twice: as a CSV next to the pickle (same file
# name, '.csv' suffix) and back into the original pickle, which is overwritten
# with the current contents of test_df.
test_df_path = Path(test_df_path)
csv_path = test_df_path.with_suffix('.csv')
test_df.to_csv(csv_path, index=False)
test_df.to_pickle(test_df_path)

## Visualize Results

In [6]:
import plotly.graph_objects as go

# Alternative way to pick the columns straight from the frame:
# columns_to_visualize = [x for x in test_df.columns if 'cost' in x or 'observe' in x]


def _bar_chart(stats, title, yaxis_title):
    """Return a bar figure with one bar per entry of the Series ``stats``.

    The Series index supplies the bar labels and its values the bar heights.
    """
    figure = go.Figure(data=[go.Bar(x=stats.index, y=stats.values)])
    figure.update_layout(title=title, yaxis_title=yaxis_title)
    return figure


# Prediction columns plus the 'h' and 'c' columns of the test set.
summary_columns = columns_to_visualize + ['h', 'c']

# Per-column means, shown as a bar chart.
means = test_df[summary_columns].mean()
fig = _bar_chart(means, "Mean Values of Costs, H, and C", "Mean Value")
fig.show()

# Per-column medians, shown the same way.
medians = test_df[summary_columns].median()
fig = _bar_chart(medians, "Median Values of Costs, H, and C", "Median Value")
fig.show()

# Two-row summary table: row 'Mean' and row 'Median', one column per metric.
summary_df = pd.DataFrame({'Mean': means, 'Median': medians}).T
summary_df.head()


Unnamed: 0,cost_PPO_EnvImpr_b73d3_500K,observed_n_PPO_EnvImpr_b73d3_500K,cost_Oracle_Var_n,observed_n_Oracle_Var_n,cost_PTO_Var_n,observed_n_PTO_Var_n,cost_PTO_Fix_n_3,cost_PTO_Fix_n_5,cost_Oracle_Fix_n_3,cost_Oracle_Fix_n_5,h,c
Mean,12.230326,11.9027,9.271882,11.0837,11.196373,11.1163,18.001524,16.846946,10.710248,11.957163,0.5,25.0
Median,7.981996,10.0,7.39828,9.0,7.646752,9.0,20.512448,18.606267,8.060416,8.944021,0.5,25.0
