In [1]:
import pandas as pd
import numpy as np
import pickle
import gymnasium as gym
from gymnasium.spaces import Box, Discrete
import statistics
from utils import methods
from utils.rl_environments.env1 import Env1

with open(methods.file_path('prof_data/data1_intervals.pkl', 'data'), 'rb') as f:
    intervals = pickle.load(f)

df = pd.read_csv(methods.file_path('prof_data/data1.csv', 'data'))

df['intervals'] = intervals
display(df.head())

PPO_MODEL = methods.file_path('ppo_env1', 'models')


Unnamed: 0,total,travel_time,h,c,intervals
0,43.0,3.535281,5.320843,1.787906,"[0.1306593104459917, 0.08214531495921755, 0.02..."
1,48.0,41.459338,0.12022,1.670595,"[4.276706517433805, 3.2544721073860834, 3.7400..."
2,31.0,114.792988,0.146738,6.669483,"[16.079087401217105, 11.608519241181346, 14.23..."
3,52.0,976.197274,0.003427,0.570308,"[28.206915811705233, 6.758341759637238, 24.521..."
4,39.0,41.909987,0.105977,3.407713,"[0.9262261308152662, 1.9254123526889755, 3.236..."


In [2]:
from stable_baselines3 import PPO
model = PPO.load(PPO_MODEL)

env = Env1()
rewards = {}
print(env._get_info())
for i in range(len(df)):
    row = df.iloc[i]
    state, _ = env.reset(row=row)
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(state)  
        state, reward, done, _, info = env.step(action)
        total_reward += reward
    rewards[i] = total_reward
    # print(f"Episode {i} reward: {total_reward} | h = {info['state']['h']}, c = {info['state']['c']}")
    print(f"Episode {i} reward: {total_reward} | cur_time = {info['state']['cur_time']}")


df['rewards'] = rewards

df.head()

{'hidden': {'alpha': -1, 'beta': -1, 'interval': -1, 'cum_sum_intervals': -1}, 'state': {'n': -1, 'N': -1, 'h': -1, 'c': -1, 'travel_time': -1, 'cur_time': -1, 'mean_n': -1, 'std_n': -1, 'alpha_hat': -1, 'beta_hat': -1, 'u_star_hat': -1, 'last_update': -1}}
Episode 0 reward: -1.7879058522669198 | cur_time = 3.9387106959965377
Episode 1 reward: -16.789159941828782 | cur_time = 12.271261353239975
Episode 2 reward: -42.04328268505151 | cur_time = 42.02336413816172
Episode 3 reward: -0.38013569490368493 | cur_time = 59.586525123710274
Episode 4 reward: -6.789738754827452 | cur_time = 19.688359553252457
Episode 5 reward: -100.69850606104622 | cur_time = 2857.9977671615893
Episode 6 reward: -16.33518834971634 | cur_time = 10.571790100250428
Episode 7 reward: -2.821401950265599 | cur_time = 22.237525017502858
Episode 8 reward: -8.67367327638704 | cur_time = 40.072576122541044
Episode 9 reward: -432.2751247914234 | cur_time = 722.5480170021746
Episode 10 reward: -6.972097527830836 | cur_time =

Unnamed: 0,total,travel_time,h,c,intervals,rewards
0,43.0,3.535281,5.320843,1.787906,"[0.1306593104459917, 0.08214531495921755, 0.02...",-1.787906
1,48.0,41.459338,0.12022,1.670595,"[4.276706517433805, 3.2544721073860834, 3.7400...",-16.78916
2,31.0,114.792988,0.146738,6.669483,"[16.079087401217105, 11.608519241181346, 14.23...",-42.043283
3,52.0,976.197274,0.003427,0.570308,"[28.206915811705233, 6.758341759637238, 24.521...",-0.380136
4,39.0,41.909987,0.105977,3.407713,"[0.9262261308152662, 1.9254123526889755, 3.236...",-6.789739


In [3]:
import plotly.graph_objects as go

# Calculate means
means = df[['rewards', 'h', 'c']].mean()

# Plot
fig = go.Figure(data=[go.Bar(x=means.index, y=means.values)])
fig.update_layout(title="Mean Values of Rewards, H, and C", yaxis_title="Mean Value")
fig.show()

# Calculate medians
medians = df[['rewards', 'h', 'c']].median()

# Plot
fig = go.Figure(data=[go.Bar(x=medians.index, y=medians.values)])
fig.update_layout(title="Median Values of Rewards, H, and C", yaxis_title="Median Value")
fig.show()









In [4]:
# filter total > 30 in df
# df = df[df['total'] > 30]
df['alpha'] = df.apply(lambda row: methods.gamma_estimate_parameters(int(row['total']), row['intervals'])[0], axis=1)
df['beta'] = df.apply(lambda row: methods.gamma_estimate_parameters(int(row['total']), row['intervals'])[1], axis=1)
df['mean'] = df.apply(lambda row: statistics.mean(row['intervals']), axis=1)
df['std'] = df.apply(lambda row: statistics.stdev(row['intervals']), axis=1)

# alpha, beta, h, c scatter plot plotly
fig = go.Figure(data=[go.Scatter(x=df['alpha'], y=df['beta'], mode='markers', marker=dict(size=5))])
fig.update_layout(title="Alpha vs Beta", xaxis_title="Alpha", yaxis_title="Beta")
fig.show()

fig = go.Figure(data=[go.Scatter(x=df['h'], y=df['c'], mode='markers', marker=dict(size=5))])
fig.update_layout(title="H vs C", xaxis_title="H", yaxis_title="C")
fig.show()

fig = go.Figure(data=[go.Scatter(x=df['h'] / df['c'], y= 1 / df['beta'], mode='markers', marker=dict(size=5))])
fig.update_layout(title="h/c vs 1/beta", xaxis_title="h/c", yaxis_title="1/beta")
fig.show()

fig = go.Figure(data=[go.Scatter(x=df['travel_time'], y=df['intervals'].apply(lambda row: sum(row[3:])), mode='markers', marker=dict(size=5))])
fig.update_layout(title="Travel Time vs Total", xaxis_title="Travel Time", yaxis_title="Total")
# add y = x line
fig.add_trace(go.Scatter(x=[0, 10000], y=[0, 10000], mode='lines', name='y=x'))
fig.show()


In [5]:

# I want range of alpha and beta, total, h, c
total_range = (df['total'].min(), df['total'].max())
h_range = (df['h'].min(), df['h'].max())
c_range = (df['c'].min(), df['c'].max())
alpha_range = (df['alpha'].min(), df['alpha'].max())
beta_range = (df['beta'].min(), df['beta'].max())
mean_range = (df['mean'].min(), df['mean'].max())
std_range = (df['std'].min(), df['std'].max())
travel_time_range = (df['travel_time'].min(), df['travel_time'].max())
print(f"Alpha range: {alpha_range}")
print(f"Beta range: {beta_range}")
print(f"Total range: {total_range}")
print(f"H range: {h_range}")
print(f"C range: {c_range}")
print(f"Mean range: {mean_range}")
print(f"Std range: {std_range}")
print(f"Travel time range: {travel_time_range}")
df.head(50)

Alpha range: (0.028460825724258725, 567.9556442351517)
Beta range: (0.0004579081838998966, 32.62570309852111)
Total range: (20.0, 70.0)
H range: (3.556345624867136e-05, 581.5328467536872)
C range: (0.0009908023283743, 48.4727568441552)
Mean range: (0.00018347907565493645, 1255.491447762732)
Std range: (0.0003966596764921248, 123.31431960764903)
Travel time range: (0.0003935176331111, 34658.29848422208)


Unnamed: 0,total,travel_time,h,c,intervals,rewards,alpha,beta,mean,std
0,43.0,3.535281,5.320843,1.787906,"[0.1306593104459917, 0.08214531495921755, 0.02...",-1.787906,2.9129,0.031168,0.090789,0.053195
1,48.0,41.459338,0.12022,1.670595,"[4.276706517433805, 3.2544721073860834, 3.7400...",-16.78916,15.414274,0.261371,4.028842,1.026168
2,31.0,114.792988,0.146738,6.669483,"[16.079087401217105, 11.608519241181346, 14.23...",-42.043283,4.699589,3.043065,14.301155,6.59692
3,52.0,976.197274,0.003427,0.570308,"[28.206915811705233, 6.758341759637238, 24.521...",-0.380136,3.904633,5.647611,22.051849,11.159761
4,39.0,41.909987,0.105977,3.407713,"[0.9262261308152662, 1.9254123526889755, 3.236...",-6.789739,2.909377,1.107528,3.222217,1.889099
5,40.0,18968.864118,0.005753,2.546631,"[964.5425831234778, 845.9818291341096, 1047.37...",-100.698506,124.686414,7.885933,983.268767,88.056755
6,37.0,5.713274,3.593902,16.335188,"[0.46221294532785073, 0.08902386135418568, 0.0...",-16.335188,0.451451,0.787582,0.355555,0.529177
7,42.0,112.727918,0.152379,4.193468,"[9.601735204923799, 2.821560185146939, 9.31422...",-2.821402,1.77824,2.055016,3.654312,2.740378
8,36.0,24.318587,0.141482,2.120688,"[16.86292821044016, 1.0866065475530133, 0.8230...",-8.673673,0.836405,4.174511,3.491583,3.817808
9,49.0,1672.335583,0.051048,14.763162,"[262.7323029378378, 215.39368190149267, 244.32...",-432.275125,56.095389,3.952052,221.691922,29.59963


In [6]:
#plot distribution of alpha, beta, h, c
fig = go.Figure(data=[go.Histogram(x=df['alpha'])])
fig.update_layout(title="Alpha Distribution", xaxis_title="Alpha")
fig.add_vline(x=df['alpha'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['alpha'].mean():.2f}")
fig.show()

fig = go.Figure(data=[go.Histogram(x=df['beta'])])
fig.update_layout(title="Beta Distribution", xaxis_title="Beta")
fig.add_vline(x=df['beta'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['beta'].mean():.2f}")
fig.show()

fig = go.Figure(data=[go.Histogram(x=df['h'])])
fig.update_layout(title="H Distribution", xaxis_title="H")
fig.add_vline(x=df['h'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['h'].mean():.2f}")
fig.show()

fig = go.Figure(data=[go.Histogram(x=df['c'])])
fig.update_layout(title="C Distribution", xaxis_title="C")
fig.add_vline(x=df['c'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['c'].mean():.2f}")
fig.show()

fig = go.Figure(data=[go.Histogram(x=df['total'])])
fig.update_layout(title="Total Distribution", xaxis_title="Total")
fig.add_vline(x=df['total'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['total'].mean():.2f}")
fig.add_vline(x=df['total'].mean() + df['total'].std(), line_dash="dash", line_color="green", annotation_text=f"Std: {df['total'].std():.2f}")
fig.show()

fig = go.Figure(data=[go.Histogram(x=df['travel_time'])])
fig.update_layout(title="Travel Time Distribution", xaxis_title="Travel Time")
fig.add_vline(x=df['travel_time'].mean(), line_dash="dash", line_color="red", annotation_text=f"Mean: {df['travel_time'].mean():.2f}")
fig.show()



In [7]:
from pandarallel import pandarallel

# Initialize pandarallel with progress bar enabled
pandarallel.initialize(progress_bar=True)
for i in [0, 3, 5]:
    print(f"Optimal reward at n = {i}")
    df[f'u{i}'] = df.apply(lambda row: row['intervals'][i:].sum(), axis=1)
    df[f'u_star{i}'] = df.apply(lambda row: (print(row), methods.get_u_star_binary_fast(row['total'] - i, row['alpha'], row['beta'], row['h'], row['c']))[1], axis=1)
    df[f'optimal_rewards{i}'] = df.apply(lambda row: -methods.cal_cost(row['c'], row['h'], row[f'u{i}'], row[f'u_star{i}']), axis=1)

df['direct_leave_rewards'] = df.apply(lambda row: -methods.cal_cost(row['c'], row['h'], row['intervals'][3:].sum(), row[f'travel_time']), axis=1)
df.head()

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Optimal reward at n = 0
total                                                       43.0
travel_time                                             3.535281
h                                                       5.320843
c                                                       1.787906
intervals      [0.1306593104459917, 0.08214531495921755, 0.02...
rewards                                                -1.787906
alpha                                                     2.9129
beta                                                    0.031168
mean                                                    0.090789
std                                                     0.053195
u0                                                      3.903929
Name: 0, dtype: object
total                                                       48.0
trav

TypeError: Invalid NaN comparison

In [None]:
tp = df[df['h'] / df['c'] >= 1 / df['beta']]
tp.head(14)
# len(tp)

Unnamed: 0,total,travel_time,h,c,intervals,rewards,alpha,beta,mean,std,u0
795,54.0,22.572009,0.306247,1.979096,"[0.014973814481733946, 0.00025907574946661, 3....",-8.737229,0.109503,8.66155,0.948468,2.866217,51.217262
814,35.0,1.701905,1.211772,2.074716,"[0.4377438803130833, 1.2852740352673644e-05, 0...",-5.837467,0.089269,2.297203,0.205068,0.686355,7.177385
890,44.0,5.918861,2.202298,2.832889,"[7.025081195069271e-11, 0.041764422951962234, ...",-0.518553,0.109002,1.313322,0.143155,0.433599,6.298818
951,39.0,1.840705,0.760142,2.353463,"[1.9270370635383731, 6.182731466592538, 3.4052...",-2.335951,0.101808,3.30524,0.336501,1.054616,13.12352
1777,27.0,0.905625,0.860297,0.778439,"[0.0008414023526065289, 4.2248169348672495e-06...",-0.507997,0.050949,1.228955,0.062614,0.277399,1.690587
2338,32.0,0.322215,1.218925,5.743359,"[0.004318316620975128, 1.0384232836324901e-15,...",-7.959696,0.040331,5.390276,0.217395,1.082505,6.956631
3483,34.0,1.764753,0.488458,0.72678,"[9.871602143827961e-07, 6.211606145992767e-07,...",-0.72678,0.032679,1.59563,0.052143,0.288446,1.772871
5070,32.0,1.082955,0.802421,1.431228,"[0.0001735600897038623, 4.413734654859798, 1.6...",-2.47278,0.129209,2.495929,0.322498,0.89718,10.319925
5400,51.0,1.666707,1.359409,6.937835,"[8.44294861622943e-34, 4.5398285616949235e-05,...",-20.930972,0.042002,8.012666,0.336546,1.642143,17.163862
7993,39.0,1.527342,3.818864,3.713327,"[6.003552808625337e-06, 0.0018803849879250453,...",-4.250445,0.066464,1.075194,0.071462,0.277192,2.78702


In [None]:
from utils import math_expressions as me
import numpy as np
from sympy import symbols, erf, pi, sympify, E
row = df.iloc[795]
alpha = row['alpha']
beta = row['beta']
h = row['h']
c = row['c']
total = row['total']
print(row)
h_precise = lambda x: float(me.gamma_hazard_rate((alpha * total), beta).subs(symbols('x'), x).evalf())
x = np.linspace(0, 457, 1000)
print(h_precise(457.4584584584585))
y = [(print(i), h_precise(i))[1] for i in x]
# print y_norm if imaginary
fig = go.Figure(data=[go.Scatter(x=x, y=y)])
fig.update_layout(title="Gamma Hazard Rate", xaxis_title="x", yaxis_title="f(x)")
fig.show()

u = methods.get_u_star_binary_fast(row['total'], row['alpha'], row['beta'], row['h'], row['c'])

total                                                       54.0
travel_time                                            22.572009
h                                                       0.306247
c                                                       1.979096
intervals      [0.014973814481733946, 0.00025907574946661, 3....
rewards                                                -8.645355
alpha                                                   0.109503
beta                                                     8.66155
mean                                                    0.948468
std                                                     2.866217
u0                                                     51.217262
Name: 795, dtype: object
0.01689748031019771
0.0
0.4574574574574575
0.914914914914915
1.3723723723723724
1.82982982982983
2.2872872872872874
2.744744744744745
3.2022022022022023
3.65965965965966
4.117117117117117
4.574574574574575
5.032032032032032
5.48948948948949
5.946946946946947
6

TypeError: Invalid NaN comparison

In [None]:
from utils import math_expressions as me
import numpy as np
from sympy import symbols, erf, pi, sympify, E



import plotly.graph_objects as go
from scipy.stats import gamma, norm
import time
alpha = 1000
beta = 0.1


h_precise = lambda x: (me.gamma_hazard_rate((alpha), beta).subs(symbols('x'), x).evalf(5))
h_precise2 = lambda x: (me.gamma_hazard_rate((alpha), beta, x).evalf())
pdf = lambda x: gamma.pdf(x, alpha, scale=beta)
cdf = lambda x: gamma.cdf(x, alpha, scale=beta)
h_fast = lambda x: pdf(x) / (1 - cdf(x))
pdf_log = lambda x: gamma.logpdf(x, alpha, scale=beta)
cdf_log = lambda x: gamma.logcdf(x, alpha, scale=beta)

j = 351
print(cdf_log(j), cdf(j), me.gamma_cdf(alpha, beta, j).evalf(40))


x = np.linspace(0, (alpha * beta  + 4 * beta * np.sqrt(alpha)) * 8, 20)
# time y and y2

start_time = time.time()
y = [(print(i), h_precise(i))[1] for i in x]
end_time = time.time()
print(f"Execution time for y2: {end_time - start_time} seconds")


start_time = time.time()
y2 = [(print(i), h_precise2(i))[1] for i in x]
end_time = time.time()
print(f"Execution time for y2: {end_time - start_time} seconds")
print(y)
print(y2)
y_fast = [h_fast(i) for i in x]
y_pdf = [pdf(i) for i in x]
print(y_fast)
# print y_norm if imaginary
fig = go.Figure(data=[go.Scatter(x=x, y=y), go.Scatter(x=x, y=y_fast), go.Scatter(x=x, y=y_pdf)])
fig.update_layout(title="Gamma Hazard Rate", xaxis_title="x", yaxis_title="f(x)")
fig.show()


0.0 1.0 1.000000000000000000000000000000000000000
0.0
47.43120448028358
94.86240896056717
142.29361344085075
189.72481792113433
237.1560224014179
284.5872268817015
332.0184313619851
379.44963584226866
426.8808403225522
474.3120448028358
521.7432492831194
569.174453763403
616.6056582436865
664.0368627239702
711.4680672042538
758.8992716845373
806.3304761648209
853.7616806451044
901.1928851253881
Execution time for y2: 5.805858135223389 seconds
0.0
47.43120448028358
94.86240896056717
142.29361344085075
189.72481792113433
237.1560224014179
284.5872268817015
332.0184313619851
379.44963584226866
426.8808403225522
474.3120448028358
521.7432492831194
569.174453763403
616.6056582436865
664.0368627239702
711.4680672042538
758.8992716845373
806.3304761648209
853.7616806451044
901.1928851253881
Execution time for y2: 3.231959104537964 seconds
[0, 6.1993e-97, 0.035693, 2.9956, 4.7403, 5.7906, 6.4915, 6.9924, 7.3682, 7.6605, 7.8944, 8.0857, 8.2452, 8.3802, 8.4958, 8.5961, 8.6838, 8.7612, 8.8300, 8.


divide by zero encountered in scalar divide


invalid value encountered in scalar divide



TypeError: Object of type Zero is not JSON serializable

In [None]:
from utils import methods
alpha = 137.883037
beta = 1.873392
total = 34
h = 0.005466
c = 1.279461
u = methods.get_u_star_binary_fast(total, alpha, beta, h, c)
print(u)


8726.81340929
