# Revenue Management Simulations

In this notebook we run simulations that compare the Bayes Selector algorithms you developed against PPO and standard Q-learning.

### Package Imports

In [1]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

### Test 1

We start with a small-scale simulation on a problem with three resources, two customer types, and a horizon of four. This allows us to test Q-learning-based approaches without having to deal with issues of scale.

In [2]:
# Default airline configuration exposed by or_suite's env_configs module.
CONFIG =  or_suite.envs.env_configs.airline_default_config

# Episode length (time horizon) read from the configuration; reused by the
# settings and agents defined in later cells.
epLen = CONFIG['epLen']

In [3]:
# Show the full configuration dictionary so its entries can be inspected.
print(CONFIG)

{'epLen': 4, 'f': array([1., 2.]), 'A': array([[2., 3.],
       [3., 0.],
       [2., 1.]]), 'starting_state': array([6.66666667, 4.        , 4.        ]), 'P': array([[0.33333333, 0.33333333],
       [0.33333333, 0.33333333],
       [0.33333333, 0.33333333],
       [0.33333333, 0.33333333],
       [0.33333333, 0.33333333]])}


Here we see the rewards (`f`), the resource consumption matrix (`A`), the starting state, and the arrival distribution (`P`), which is uniform: each customer type arrives with probability 1/3 at every time step.

Next, we generate a dataset of historical traces sampled according to this arrival distribution.

In [4]:
# Build a dataset of sampled historical traces.
# Each entry is a (timestep, outcome) pair. Outcomes 0 .. CONFIG['A'].shape[1]-1
# are customer types; the extra last index carries the leftover probability
# mass of the step (1 - sum of that step's arrival probabilities), presumably
# meaning "no arrival" -- confirm against the agent that consumes the dataset.
num_traces = 50
trace_seed = 0  # fixed seed so a fresh Restart & Run All reproduces the same traces
trace_rng = np.random.default_rng(trace_seed)  # local generator: global np.random state is untouched

num_outcomes = CONFIG['A'].shape[1] + 1

# The per-step distribution does not depend on the trace, so build it once per step
# instead of once per (trace, step) pair.
step_dists = []
for timestep in range(CONFIG['epLen']):
    step_probs = CONFIG['P'][timestep, :]
    step_dists.append(np.append(np.copy(step_probs), 1 - np.sum(step_probs)))

dataset = []
for _ in range(num_traces):  # samples traces
    for timestep, pDist in enumerate(step_dists):  # each of length of the time horizon
        # samples a customer type according to that step's distribution
        dataset.append((timestep, trace_rng.choice(a=num_outcomes, p=pDist)))
print(dataset)

[(0, 2), (1, 2), (2, 1), (3, 0), (0, 1), (1, 0), (2, 0), (3, 0), (0, 2), (1, 0), (2, 2), (3, 0), (0, 1), (1, 1), (2, 1), (3, 1), (0, 2), (1, 1), (2, 2), (3, 2), (0, 0), (1, 2), (2, 2), (3, 2), (0, 2), (1, 1), (2, 0), (3, 0), (0, 0), (1, 1), (2, 1), (3, 0), (0, 1), (1, 2), (2, 0), (3, 1), (0, 1), (1, 1), (2, 2), (3, 0), (0, 1), (1, 1), (2, 1), (3, 0), (0, 2), (1, 1), (2, 0), (3, 1), (0, 2), (1, 2), (2, 1), (3, 1), (0, 1), (1, 1), (2, 0), (3, 2), (0, 0), (1, 2), (2, 1), (3, 0), (0, 1), (1, 0), (2, 1), (3, 2), (0, 1), (1, 1), (2, 0), (3, 0), (0, 2), (1, 0), (2, 1), (3, 2), (0, 1), (1, 2), (2, 2), (3, 0), (0, 2), (1, 0), (2, 2), (3, 2), (0, 2), (1, 0), (2, 1), (3, 2), (0, 0), (1, 1), (2, 0), (3, 1), (0, 1), (1, 2), (2, 1), (3, 1), (0, 1), (1, 2), (2, 0), (3, 0), (0, 2), (1, 2), (2, 1), (3, 2), (0, 1), (1, 2), (2, 0), (3, 1), (0, 2), (1, 0), (2, 2), (3, 1), (0, 1), (1, 2), (2, 1), (3, 0), (0, 0), (1, 1), (2, 1), (3, 1), (0, 2), (1, 1), (2, 2), (3, 2), (0, 0), (1, 0), (2, 0), (3, 2), (0, 2),

In [5]:
# Experiment sizing, forwarded to the runners through DEFAULT_SETTINGS below.
nEps = 500
numIters = 50

# Settings dictionary passed to the or_suite experiment runners.
# 'dirPath' is only a placeholder here; the experiment loop overwrites it
# with a per-agent output directory before each run.
DEFAULT_SETTINGS = dict(
    seed=1,
    recFreq=1,
    dirPath='../data/',
    deBug=False,
    nEps=nEps,
    numIters=numIters,
    saveTrajectory=True,
    epLen=epLen,
    render=False,
    pickle=False,
)

# Airline environment built from CONFIG, plus a Monitor-wrapped handle to it
# (the wrapped one is what the Stable-Baselines code path is given).
revenue_env = gym.make('Airline-v0', config=CONFIG)
mon_env = Monitor(revenue_env)

### Specifying Agent

We compare the following agents. Note that `SB PPO` is currently commented out in the cell below, so only the remaining three are actually run:

* `SB PPO`
* `Random`
* `BayesSelector`
* `BayesSelectorTraces`

In [6]:
# Agents under comparison, keyed by the label that is later used for the
# output directory name and the plot legend.
_rm_agents = or_suite.agents.airline_revenue_management

agents = {
    # 'SB PPO': PPO(MlpPolicy, mon_env, gamma=1, verbose=0, n_steps=epLen),
    'Random': or_suite.agents.rl.random.randomAgent(),
    'BayesSelector': _rm_agents.bayes_selector.bayes_selectorAgent(epLen, round_flag=True),
    # Same selector, but additionally handed the sampled historical traces.
    'BayesSelectorTraces': _rm_agents.bayes_selector_traces.bayes_selector_tracesAgent(
        epLen, round_flag=True, dataset=dataset
    ),
}

Next we run the experiments.

In [7]:
# Run every agent once and record where each one wrote its results, so the
# plotting cells can find them. The line and radar lists receive the same
# entries in the same order.
path_list_line, algo_list_line = [], []
path_list_radar, algo_list_radar = [], []

for agent, algorithm in agents.items():
    print(agent)
    label = str(agent)
    out_dir = '../data/airline_' + label
    # The runners read their output directory from the shared settings dict.
    DEFAULT_SETTINGS['dirPath'] = out_dir

    # The Stable-Baselines agent goes through its own runner and is given the
    # Monitor-wrapped environment; every other agent uses the plain one.
    if agent == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, algorithm, DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(revenue_env, algorithm, DEFAULT_SETTINGS)

    for paths, labels in ((path_list_line, algo_list_line),
                          (path_list_radar, algo_list_radar)):
        paths.append(out_dir)
        labels.append(label)

Random
Writing to file data.csv
BayesSelector
Writing to file data.csv
BayesSelectorTraces
Writing to file data.csv


In [8]:
# Destination of the line plot comparing the agents run above.
fig_path = '../figures/'
fig_name = 'revenue' + '_line_plot' + '.pdf'

or_suite.plots.plot_line_plots(
    path_list_line,
    algo_list_line,
    fig_path,
    fig_name,
    nEps // 40 + 1,  # integer value derived from the episode count (13 for nEps = 500)
)

In [9]:
from IPython.display import IFrame

# Embed the line-plot PDF inline. The location is built from fig_path and
# fig_name (set in the plotting cell) instead of repeating the literal path,
# so the viewer keeps pointing at the same file if either of them is changed.
# With the current values this is '../figures/revenue_line_plot.pdf'.
IFrame(fig_path + fig_name, width=600, height=280)