# Revenue Management Simulations

In this notebook we run simulations that compare Hindsight Learning with standard Q-learning on revenue management problems.

Note: You may see errors and warnings when the time and space usage of the algorithms is measured. These might be caused by your operating system; if they appear, please ignore the reported time and space numbers.

### Package Imports

In [None]:
import or_suite
import numpy as np

import copy

import os
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import pandas as pd


import gym

### Test 1: T = 5

We start with a small-scale simulation with a time horizon of five steps.

In [None]:
# Environment configuration for the T = 5 experiment.
# Work on a private copy: a bare assignment would alias the module-level
# default dict, so the writes below would silently modify or_suite's shared
# defaults for everything else running in this kernel.
CONFIG = copy.deepcopy(or_suite.envs.env_configs.airline_default_config)
epLen = 5         # time horizon T
num_traces = 100  # number of traces sampled for the offline dataset
CONFIG['epLen'] = epLen
# One row per step (epLen + 1 rows in total); each row gives the probability
# of each of the two customer types at that step (1/3 each).
CONFIG['P'] = np.full((epLen + 1, 2), 1/3)

In [None]:
# ---------------------------------------------------------------------------
# Experiment settings (evaluation): numIters iterations of nEps episode(s).
# ---------------------------------------------------------------------------
nEps = 1
numIters = 500

DEFAULT_SETTINGS = {'seed': 1,
                    'recFreq': 1,
                    'dirPath': '../data/',
                    'deBug': False,
                    'nEps': nEps,
                    'numIters': numIters,
                    'saveTrajectory': True,
                    'epLen': epLen,
                    'render': False,
                    'pickle': False
                    }

# ---------------------------------------------------------------------------
# Sample the offline traces handed to the Hindsight Learning agent.
# A dedicated seeded generator makes the dataset reproducible on a fresh
# kernel without touching the global NumPy random state.
# ---------------------------------------------------------------------------
trace_rng = np.random.default_rng(DEFAULT_SETTINGS['seed'])

# One outcome per customer type plus one extra outcome that receives the
# leftover probability mass (presumably "no arrival" -- TODO confirm).
num_outcomes = CONFIG['A'].shape[1] + 1

# The per-step distribution does not depend on the trace, so build it once.
step_distributions = [
    np.append(np.copy(CONFIG['P'][timestep, :]), 1 - np.sum(CONFIG['P'][timestep, :]))
    for timestep in range(CONFIG['epLen'])
]

dataset = []
for _ in range(num_traces):  # one trace per pass, each as long as the horizon
    for timestep, pDist in enumerate(step_distributions):
        # samples a customer type according to that step's distribution
        dataset.append((timestep, int(trace_rng.choice(a=num_outcomes, p=pDist))))

# ---------------------------------------------------------------------------
# Environments
# ---------------------------------------------------------------------------
revenue_env = gym.make('Airline-v0', config=CONFIG)
mon_env = Monitor(revenue_env)  # only used by the 'SB PPO' branch below


####### TRAINS Q LEARNING ALGORITHMS
# Training uses num_traces episodes in a single iteration. These overrides
# must go into the settings dict that is actually passed to the runner:
# reassigning nEps / numIters alone does not change DEFAULT_SETTINGS, which
# already captured the evaluation values above.
nEps = num_traces
numIters = 1
train_settings = dict(DEFAULT_SETTINGS, nEps=nEps, numIters=numIters)

revenue_env = gym.make('Airline-v0', config=CONFIG)
q_l_agent = or_suite.agents.airline_revenue_management.discrete_ql_data.DiscreteQl_Data(
    revenue_env.action_space, revenue_env.observation_space, epLen, 1)
or_suite.utils.run_single_algo(revenue_env, q_l_agent, train_settings)


# ---------------------------------------------------------------------------
# Agents under comparison
# ---------------------------------------------------------------------------
agents = {
    'Q Learning': q_l_agent,
    'Random': or_suite.agents.rl.random.randomAgent(),
    'Pi Dagger': or_suite.agents.airline_revenue_management.bayes_selector.bayes_selectorAgent(epLen, round_flag=True),
    'Hindsight Learning': or_suite.agents.airline_revenue_management.bayes_selector_traces.bayes_selector_tracesAgent(epLen, round_flag=True, dataset=dataset),
    'Greedy': or_suite.agents.airline_revenue_management.greedy.greedyAgent(epLen),
}

# NOTE(review): the dirPath values and the figure name below are the same in
# every horizon test of this notebook, so later tests presumably overwrite
# the outputs of earlier ones -- copy them elsewhere if you need to keep them.
path_list_line = []
algo_list_line = []
path_list_radar = []
algo_list_radar = []
for agent in agents:
    print(agent)
    agent_dir = '../data/airline_' + str(agent)
    DEFAULT_SETTINGS['dirPath'] = agent_dir
    if agent == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, agents[agent], DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(revenue_env, agents[agent], DEFAULT_SETTINGS)

    path_list_line.append(agent_dir)
    algo_list_line.append(str(agent))
    path_list_radar.append(agent_dir)
    algo_list_radar.append(str(agent))

fig_path = '../figures/'
fig_name = 'revenue' + '_line_plot' + '.pdf'
or_suite.plots.plot_radar_plots(path_list_line, algo_list_line, fig_path, fig_name, {})

### Test 2: T = 10

In [None]:
# Environment configuration for the T = 10 experiment.
# Work on a private copy: a bare assignment would alias the module-level
# default dict, so the writes below would silently modify or_suite's shared
# defaults for everything else running in this kernel.
CONFIG = copy.deepcopy(or_suite.envs.env_configs.airline_default_config)
epLen = 10        # time horizon T
num_traces = 100  # number of traces sampled for the offline dataset
CONFIG['epLen'] = epLen
# One row per step (epLen + 1 rows in total); each row gives the probability
# of each of the two customer types at that step (1/3 each).
CONFIG['P'] = np.full((epLen + 1, 2), 1/3)

In [None]:
# ---------------------------------------------------------------------------
# Experiment settings (evaluation): numIters iterations of nEps episode(s).
# ---------------------------------------------------------------------------
nEps = 1
numIters = 500

DEFAULT_SETTINGS = {'seed': 1,
                    'recFreq': 1,
                    'dirPath': '../data/',
                    'deBug': False,
                    'nEps': nEps,
                    'numIters': numIters,
                    'saveTrajectory': True,
                    'epLen': epLen,
                    'render': False,
                    'pickle': False
                    }

# ---------------------------------------------------------------------------
# Sample the offline traces handed to the Hindsight Learning agent.
# A dedicated seeded generator makes the dataset reproducible regardless of
# what earlier cells did to the global NumPy random state.
# ---------------------------------------------------------------------------
trace_rng = np.random.default_rng(DEFAULT_SETTINGS['seed'])

# One outcome per customer type plus one extra outcome that receives the
# leftover probability mass (presumably "no arrival" -- TODO confirm).
num_outcomes = CONFIG['A'].shape[1] + 1

# The per-step distribution does not depend on the trace, so build it once.
step_distributions = [
    np.append(np.copy(CONFIG['P'][timestep, :]), 1 - np.sum(CONFIG['P'][timestep, :]))
    for timestep in range(CONFIG['epLen'])
]

dataset = []
for _ in range(num_traces):  # one trace per pass, each as long as the horizon
    for timestep, pDist in enumerate(step_distributions):
        # samples a customer type according to that step's distribution
        dataset.append((timestep, int(trace_rng.choice(a=num_outcomes, p=pDist))))

# ---------------------------------------------------------------------------
# Environments
# ---------------------------------------------------------------------------
revenue_env = gym.make('Airline-v0', config=CONFIG)
mon_env = Monitor(revenue_env)  # only used by the 'SB PPO' branch below


####### TRAINS Q LEARNING ALGORITHMS
# Training uses num_traces episodes in a single iteration. These overrides
# must go into the settings dict that is actually passed to the runner:
# reassigning nEps / numIters alone does not change DEFAULT_SETTINGS, which
# already captured the evaluation values above.
nEps = num_traces
numIters = 1
train_settings = dict(DEFAULT_SETTINGS, nEps=nEps, numIters=numIters)

revenue_env = gym.make('Airline-v0', config=CONFIG)
q_l_agent = or_suite.agents.airline_revenue_management.discrete_ql_data.DiscreteQl_Data(
    revenue_env.action_space, revenue_env.observation_space, epLen, 1)
or_suite.utils.run_single_algo(revenue_env, q_l_agent, train_settings)


# ---------------------------------------------------------------------------
# Agents under comparison
# ---------------------------------------------------------------------------
agents = {
    'Q Learning': q_l_agent,
    'Random': or_suite.agents.rl.random.randomAgent(),
    'Pi Dagger': or_suite.agents.airline_revenue_management.bayes_selector.bayes_selectorAgent(epLen, round_flag=True),
    'Hindsight Learning': or_suite.agents.airline_revenue_management.bayes_selector_traces.bayes_selector_tracesAgent(epLen, round_flag=True, dataset=dataset),
    'Greedy': or_suite.agents.airline_revenue_management.greedy.greedyAgent(epLen),
}

# NOTE(review): the dirPath values and the figure name below are the same in
# every horizon test of this notebook, so later tests presumably overwrite
# the outputs of earlier ones -- copy them elsewhere if you need to keep them.
path_list_line = []
algo_list_line = []
path_list_radar = []
algo_list_radar = []
for agent in agents:
    print(agent)
    agent_dir = '../data/airline_' + str(agent)
    DEFAULT_SETTINGS['dirPath'] = agent_dir
    if agent == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, agents[agent], DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(revenue_env, agents[agent], DEFAULT_SETTINGS)

    path_list_line.append(agent_dir)
    algo_list_line.append(str(agent))
    path_list_radar.append(agent_dir)
    algo_list_radar.append(str(agent))

fig_path = '../figures/'
fig_name = 'revenue' + '_line_plot' + '.pdf'
or_suite.plots.plot_radar_plots(path_list_line, algo_list_line, fig_path, fig_name, {})

### Test 3: T = 100

In [None]:
# Environment configuration for the T = 100 experiment.
# Work on a private copy: a bare assignment would alias the module-level
# default dict, so the writes below would silently modify or_suite's shared
# defaults for everything else running in this kernel.
CONFIG = copy.deepcopy(or_suite.envs.env_configs.airline_default_config)
epLen = 100       # time horizon T
num_traces = 100  # number of traces sampled for the offline dataset
CONFIG['epLen'] = epLen
# One row per step (epLen + 1 rows in total); each row gives the probability
# of each of the two customer types at that step (1/3 each).
CONFIG['P'] = np.full((epLen + 1, 2), 1/3)

In [None]:
# ---------------------------------------------------------------------------
# Experiment settings (evaluation): numIters iterations of nEps episode(s).
# ---------------------------------------------------------------------------
nEps = 1
numIters = 500

DEFAULT_SETTINGS = {'seed': 1,
                    'recFreq': 1,
                    'dirPath': '../data/',
                    'deBug': False,
                    'nEps': nEps,
                    'numIters': numIters,
                    'saveTrajectory': True,
                    'epLen': epLen,
                    'render': False,
                    'pickle': False
                    }

# ---------------------------------------------------------------------------
# Sample the offline traces handed to the Hindsight Learning agent.
# A dedicated seeded generator makes the dataset reproducible regardless of
# what earlier cells did to the global NumPy random state.
# ---------------------------------------------------------------------------
trace_rng = np.random.default_rng(DEFAULT_SETTINGS['seed'])

# One outcome per customer type plus one extra outcome that receives the
# leftover probability mass (presumably "no arrival" -- TODO confirm).
num_outcomes = CONFIG['A'].shape[1] + 1

# The per-step distribution does not depend on the trace, so build it once.
step_distributions = [
    np.append(np.copy(CONFIG['P'][timestep, :]), 1 - np.sum(CONFIG['P'][timestep, :]))
    for timestep in range(CONFIG['epLen'])
]

dataset = []
for _ in range(num_traces):  # one trace per pass, each as long as the horizon
    for timestep, pDist in enumerate(step_distributions):
        # samples a customer type according to that step's distribution
        dataset.append((timestep, int(trace_rng.choice(a=num_outcomes, p=pDist))))

# ---------------------------------------------------------------------------
# Environments
# ---------------------------------------------------------------------------
revenue_env = gym.make('Airline-v0', config=CONFIG)
mon_env = Monitor(revenue_env)  # only used by the 'SB PPO' branch below


####### TRAINS Q LEARNING ALGORITHMS
# Training uses num_traces episodes in a single iteration. These overrides
# must go into the settings dict that is actually passed to the runner:
# reassigning nEps / numIters alone does not change DEFAULT_SETTINGS, which
# already captured the evaluation values above.
nEps = num_traces
numIters = 1
train_settings = dict(DEFAULT_SETTINGS, nEps=nEps, numIters=numIters)

revenue_env = gym.make('Airline-v0', config=CONFIG)
q_l_agent = or_suite.agents.airline_revenue_management.discrete_ql_data.DiscreteQl_Data(
    revenue_env.action_space, revenue_env.observation_space, epLen, 1)
or_suite.utils.run_single_algo(revenue_env, q_l_agent, train_settings)


# ---------------------------------------------------------------------------
# Agents under comparison
# ---------------------------------------------------------------------------
agents = {
    'Q Learning': q_l_agent,
    'Random': or_suite.agents.rl.random.randomAgent(),
    'Pi Dagger': or_suite.agents.airline_revenue_management.bayes_selector.bayes_selectorAgent(epLen, round_flag=True),
    'Hindsight Learning': or_suite.agents.airline_revenue_management.bayes_selector_traces.bayes_selector_tracesAgent(epLen, round_flag=True, dataset=dataset),
    'Greedy': or_suite.agents.airline_revenue_management.greedy.greedyAgent(epLen),
}

# NOTE(review): the dirPath values and the figure name below are the same in
# every horizon test of this notebook, so later tests presumably overwrite
# the outputs of earlier ones -- copy them elsewhere if you need to keep them.
path_list_line = []
algo_list_line = []
path_list_radar = []
algo_list_radar = []
for agent in agents:
    print(agent)
    agent_dir = '../data/airline_' + str(agent)
    DEFAULT_SETTINGS['dirPath'] = agent_dir
    if agent == 'SB PPO':
        or_suite.utils.run_single_sb_algo(mon_env, agents[agent], DEFAULT_SETTINGS)
    else:
        or_suite.utils.run_single_algo(revenue_env, agents[agent], DEFAULT_SETTINGS)

    path_list_line.append(agent_dir)
    algo_list_line.append(str(agent))
    path_list_radar.append(agent_dir)
    algo_list_radar.append(str(agent))

fig_path = '../figures/'
fig_name = 'revenue' + '_line_plot' + '.pdf'
or_suite.plots.plot_radar_plots(path_list_line, algo_list_line, fig_path, fig_name, {})