In [1]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch
import datetime
from ddopai.envs.pricing.dynamic import DynamicPricingEnv
from ddopai.envs.pricing.dynamic_inventory import DynamicPricingInvEnv
from ddopai.envs.actionprocessors import ClipAction, RoundAction

from ddopai.experiments.experiment_functions_online import run_experiment
from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc
from mushroom_rl import core 

In [2]:
# Root logger verbosity used for the whole notebook.
logging_level = logging.INFO
logging.basicConfig(level=logging_level)

# Pin numexpr and torch to one thread each and switch cuDNN off.
ne.set_num_threads(1)
torch.backends.cudnn.enabled = False
torch.set_num_threads(1)

# NOTE(review): set_warnings is not defined in this notebook; presumably it is
# supplied by the star import from ddopai.experiments.meta_experiment_functions
# — confirm.
set_warnings(logging.INFO) # turn off warnings for any level higher or equal to the input level

# Experiment-wide constants.
PROJECT_NAME = "pricing_cMDP_test"  # handed to prep_experiment
LIBRARIES_TO_TRACK = ["ddopai", "mushroom_rl"]  # handed to prep_experiment
ENVCLASS = DynamicPricingEnv  # default only; reassigned from config_env["env_class"] in the environment cell
RESULTS_DIR = "results"  # handed to run_experiment as results_dir
def get_ENVCLASS(class_name):
    """Return the environment class that corresponds to ``class_name``.

    Args:
        class_name: Either "DynamicPricingEnv" or "DynamicPricingInvEnv".

    Returns:
        The matching environment class (the class itself, not an instance).

    Raises:
        ValueError: If ``class_name`` is neither of the two known names.
    """
    # Reject unknown names up front so the lookup below stays flat.
    if class_name not in ("DynamicPricingEnv", "DynamicPricingInvEnv"):
        raise ValueError(f"Unknown class name {class_name}")
    return DynamicPricingEnv if class_name == "DynamicPricingEnv" else DynamicPricingInvEnv

# Experiment preparations
## Set up WandB
### Init WandB

In [3]:
# Reuse the PROJECT_NAME constant from the configuration cell instead of
# repeating the literal, so the WandB project and the name handed to
# prep_experiment cannot drift apart.
project_name = PROJECT_NAME

# Start a WandB run whose display name is the project name followed by the
# launch timestamp (YYYY-MM-DD_HH-MM-SS).
wandb.init(
    project=project_name,
    name=f"{project_name}_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtimlachner[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Track library versions and git hash of experiment

# Experiment parameters

In [4]:
# Load the train / agent / env YAML configs and obtain the agent class and its
# name. Per the recorded cell output, this call also logs the versions of the
# tracked libraries and the git hash of the experiment code.
config_train, config_agent, config_env, AgentClass, agent_name = prep_experiment(
        PROJECT_NAME,
        LIBRARIES_TO_TRACK,
        config_train_name="config_train.yaml",
        config_agent_name="config_agent.yaml",
        config_env_name="config_env.yaml",
    )

INFO:root:ddopai: 0.0.7
INFO:root:mushroom_rl: 1.10.1
INFO:root:Git hash: 4b09580a1615ee094fe974afd014071f36e821bb
INFO:root:Configuration file 'config_train.yaml' successfully loaded.
INFO:root:Configuration file 'config_agent.yaml' successfully loaded.
INFO:root:Configuration file 'config_env.yaml' successfully loaded.


## Prepare data
* Get the env parameters 
* Create the data and noise based on these parameters

In [5]:
# Obtain the raw data for the online environment from the env config, together
# with two index values (named as the start of the validation and test parts).
# NOTE(review): overwrite=False presumably reuses previously generated data when
# it exists — confirm in get_online_data.
raw_data, val_index_start, test_index_start = get_online_data(
        config_env,
        overwrite=False
    )

In [6]:
# Sanity check: display the first element of the first entry of raw_data.
raw_data[0][0]

array([[1.        , 1.01494341, 1.50438407, 1.56887237, 1.95534569,
        0.44374949, 0.94637045, 1.29128228, 0.09025757, 1.47603848]])

## Environment parameters

* Get the environment parameters from the config file 
* Overwrite the `lag_window` parameter with the value specified in the agent config, if one is given (the lag window is provided by the environment, but it is a tunable hyperparameter of the agent)

In [7]:
# Action post-processing: a single RoundAction configured with the unit size
# from the env config (presumably rounds actions to multiples of unit_size —
# TODO confirm in ddopai.envs.actionprocessors).
round_action = RoundAction(unit_size=config_env["unit_size"])
postprocessors = [round_action]
# Resolve the environment class named in the config; this replaces the default
# ENVCLASS assigned in the configuration cell.
ENVCLASS = get_ENVCLASS(config_env["env_class"])
# NOTE(review): the markdown above mentions overriding lag_window from the agent
# config, but config_agent is not passed to this call — confirm where that
# override happens.
environment = set_up_env_online(ENVCLASS, raw_data, val_index_start, test_index_start, config_env, postprocessors)

## Agent parameters

In [8]:
logging.info(f"Agent: {agent_name}")

# Online training only supports agents that learn through environment
# interaction; bail out early for anything else.
if AgentClass.train_mode != "env_interaction":
    raise ValueError("Invalid train_mode for online training")

# A "link" entry in the agent config is replaced by the two objects returned
# from set_up_agent, stored under "g" and "price_function".
if "link" in config_agent:
    glm_link, price_function = set_up_agent(AgentClass, environment, config_agent)
    config_agent["g"] = glm_link
    config_agent["price_function"] = price_function
    del config_agent["link"]

# Remaining config entries are forwarded to the agent as keyword arguments.
agent = AgentClass(environment_info=environment.mdp_info, **config_agent)

INFO:root:Agent: SAC
INFO:root:Actor network (mu network):


Layer (type:depth-idx)                   Output Shape              Param #
MLPActor                                 [1, 1]                    --
├─Sequential: 1-1                        [1, 1]                    --
│    └─Linear: 2-1                       [1, 64]                   768
│    └─ReLU: 2-2                         [1, 64]                   --
│    └─Dropout: 2-3                      [1, 64]                   --
│    └─Linear: 2-4                       [1, 32]                   2,080
│    └─ReLU: 2-5                         [1, 32]                   --
│    └─Dropout: 2-6                      [1, 32]                   --
│    └─Linear: 2-7                       [1, 1]                    33
│    └─Identity: 2-8                     [1, 1]                    --
Total params: 2,881
Trainable params: 2,881
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01


INFO:root:################################################################################
INFO:root:Critic network:


Layer (type:depth-idx)                   Output Shape              Param #
MLPStateAction                           --                        --
├─Sequential: 1-1                        [1, 1]                    --
│    └─Linear: 2-1                       [1, 64]                   832
│    └─ReLU: 2-2                         [1, 64]                   --
│    └─Dropout: 2-3                      [1, 64]                   --
│    └─Linear: 2-4                       [1, 32]                   2,080
│    └─ReLU: 2-5                         [1, 32]                   --
│    └─Dropout: 2-6                      [1, 32]                   --
│    └─Linear: 2-7                       [1, 1]                    33
│    └─Identity: 2-8                     [1, 1]                    --
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01


In [9]:
# Build the early-stopping handler from the training config; it is handed to
# run_experiment below. The stopping criteria live in the project helper and
# are not visible in this notebook.
earlystoppinghandler = set_up_earlystoppinghandler(config_train)

In [10]:
# Run the online experiment with the epoch/step counts and save_best flag from
# the training config. Tracking goes to WandB under the id of the active run,
# and RESULTS_DIR is passed as the results directory (the recorded output shows
# an experiment directory of the form results/<run id>).
# return_dataset=True and return_score=False are requested; the return value is
# kept as `dataset`.
dataset = run_experiment(
        agent,
        environment,
        n_epochs=config_train["n_epochs"],
        n_steps=config_train["n_steps"],
        early_stopping_handler=earlystoppinghandler,
        save_best=config_train["save_best"],
        run_id=wandb.run.id,
        tracking="wandb",
        eval_step_info=False,
        print_freq=1,
        results_dir = RESULTS_DIR,
        return_dataset=True,
        return_score=False
    )

INFO:root:Starting experiment


Experiment directory: results/vc1j0dnm


  0%|          | 0/10 [00:00<?, ?it/s]INFO:root:Epoch 1: R=112039.85879082026, J=0.0
 10%|█         | 1/10 [00:18<02:44, 18.28s/it]INFO:root:Epoch 2: R=110018.47960741427, J=14.713048138015067
 20%|██        | 2/10 [00:35<02:22, 17.79s/it]INFO:root:Epoch 3: R=110379.04887421588, J=14.454903980042761
 30%|███       | 3/10 [00:52<02:02, 17.51s/it]INFO:root:Epoch 4: R=108710.89633117587, J=10.915007102758727
 40%|████      | 4/10 [01:10<01:44, 17.40s/it]INFO:root:Epoch 5: R=112882.14108779938, J=16.925065217491216
 50%|█████     | 5/10 [01:27<01:27, 17.48s/it]INFO:root:Epoch 6: R=115148.67517540854, J=13.445947940833475
 60%|██████    | 6/10 [01:46<01:11, 17.87s/it]INFO:root:Epoch 7: R=111854.66789164975, J=11.63262676080817
 70%|███████   | 7/10 [02:05<00:54, 18.22s/it]INFO:root:Epoch 8: R=109677.35784091488, J=8.326761756892571
 80%|████████  | 8/10 [02:22<00:35, 17.89s/it]INFO:root:Epoch 9: R=112767.31882425926, J=10.998037237158425
 90%|█████████ | 9/10 [02:40<00:17, 17.82s/it]INFO:ro

In [11]:
# Close the WandB run opened by wandb.init earlier in the notebook; the run
# history and summary tables are printed as this cell's output.
wandb.finish()

0,1
Action,▁▅▂▆▅▅█▄▃▄▆▅▄▃▃▅▃▅▆▄▆▄▅▆▇▅▅▅▃▅▄▅▅▄▅█▅▆▇▆
Action_0,▁▅▇▄██▆▆▅▄▆▆▅▃▅▄▃▅▅▄▅▄▅▅▅▅▄▅▆▄▅▄▅▆▄▄▆▄█▄
Action_1,▇▆▃▃▁▃▆▄▄▄▅▄▇▃▅▇▆▅▃▃▅█▅▆▄▅▇▄▃▃▄▅▅▂▄▃▅▃█▃
Action_2,▅▃▃▅█▅▆▄▅▄▅▅▅▃▅▄▆▃▁▂▄▇▅▃▆▇▇▃▄▂▁▃▃▅▆▁▄▅▄▃
Action_3,▅▅▃█▇▁▇▂▅▅▃▆▅▅▄▆▄▆▄▃▃▅▄▅▂▅▂▃▄▅▅▄▄▄▄▅▅▂▅▄
Action_4,▆▃▃▂▂▃▂▂▃▄▂▅▄▄▃▅▃▃▁▅▂▄▄▄▅▃▃▄▃▄▆█▃▃▇▄▄▅▄▄
Action_5,▄▄▁▆▅▇▂▅█▄▄▃▃▄▄▇▃▄▄▄▄▄▅▄▅▆▅▄▄▅▁▄▃▆▅▅▅▅▄▄
Action_6,▃▄▃▇▅▆▆▅▆▆▆▅▃▃▂▅▄▁▄▃▄▅▂▂▄▄▃▂▂▃▃▃▃▂▄▆▃▅▃█
Action_7,▇▃▅▄▅▄▁▆▅▆▅█▅▅▄▆▄▅█▃▇▆▇▆▆▅▄▆▅▆▆▅▆▆▆▆▅▆▆▆
Action_8,▅▄▄▃▂▆▃▃▆▄▂▅▃▅▄▅▅▃▅▅▅▅▆▃▇▃▅▂▃▄▆▁▂▅▃▂▅▆█▃

0,1
Action,2.38
Action_0,2.73
Action_1,2.23
Action_2,3.84
Action_3,2.91
Action_4,3.18
Action_5,2.71
Action_6,3.53
Action_7,3.29
Action_8,2.69
