In [1]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch
import datetime
from ddopai.envs.pricing.dynamic import DynamicPricingEnv
from ddopai.envs.actionprocessors import ClipAction, RoundAction

from ddopai.experiments.experiment_functions_online import run_experiment
from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc


In [2]:
logging_level = logging.INFO
logging.basicConfig(level=logging_level)

ne.set_num_threads(1)
torch.backends.cudnn.enabled = False
torch.set_num_threads(1)

set_warnings(logging.INFO) # turn off warnings for any level higher or equal to the input level

PROJECT_NAME = "dynamic_pricing"
LIBRARIES_TO_TRACK = ["ddopai", "mushroom_rl"]
ENVCLASS = DynamicPricingEnv
RESULTS_DIR = "results"

# Experiment preparations
## Set-up WandB
### Init WandB

In [3]:
project_name = "pricing_test"

wandb.init(
    project=project_name,
    name = f"{project_name}_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtimlachner[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Track library versions and git hash of experiment

# Experiment parameters

In [4]:
config_train, config_agent, config_env, AgentClass, agent_name = prep_experiment(
        PROJECT_NAME,
        LIBRARIES_TO_TRACK,
        config_train_name="config_train.yaml",
        config_agent_name="config_agent.yaml",
        config_env_name="config_env.yaml",
    )

INFO:root:ddopai: 0.0.7
INFO:root:mushroom_rl: 1.10.1
INFO:root:Git hash: eb256e3ce54b6be3348799526c2d46b30ddf04a5
INFO:root:Configuration file 'config_train.yaml' successfully loaded.
INFO:root:Configuration file 'config_agent.yaml' successfully loaded.
INFO:root:Configuration file 'config_env.yaml' successfully loaded.


## Prepare data
* Get the env parameters 
* Create the data and noise based on these parameters

In [5]:
raw_data, val_index_start, test_index_start = get_online_data(
        config_env,
        overwrite=False
    )

## Environment parameters

* Get the environment parameters from the config file 
* Overwrite the ```lag_window```parameter with the parameter specified in the agent, if it is specified (since lag window is provided by the environment, but a tunable hyperparameter of the agent)

In [6]:
round_action = RoundAction(unit_size=config_env["unit_size"])
postprocessors = [round_action]

environment = set_up_env_online(ENVCLASS, raw_data, val_index_start, test_index_start, config_env, postprocessors)

## Agent Parameter

In [7]:
logging.info(f"Agent: {agent_name}")


if AgentClass.train_mode == "env_interaction":
    if "link" in config_agent:
        glm_link, price_function = set_up_agent(AgentClass, environment, config_agent)
        config_agent["g"] = glm_link
        config_agent["price_function"] = price_function
        del config_agent["link"]
    agent = AgentClass(
        environment_info=environment.mdp_info,
        **config_agent
    )

else:
    raise ValueError("Invalid train_mode for online training")

INFO:root:Agent: Greedy


In [8]:
earlystoppinghandler = set_up_earlystoppinghandler(config_train)

In [11]:
run_experiment(
        agent,
        environment,
        n_epochs=config_train["n_epochs"],
        n_steps=config_train["n_steps"],
        early_stopping_handler=earlystoppinghandler,
        save_best=config_train["save_best"],
        run_id=wandb.run.id,
        tracking="wandb",
        eval_step_info=False,
        print_freq=1,
        results_dir = RESULTS_DIR,
        return_dataset=True,
        return_score=True
    )

Experiment directory: results/7xdul8ux


INFO:root:Starting experiment
  0%|          | 0/1 [00:00<?, ?it/s]INFO:root:Epoch 1: R=24765.899812447482, J=8.484775792623298
100%|██████████| 1/1 [00:41<00:00, 41.56s/it]


([np.float64(24765.899812447482)],
 [np.float64(8.484775792623298)],
 [[(array([[0.41003167]]),
    array([4.97726024]),
    np.float64(8.484775792623298),
    array([[0.41003167]]),
    False,
    False)],
  [(array([[0.41003167]]),
    array([4.97726024]),
    np.float64(16.40889973572651),
    array([[0.76964792]]),
    False,
    False)],
  [(array([[0.76964792]]),
    array([4.97726024]),
    np.float64(28.646459494748857),
    array([[1.16218691]]),
    False,
    False)],
  [(array([[1.16218691]]),
    array([4.97726024]),
    np.float64(26.962421173872897),
    array([[1.17506995]]),
    False,
    False)],
  [(array([[1.17506995]]),
    array([4.97726024]),
    np.float64(30.073317359953368),
    array([[1.17311897]]),
    False,
    False)],
  [(array([[1.17311897]]),
    array([4.97726024]),
    np.float64(31.816830338404824),
    array([[1.29775928]]),
    False,
    False)],
  [(array([[1.29775928]]),
    array([4.97726024]),
    np.float64(24.985077504130103),
    array([

In [12]:
wandb.finish()

In [None]:
"large-array"