In [2]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch
import datetime
from ddopai.envs.pricing.dynamic import DynamicPricingEnv
from ddopai.envs.actionprocessors import ClipAction, RoundAction

from ddopai.experiments.experiment_functions_online import run_experiment
from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc


In [3]:
# Global notebook configuration: logging, thread limits and constants that
# later cells read.

# Configure the root logger to emit INFO-level messages and above.
logging_level = logging.INFO
logging.basicConfig(level=logging_level)

# Restrict numexpr and torch to one thread each and switch cuDNN off.
ne.set_num_threads(1)
torch.backends.cudnn.enabled = False
torch.set_num_threads(1)

# set_warnings is not defined in this notebook; presumably it comes from the
# star import of ddopai.experiments.meta_experiment_functions - TODO confirm.
set_warnings(logging.INFO) # turn off warnings for any level higher or equal to the input level

# Notebook-wide constants.
PROJECT_NAME = "dynamic_pricing"  # passed to prep_experiment
LIBRARIES_TO_TRACK = ["ddopai", "mushroom_rl"]  # passed to prep_experiment
ENVCLASS = DynamicPricingEnv  # NOTE(review): not referenced by any visible cell
RESULTS_DIR = "results"  # NOTE(review): not referenced by any visible cell

# Experiment preparations
## Set-up WandB
### Init WandB

In [4]:
# Start a W&B run named "<project>_<launch timestamp>" so that repeated
# launches of this notebook get distinct run names.
project_name = "pricing_test"
run_started_at = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

wandb.init(project=project_name, name=f"{project_name}_{run_started_at}")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/timlachner/.netrc


### Track library versions and git hash of experiment

# Experiment parameters

In [6]:
# Unpack the five values returned by prep_experiment: the training, agent and
# environment configs, the agent class and the agent name. The three YAML file
# names are handed over by keyword.
# NOTE(review): the recorded output of this cell shows the library versions
# being logged and then a CalledProcessError from `git rev-parse HEAD`, i.e.
# the call fails when the working directory is not a git repository with at
# least one commit - confirm the notebook is run from inside the repository.
config_train, config_agent, config_env, AgentClass, agent_name = prep_experiment(
        PROJECT_NAME,
        LIBRARIES_TO_TRACK,
        config_train_name="config_train.yaml",
        config_agent_name="config_agent.yaml",
        config_env_name="config_env.yaml",
    )

INFO:root:ddopai: 0.0.7
INFO:root:mushroom_rl: 1.10.1
fatal: ambiguous argument 'HEAD': unknown revision or path not in the working tree.
Use '--' to separate paths from revisions, like this:
'git <command> [<revision>...] -- [<file>...]'


CalledProcessError: Command '['git', 'rev-parse', 'HEAD']' returned non-zero exit status 128.

## Agent parameters

* Get the agent parameters from the config file containing the parameters for all agent classes
* Get the agent class based on the agent name specified in the training params
* Get the agent-specific parameters based on the agent name specified in the training params


In [None]:
# Load the parameters of all agent classes from the YAML config file, then
# select the class and the parameter set of the agent named in config_train.
# NOTE(review): prep_experiment in an earlier cell already returns
# config_agent and AgentClass; this cell overwrites both - confirm that the
# duplication is intended.

with open("config_agent.yaml", "r") as stream:
    try:
        config_agent_all = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Report the parse error and stop. Swallowing it would let the lines
        # below run against a stale or undefined configuration.
        print(exc)
        raise

AgentClass = select_agent(config_train["agent"])
# Keep only the parameters that belong to the selected agent.
config_agent = config_agent_all[config_train["agent"]]

# update wandb
wandb.config.update(config_agent)

config_agent

## Environment parameters

* Get the environment parameters from the config file 
* Overwrite the `lag_window` parameter of the environment with the value specified in the agent config, if one is given (the lag window is applied by the environment, but it is a tunable hyperparameter of the agent)

In [None]:
# Load the environment parameters from the YAML config file.

with open("config_env.yaml", "r") as stream:
    try:
        config_env = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Report the parse error and stop. Swallowing it would let the lines
        # below run against a stale or undefined configuration.
        print(exc)
        raise

# The lag window is a tunable hyperparameter of the agent, but it is consumed
# through the environment's lag_window_params. Move it over when present.
if "lag_window" in config_agent:
    # Only an integer value overrides the environment setting.
    # NOTE(review): the original comment announced a positivity check that was
    # never implemented; no such check is performed here either - confirm
    # whether non-positive values should be rejected.
    if isinstance(config_agent["lag_window"], int):
        config_env["lag_window_params"]["lag_window"] = config_agent["lag_window"]
    # The key is always removed so that it is not forwarded to the agent
    # constructor through **config_agent.
    del config_agent["lag_window"]

normalize_features = config_env["normalize_features"]

# update wandb
wandb.config.update(config_env)

config_env

## Prepare data

In [None]:
# Fetch the dataset selected in the environment config.
datasetloader = DatasetLoader()
data = datasetloader.load_dataset(
    dataset_type=config_env["dataset_type"],
    dataset_number=config_env["dataset_number"],
    overwrite=False,
)

# Features are rounded to 5 decimals; the target is used as loaded.
X = np.round(data["data_raw_features"], 5)
Y = data["data_raw_target"]

# Start indices of the validation and test ranges, counted back from the end
# of the data using the sizes given in the environment config.
n_samples = len(X)
val_index_start = n_samples - config_env["size_val"] - config_env["size_test"]
test_index_start = n_samples - config_env["size_test"]

In [None]:
# Action post-processing: only rounding to the configured unit size is active.
# clip_action = ClipAction(lower=0, upper=config_env["env_kwargs"]["q_bound_high"])
round_action = RoundAction(unit_size=config_env["unit_size"])
postprocessors = [round_action]

# The data loader receives the split indices and the lag-window settings;
# feature normalization follows the environment config.
normalization_settings = {'normalize': normalize_features, 'ignore_one_hot': True}
dataloader = XYDataLoader(
    X=X,
    Y=Y,
    val_index_start=val_index_start,
    test_index_start=test_index_start,
    lag_window_params=config_env["lag_window_params"],
    normalize_features=normalization_settings,
)

# NOTE(review): this notebook configures ENVCLASS = DynamicPricingEnv, yet a
# NewsvendorEnv is instantiated here - confirm which environment is intended.
environment = NewsvendorEnv(
    dataloader=dataloader,
    postprocessors=postprocessors,
    **config_env["env_kwargs"],
)

# Create an early-stopping handler when config_train defines either
# early_stopping_patience or early_stopping_warmup; a missing one of the two
# defaults to 0. Without either key, no handler is used.
if "early_stopping_patience" in config_train or "early_stopping_warmup" in config_train:
    warmup = config_train["early_stopping_warmup"] if "early_stopping_warmup" in config_train else 0
    patience = config_train["early_stopping_patience"] if "early_stopping_patience" in config_train else 0
    # Pass the configured patience; previously the warmup value was passed
    # for both arguments and the patience setting was silently ignored.
    earlystoppinghandler = EarlyStoppingHandler(warmup=warmup, patience=patience)
else:
    earlystoppinghandler = None

In [None]:
# Instantiate the agent. The keyword arguments depend on the train_mode
# declared by the agent class:
#   direct_fit      -> environment info plus the cost parameters cu / co
#   epochs_fit      -> additionally the dataloader and the input/output shapes
#   env_interaction -> environment info only
# The agent-specific parameters from config_agent are forwarded in every mode.
train_mode = AgentClass.train_mode
if train_mode not in ("direct_fit", "epochs_fit", "env_interaction"):
    raise ValueError("Invalid train_mode")

agent_kwargs = {"environment_info": environment.mdp_info}

if train_mode == "epochs_fit":
    agent_kwargs["dataloader"] = dataloader
    agent_kwargs["input_shape"] = environment.observation_space.shape
    agent_kwargs["output_shape"] = environment.action_space.shape

if train_mode != "env_interaction":
    agent_kwargs["cu"] = config_env["env_kwargs"]["underage_cost"]
    agent_kwargs["co"] = config_env["env_kwargs"]["overage_cost"]

# Two separate unpackings keep the duplicate-keyword check: a key that occurs
# in both mappings still raises a TypeError instead of being overridden.
agent = AgentClass(**agent_kwargs, **config_agent)

In [None]:
# Train the agent with the settings from config_train; metrics are tracked
# with wandb.
training_options = dict(
    n_epochs=config_train["n_epochs"],
    n_steps=config_train["n_steps"],
    early_stopping_handler=earlystoppinghandler,
    save_best=config_train["save_best"],
    run_id="test",
    tracking="wandb",
    eval_step_info=False,
    print_freq=1,
)
run_experiment(agent, environment, **training_options)

# Presumably switches the environment to its test data and the agent to
# evaluation mode before the final evaluation - TODO confirm.
environment.test()
agent.eval()

R, J = test_agent(agent, environment, return_dataset=False, tracking="wandb")

# print R and J rounded to 10 decimals
print("final evaluation on test set: R =", np.round(R, 10), "J =", np.round(J, 10))

In [None]:
# Close the W&B run that was started with wandb.init earlier in this notebook.
wandb.finish()