In [1]:
import os
# Limit numexpr to a single thread. This is set before `import numexpr` below;
# NOTE(review): presumably numexpr reads NUMEXPR_MAX_THREADS at import time, so
# the ordering matters — confirm against the numexpr documentation.
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch
import datetime
from ddopai.envs.pricing.dynamic import DynamicPricingEnv
from ddopai.envs.actionprocessors import ClipAction, RoundAction

from ddopai.experiments.experiment_functions_online import run_experiment
from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc


In [2]:
# --- Experiment-wide constants (used by the cells below) ---------------------
PROJECT_NAME = "dynamic_pricing"
LIBRARIES_TO_TRACK = ["ddopai", "mushroom_rl"]
ENVCLASS = DynamicPricingEnv
RESULTS_DIR = "results"

# --- Logging and warnings ----------------------------------------------------
logging_level = logging.INFO
logging.basicConfig(level=logging_level)
# Per the original note: turns off warnings for any level higher than or equal
# to the level passed in.
set_warnings(logging.INFO)

# --- Keep the numeric backends single-threaded -------------------------------
ne.set_num_threads(1)
torch.set_num_threads(1)
torch.backends.cudnn.enabled = False

# Experiment preparations
## Set up WandB
### Init WandB

In [3]:
# W&B project that this run is logged to.
project_name = "pricing_test"

# Current local time, down to the second, appended to the run name.
run_timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

wandb.init(project=project_name, name=f"{project_name}_{run_timestamp}")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtimlachner[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Track library versions and git hash of experiment

# Experiment parameters

In [4]:
# Load the train / agent / env YAML configs. The logged output of this cell
# also reports the tracked library versions and the current git hash.
experiment_setup = prep_experiment(
    PROJECT_NAME,
    LIBRARIES_TO_TRACK,
    config_train_name="config_train.yaml",
    config_agent_name="config_agent.yaml",
    config_env_name="config_env.yaml",
)

# Unpack into the names the remaining cells rely on.
config_train, config_agent, config_env, AgentClass, agent_name = experiment_setup

INFO:root:ddopai: 0.0.7
INFO:root:mushroom_rl: 1.10.1
INFO:root:Git hash: ff705cd2613ee9fa1e7927cfd1843367795727ff
INFO:root:Configuration file 'config_train.yaml' successfully loaded.
INFO:root:Configuration file 'config_agent.yaml' successfully loaded.
INFO:root:Configuration file 'config_env.yaml' successfully loaded.


## Prepare data
* Get the env parameters 
* Create the data and noise based on these parameters

In [5]:
# Fetch the data for the environment config, together with the indices at which
# the validation and test segments start. `overwrite=False` is passed through
# unchanged; its exact effect is defined by get_online_data.
online_data = get_online_data(config_env, overwrite=False)
raw_data, val_index_start, test_index_start = online_data

In [6]:
# Quick sanity check: display the element at raw_data[0][0] to confirm that
# data was actually loaded.
raw_data[0][0]

array([[0.79370963]])

## Environment parameters

* Get the environment parameters from the config file 
* Overwrite the `lag_window` parameter with the value specified in the agent config, if one is given (the lag window is provided by the environment, but it is a tunable hyperparameter of the agent)

In [7]:
# Action post-processing: a single RoundAction step configured with the
# environment's unit size.
round_action = RoundAction(unit_size=config_env["unit_size"])
postprocessors = [round_action]

# Build the online environment from the data, the split indices and the
# environment config.
environment = set_up_env_online(
    ENVCLASS,
    raw_data,
    val_index_start,
    test_index_start,
    config_env,
    postprocessors,
)

## Agent parameters

In [8]:
logging.info(f"Agent: {agent_name}")

# Only agents that learn by interacting with the environment are supported in
# this online-training notebook; bail out early for anything else.
if AgentClass.train_mode != "env_interaction":
    raise ValueError("Invalid train_mode for online training")

# When the agent config carries a "link" entry, replace it with the two objects
# returned by set_up_agent ("g" and "price_function"). Note that this edits
# `config_agent` in place.
if "link" in config_agent:
    link_fn, pricing_fn = set_up_agent(AgentClass, environment, config_agent)
    config_agent.update(g=link_fn, price_function=pricing_fn)
    del config_agent["link"]

agent = AgentClass(environment_info=environment.mdp_info, **config_agent)

INFO:root:Agent: SAC
INFO:root:Actor network (mu network):


Layer (type:depth-idx)                   Output Shape              Param #
MLPActor                                 [1, 1]                    --
├─Sequential: 1-1                        [1, 1]                    --
│    └─Linear: 2-1                       [1, 64]                   128
│    └─ReLU: 2-2                         [1, 64]                   --
│    └─Dropout: 2-3                      [1, 64]                   --
│    └─Linear: 2-4                       [1, 1]                    65
│    └─Identity: 2-5                     [1, 1]                    --
Total params: 193
Trainable params: 193
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


INFO:root:################################################################################
INFO:root:Critic network:


Layer (type:depth-idx)                   Output Shape              Param #
MLPStateAction                           --                        --
├─Sequential: 1-1                        [1, 1]                    --
│    └─Linear: 2-1                       [1, 64]                   192
│    └─ReLU: 2-2                         [1, 64]                   --
│    └─Dropout: 2-3                      [1, 64]                   --
│    └─Linear: 2-4                       [1, 1]                    65
│    └─Identity: 2-5                     [1, 1]                    --
Total params: 257
Trainable params: 257
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


In [9]:
# Create the handler from the training config; it is handed to run_experiment
# via its `early_stopping_handler` argument.
earlystoppinghandler = set_up_earlystoppinghandler(config_train)

In [10]:
# Collect the run options in one place so they are easy to scan and tweak.
run_options = {
    # Training schedule and checkpointing, taken from the training config.
    "n_epochs": config_train["n_epochs"],
    "n_steps": config_train["n_steps"],
    "early_stopping_handler": earlystoppinghandler,
    "save_best": config_train["save_best"],
    # Tracking: reuse the id of the active W&B run.
    "run_id": wandb.run.id,
    "tracking": "wandb",
    # Reporting and outputs.
    "eval_step_info": False,
    "print_freq": 1,
    "results_dir": RESULTS_DIR,
    "return_dataset": True,
    "return_score": False,
}

dataset = run_experiment(agent, environment, **run_options)

INFO:root:Starting experiment


Experiment directory: results/n1atzueg


  0%|          | 0/3 [00:00<?, ?it/s]INFO:root:Epoch 1: R=119384.34406709572, J=17.374767376265627
 33%|███▎      | 1/3 [00:07<00:14,  7.07s/it]INFO:root:Epoch 2: R=54225.346723543116, J=0.4970029411814527
 67%|██████▋   | 2/3 [00:14<00:07,  7.05s/it]INFO:root:Epoch 3: R=38812.00397098696, J=6.810134545727945
100%|██████████| 3/3 [00:21<00:00,  7.14s/it]


[(array([1.42350822]), array([2.920124], dtype=float32), np.float64(6.810134545727945), array([1.42350822]), False, False)]


In [11]:
# Close the W&B run that was opened with wandb.init earlier in the notebook.
wandb.finish()