# Playing Differentiated Demand Environemnt

In [1]:
#Imports

from marketsai.markets.diff_demand import DiffDemand

#import ray

from ray import tune, shutdown, init
from ray.tune.registry import register_env
from ray.rllib.agents.a3c.a2c import A2CTrainer
from ray.rllib.agents.dqn.dqn import DQNTrainer
from ray.tune.integration.mlflow import MLflowLoggerCallback
from ray.rllib.utils.schedules.exponential_schedule import ExponentialSchedule

import random
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging

Instructions for updating:
non-resource variables are not supported in the long term
{'agent_0': array([1.6, 1.6], dtype=float32), 'agent_1': array([1.6, 1.6], dtype=float32)} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}


In [2]:
# STEP 0: Inititialize ray
NUM_CPUS = 12
shutdown()
init(num_cpus=NUM_CPUS, 
    logging_level=logging.ERROR,
)

{'node_ip_address': '192.168.1.202',
 'raylet_ip_address': '192.168.1.202',
 'redis_address': '192.168.1.202:31042',
 'object_store_address': '/tmp/ray/session_2021-04-09_19-44-19_049487_53831/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-04-09_19-44-19_049487_53831/sockets/raylet',
 'webui_url': '127.0.0.1:8265',
 'session_dir': '/tmp/ray/session_2021-04-09_19-44-19_049487_53831',
 'metrics_export_port': 58961,
 'node_id': '0d63c7ca6c283467164f22bc5e46f04de1ac69bd8f0d9d25ab2fd5ea'}

In [3]:
# STEP 1: register environment
register_env("diffdemand", DiffDemand)
env = DiffDemand()
policy_ids = [f"policy_{i}" for i in range(env.n_agents)]

In [4]:
# STEP 2: Experiment configuration

MAX_STEPS = 20 * 1000
PRICE_BAND_WIDE = 0.1
LOWER_PRICE = 1.47 - PRICE_BAND_WIDE
HIGHER_PRICE = 1.93 + PRICE_BAND_WIDE
DEC_RATE = math.e ** (-4 * 10 ** (-6))
DEC_RATE_HIGH = math.e ** (-4 * 10 ** (-6) * 4)

env_config = {
    "mkt_config": {
    "lower_price": [LOWER_PRICE for i in range(env.n_agents)],
    "higher_price": [HIGHER_PRICE for i in range(env.n_agents)],
    "parameteres": {
                "cost": [1 for i in range(env.n_agents)],
                "values": [2 for i in range(env.n_agents)],
                "ext_demand": 0,
                "substitution": 0.25,
    },
    "space_type": "MultiDiscrete",
    "gridpoints": 16,
    }
}

exploration_config = {
    "type": "EpsilonGreedy",
    "epsilon_schedule": ExponentialSchedule(
        schedule_timesteps=1,
        framework=None,
        initial_p=1,
        decay_rate=DEC_RATE,
    ),
}

training_config = {
    "gamma": 0.95,
    "lr": 0.15,
    "env": "diffdemand",
    "exploration_config": exploration_config,
    "env_config": env_config,
    "horizon": 100,
    "soft_horizon": True,
    "no_done_at_end": True,
    "multiagent": {
        "policies": {
            policy_ids[i]: (
                None,
                env.observation_space["agent_{}".format(i)],
                env.action_space["agent_{}".format(i)],
                {},
            )
            for i in range(env.n_agents)
        },
        "policy_mapping_fn": (lambda agent_id: policy_ids[int(agent_id.split("_")[1])]),
    },
    "framework": "torch",
    "num_workers": NUM_CPUS - 1,
    "num_gpus": 0,
}

stop = {"info/num_steps_trained": MAX_STEPS}

In [None]:
#Step 3: Experiments

exp_name = "DQN_test_April9"
results = tune.run(
    "DQN",
    name=exp_name,
    config=training_config,
    checkpoint_freq=250,
    checkpoint_at_end=True,
    stop=stop,
    metric="episode_reward_mean",
    mode="max",
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)],
)

best_checkpoint = results.best_checkpoint
print("Best checkpont:", best_checkpoint)

## Continuous Space


In [5]:
env_config["mkt_config"]["space_type"] = "Continuous"
env=DiffDemand(env_config)
training_config["env_config"] = env_config
training_config["multiagent"]["policies"] =  {
            policy_ids[i]: (None, env.observation_space[f"agent_{i}"], env.action_space[f"agent_{i}"],{},) for i in range(env.n_agents)
}
#print(env_config)
print(training_config)
print(env.action_space)


{'gamma': 0.95, 'lr': 0.15, 'env': 'diffdemand', 'exploration_config': {'type': 'EpsilonGreedy', 'epsilon_schedule': <ray.rllib.utils.schedules.exponential_schedule.ExponentialSchedule object at 0x1a08fc730>}, 'env_config': {'mkt_config': {'lower_price': [1.3699999999999999, 1.3699999999999999], 'higher_price': [2.03, 2.03], 'parameteres': {'cost': [1, 1], 'values': [2, 2], 'ext_demand': 0, 'substitution': 0.25}, 'space_type': 'Continuous', 'gridpoints': 16}}, 'horizon': 100, 'soft_horizon': True, 'no_done_at_end': True, 'multiagent': {'policies': {'policy_0': (None, Box(1.3700000047683716, 2.0299999713897705, (2,), float32), Box(1.3700000047683716, 2.0299999713897705, (1,), float32), {}), 'policy_1': (None, Box(1.3700000047683716, 2.0299999713897705, (2,), float32), Box(1.3700000047683716, 2.0299999713897705, (1,), float32), {})}, 'policy_mapping_fn': <function <lambda> at 0x1a08e10d0>}, 'framework': 'torch', 'num_workers': 11, 'num_gpus': 0}
{'agent_0': Box(1.3700000047683716, 2.0299

In [7]:
exp_name = "DDPG_cont_test_April9"
results = tune.run(
    "DDPG",
    name=exp_name,
    config=training_config,
    checkpoint_freq=250,
    checkpoint_at_end=True,
    stop=stop,
    metric="episode_reward_mean",
    mode="max",
    callbacks=[MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)],
)

best_checkpoint = results.best_checkpoint
print("Best checkpont:", best_checkpoint)

Trial name,status,loc
DDPG_diffdemand_b3cbd_00000,RUNNING,


[2m[36m(pid=55656)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=55658)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=55662)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=55660)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=55664)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=55650)[0m {'agent_0': array([1.6, 1.6], dtype=float32), 'agent_1': array([1.6, 1.6], dtype=float32)} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=55649)[0m {'agent_0': array([1.6, 1.6], dtype=float32), 'agent_1': array([1.6, 1.6], dtype=float32)} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=55648)[0m {'agent_0': array([1.6, 1.6], dtype=float32), 'agent_1': array([1.6, 1.6], dtype=float32)} {'agent_0': 0.27249236968976237, 'agen

Trial name,status,loc
DDPG_diffdemand_b3cbd_00000,ERROR,

Trial name,# failures,error file
DDPG_diffdemand_b3cbd_00000,1,/Users/matiascovarrubias/ray_results/DDPG_cont_test_April9/DDPG_diffdemand_b3cbd_00000_0_2021-04-09_19-45-45/error.txt


Trial name,status,loc
DDPG_diffdemand_b3cbd_00000,ERROR,

Trial name,# failures,error file
DDPG_diffdemand_b3cbd_00000,1,/Users/matiascovarrubias/ray_results/DDPG_cont_test_April9/DDPG_diffdemand_b3cbd_00000_0_2021-04-09_19-45-45/error.txt


TuneError: ('Trials did not complete', [DDPG_diffdemand_b3cbd_00000])