In [9]:
#Imports

from marketsai.markets.diff_demand import DiffDemand
from marketsai.economies.economy_constructor import Economy

#import ray

from ray import tune, shutdown, init
from ray.tune.registry import register_env
from ray.rllib.agents.a3c.a2c import A2CTrainer
from ray.rllib.agents.dqn.dqn import DQNTrainer
from ray.tune.integration.mlflow import MLflowLoggerCallback
from ray.rllib.utils.schedules.exponential_schedule import ExponentialSchedule

import random
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging

In [10]:
# STEP 0: Inititialize ray

NUM_CPUS = 11
shutdown()
init(num_cpus=NUM_CPUS, logging_level=logging.ERROR)

{'node_ip_address': '192.168.1.202',
 'raylet_ip_address': '192.168.1.202',
 'redis_address': '192.168.1.202:47169',
 'object_store_address': '/tmp/ray/session_2021-04-13_18-59-38_074167_13555/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-04-13_18-59-38_074167_13555/sockets/raylet',
 'webui_url': '127.0.0.1:8266',
 'session_dir': '/tmp/ray/session_2021-04-13_18-59-38_074167_13555',
 'metrics_export_port': 61480,
 'node_id': 'dc7345f4a39547cb5639ea780261d8b24a15c8f72ab73086d085cfd3'}

In [11]:
# STEP 1: register environment

register_env("economy", Economy)
env = Economy()
policy_ids = ["policy_{}".format(i) for i in range(env.n_agents)]

In [12]:
# STEP 2: configuration

#Experiment configuration
test=True
date="April13"
env_label="Econ"
if (test==True): 
    MAX_STEPS = 20 * 1000
    exp_label =env_label+"_test_"+date+"_"
else: 
    MAX_STEPS = 3000 * 1000
    exp_label ="_run_"+date 

verbosity=2
stop = {"episodes_total": MAX_STEPS//100}

# Markets configuration
PRICE_BAND_WIDE = 0.1
LOWER_PRICE = 1.47 - PRICE_BAND_WIDE
HIGHER_PRICE = 1.93 + PRICE_BAND_WIDE
DEC_RATE = float(math.e ** (-4 * 10 ** (-6)))
DEC_RATE_HIGH = float(math.e ** (-4 * 10 ** (-6) * 4))

mkt_config = {
    "lower_price": [LOWER_PRICE for i in range(env.n_agents)],
    "higher_price": [HIGHER_PRICE for i in range(env.n_agents)],
    "parameteres": {
                "cost": [1 for i in range(env.n_agents)],
                "values": [2 for i in range(env.n_agents)],
                "ext_demand": 0,
                "substitution": 0.25,
    },
    "space_type": "MultiDiscrete",
    "gridpoints": 16,
    }

# environment (economy) configuration
env_config = {"markets_dict": {"market_0": (DiffDemand, mkt_config), "market_1": (DiffDemand, mkt_config)}}

#exploration configuration
exploration_config = {
    "type": "EpsilonGreedy",
    "epsilon_schedule": ExponentialSchedule(
        schedule_timesteps=1,
        framework="Torch",
        initial_p=1,
        decay_rate=DEC_RATE,
    ),
}

#training configuration
training_config = {
    "gamma": 0.95,
    "lr": 0.15,
    "env": "economy",
    "exploration_config": exploration_config,
    "env_config": env_config,
    "horizon": 100,
    "soft_horizon": True,
    "no_done_at_end": True,
    "multiagent": {
        "policies": {
            policy_ids[i]: (
                None,
                env.observation_space["agent_{}".format(i)],
                env.action_space["agent_{}".format(i)],
                {},
            )
            for i in range(env.n_agents)
        },
        "policy_mapping_fn": (lambda agent_id: policy_ids[int(agent_id.split("_")[1])]),
    },
    "framework": "torch",
    "num_workers": NUM_CPUS - 1,
    "num_gpus": 0,
    "log_level": "ERROR",
    "timesteps_per_iteration": 1000,
    "normalize_actions": False,
}


In [13]:
print(env.observation_space["agent_0"])

Tuple(MultiDiscrete([16 16]), MultiDiscrete([16 16]))


In [18]:
#Policy Gradient Methods: PG, A2C, A3C, PPO, APPO

# algo_list=["PG", "A2C", "A3C", "PPO", "APPO"]
algo_list=["PPO"]
for i in range(len(algo_list)):
    exp_name = exp_label + algo_list[i]
    results = tune.run(
        algo_list[i],
        name=exp_name,
        config=training_config,
        #checkpoint_freq=250,
        checkpoint_at_end=True,
        stop=stop,
        callbacks=[MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)],
        verbose=verbosity
    )

ompat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
[2m[36m(pid=19478)[0m Instructions for updating:
[2m[36m(pid=19478)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19478)[0m Instructions for updating:
[2m[36m(pid=19478)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19481)[0m Instructions for updating:
[2m[36m(pid=19481)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19481)[0m Instructions for updating:
[2m[36m(pid=19481)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19485)[0m Instructions for updating:
[2m[36m(pid=19485)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19485)[0m Instructions for updating:
[2m[36m(pid=19485)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19483)[0m Instructions for updating:

Trial name,# failures,error file
PPO_economy_a28d7_00000,1,/Users/matiascovarrubias/ray_results/Econ_test_April13_PPO/PPO_economy_a28d7_00000_0_2021-04-13_19-04-44/error.txt


Trial name,status,loc
PPO_economy_a28d7_00000,ERROR,

Trial name,# failures,error file
PPO_economy_a28d7_00000,1,/Users/matiascovarrubias/ray_results/Econ_test_April13_PPO/PPO_economy_a28d7_00000_0_2021-04-13_19-04-44/error.txt




TuneError: ('Trials did not complete', [PPO_economy_a28d7_00000])

In [15]:
exploration_config_cont = {
        "type": "OrnsteinUhlenbeckNoise",
        "final_scale": 0.02,
        "scale_timesteps": 100000,
    }

mkt_config_cont=mkt_config.copy()
mkt_config_cont["space_type"] = "Continuous"
env_config_cont= {"markets_dict": {"market_0": (DiffDemand, mkt_config_cont), "market_1": (DiffDemand, mkt_config_cont)}}

training_config_cont=training_config.copy()
training_config_cont["exploration_config"] = exploration_config_cont
training_config_cont["env_config"] = env_config_cont
env=Economy(env_config_cont)

training_config_cont["multiagent"]["policies"] =  {
            policy_ids[i]: (None, env.observation_space[f"agent_{i}"], env.action_space[f"agent_{i}"],{},) for i in range(env.n_agents)
}


In [19]:
#Continous space

# algo_list=["PG", "A2C", "A3C", "PPO", "APPO"]
algo_list=["PPO"]
for i in range(len(algo_list)):
    exp_name = exp_label + algo_list[i]
    results = tune.run(
        algo_list[i],
        name=exp_name,
        config=training_config_cont,
        #checkpoint_freq=250,
        checkpoint_at_end=True,
        stop=stop,
        callbacks=[MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)],
        verbose=verbosity
    )

 will result in a RuntimeError.
[2m[36m(pid=19605)[0m Instructions for updating:
[2m[36m(pid=19605)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19605)[0m Instructions for updating:
[2m[36m(pid=19605)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19609)[0m Instructions for updating:
[2m[36m(pid=19609)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19609)[0m Instructions for updating:
[2m[36m(pid=19609)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19611)[0m Instructions for updating:
[2m[36m(pid=19611)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19611)[0m Instructions for updating:
[2m[36m(pid=19611)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19612)[0m Instructions for updating:
[2m[36m(pid=19612)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19612)[0

Trial name,# failures,error file
PPO_economy_d0cd3_00000,1,/Users/matiascovarrubias/ray_results/Econ_test_April13_PPO/PPO_economy_d0cd3_00000_0_2021-04-13_19-06-01/error.txt


Trial name,status,loc
PPO_economy_d0cd3_00000,ERROR,

Trial name,# failures,error file
PPO_economy_d0cd3_00000,1,/Users/matiascovarrubias/ray_results/Econ_test_April13_PPO/PPO_economy_d0cd3_00000_0_2021-04-13_19-06-01/error.txt


TuneError: ('Trials did not complete', [PPO_economy_d0cd3_00000])