In [13]:
from example_envs.tag_continuous.tag_continuous import TagContinuous
from warp_drive.env_wrapper import EnvWrapper
from warp_drive.training.trainer import Trainer

In [14]:
# Configure the root logger's verbosity.
# Accepted levels include logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR.
import logging

logging.getLogger().setLevel(level=logging.ERROR)

In [15]:
# Experiment configuration, grouped into environment, trainer, per-policy
# network, and checkpoint/logging sections.
run_config = {
    "name": "tag_continuous",
    # --- Environment settings ---
    "env": {
        "num_taggers": 5,
        "num_runners": 20,
        "episode_length": 100,
        "seed": 1234,
        "use_full_observation": False,
        "num_other_agents_observed": 10,
        "tagging_distance": 0.02,
    },
    # --- Trainer settings ---
    "trainer": {
        # number of environment replicas (number of GPU blocks used)
        "num_envs": 100,
        # total batch size per training iteration (across all the environments)
        "train_batch_size": 10000,
        # total number of episodes to train for (can be arbitrarily high!)
        "num_episodes": 5000,
    },
    # --- Policy network settings (one entry per policy name) ---
    "policy": {
        "runner": {
            "to_train": True,  # whether this model needs to be trained
            "algorithm": "A2C",  # training algorithm for this policy
            "gamma": 0.98,  # discount rate
            "lr": 0.005,  # learning rate
            # policy model settings
            "model": {
                "type": "fully_connected",
                "fc_dims": [256, 256],
                "model_ckpt_filepath": "",
            },
        },
        "tagger": {
            "to_train": True,
            "algorithm": "A2C",
            "gamma": 0.98,
            "lr": 0.002,
            "model": {
                "type": "fully_connected",
                "fc_dims": [256, 256],
                "model_ckpt_filepath": "",
            },
        },
    },
    # --- Checkpoint saving settings ---
    "saving": {
        "metrics_log_freq": 10,  # print the metrics every N iterations
        "model_params_save_freq": 5000,  # save model parameters every N iterations
        "basedir": "/tmp",  # base folder used for saving
        "name": "continuous_tag",  # experiment name
        "tag": "example",  # experiment tag
    },
}

In [None]:
"""
class MyDualModeEnvironment(CUDAEnvironmentContext):
    
    ...
    
    def get_data_dictionary(self):
        data_dict = DataFeed()
        ...
        return data_dict 
    
    def get_tensor_dictionary(self):
        tensor_dict = DataFeed()
        ...
        return tensor_dict
    
    def reset(self):
        # reset for CPU environment
        ...
    
    def step(self, actions=None):
        args = [YOUR_CUDA_STEP_ARGUMENTS]
        
        if self.use_cuda:
            self.cuda_step(
                *self.cuda_step_function_feed(args),
                block=self.cuda_function_manager.block,
                grid=self.cuda_function_manager.grid,
            )
            return None
        else:
            ...
            return obs, rew, done, info

"""

Seed set to 1716681179


In [18]:
# Wrap the Tag environment with EnvWrapper.
# env_backend must be "pycuda" or "numba" for the environment to run on the GPU.
env_wrapper = EnvWrapper(
    TagContinuous(**run_config["env"]),
    num_envs=run_config["trainer"]["num_envs"],
    env_backend="pycuda",
)

# Policy models can be shared between agents, so map each policy model name
# to the ids of the agents that use it.
policy_tag_to_agent_id_map = dict(
    tagger=list(env_wrapper.env.taggers),
    runner=list(env_wrapper.env.runners),
)

# Build the trainer from the wrapped environment, the run configuration and
# the policy-to-agent mapping.
trainer = Trainer(
    config=run_config,
    env_wrapper=env_wrapper,
    policy_tag_to_agent_id_map=policy_tag_to_agent_id_map,
)

# Run the training loop.
trainer.train()



Device: 0
Iterations Completed                    : 1 / 50
Speed performance stats
Mean policy eval time per iter (ms)     :     321.26
Mean action sample time per iter (ms)   :      43.83
Mean env. step time per iter (ms)       :     223.14
Mean training time per iter (ms)        :      99.42
Mean total time per iter (ms)           :     711.49
Mean steps per sec (policy eval)        :   31127.67
Mean steps per sec (action sample)      :  228168.81
Mean steps per sec (env. step)          :   44815.14
Mean steps per sec (training time)      :  100587.37
Mean steps per sec (total)              :   14054.99
Metrics for policy 'runner'
VF loss coefficient                     :    0.01000
Entropy coefficient                     :    0.05000
Total loss                              :   -0.05545
Policy loss                             :    0.18206
Value function loss                     :    0.20981
Mean rewards                            :    0.00114
Max. rewards                           

In [None]:
from warp_drive.training.pytorch_lightning import (
    CUDACallback,
    PerfStatsCallback,
    WarpDriveModule,
)

ERPROBLEM

rationale for why stricter anti-trust enforcement will not fix 

https://www.sciencedirect.com/science/article/abs/pii/S0167629616303757

This article shows that the price elasticity is above -2 (and even above -1), very close to 0, indicating that demand is almost insensitive to price increases.

Suppose we have a hospital emergency room and we want it to charge the equilibrium market price P for one particular medical procedure. However, it presently charges a different price.

Solution:

We set up an auction. This auction is a sealed bid auction. In the sealed bid auction hospitals provide a bid on how much they would like to charge. 

Hospitals then submit bids. Note: it may be possible to have multiple rounds. The idea is that credits will be given after the winning bid is determined. Hospitals can then
choose either to participate in further rounds for the opportunity to earn more credits, or to abstain after completing at least one round.

The final price will depend on the final auction rule

Open question: should the winning bid be chosen based on the median bid, the second-lowest bid, or the second-highest bid?

The Julia package RadiiPolynomial will be used to find and prove that the equilibrium is the lowest possible price under Cournot competition.

Assume that the competitive regime that needs to be emulated is Cournot competition.

In [None]:
from warp_drive.utils.constants import Constants
from warp_drive.utils.data_feed import DataFeed
from warp_drive.utils.gpu_environment_context import CUDAEnvironmentContext

"""
this is what args for step should look like

            args = [
                _LOC_X,
                _LOC_Y,
                _SP,
                _DIR,
                _ACC,
                "agent_types",
                "edge_hit_reward_penalty",
                "edge_hit_penalty",
                "grid_length",
                "acceleration_actions",
                "turn_actions",
                "max_speed",
                "num_other_agents_observed",
                "skill_levels",
                "runner_exits_game_after_tagged",
                "still_in_the_game",
                "use_full_observation",
                _OBSERVATIONS,
                _ACTIONS,
                "neighbor_distances",
                "neighbor_ids_sorted_by_distance",
                "nearest_neighbor_ids",
                _REWARDS,
                "step_rewards",
                "num_runners",
                "distance_margin_for_reward",
                "tag_reward_for_tagger",
                "tag_penalty_for_runner",
                "end_of_game_reward_for_runner",
                "_done_",
                "_timestep_",
                ("n_agents", "meta"),
                ("episode_length", "meta"),
            ]
"""

class MyDualModeEnvironment(CUDAEnvironmentContext):
    """Template for an environment that can step on either the CPU or the GPU.

    This is a skeleton: the ``...`` placeholders and the
    ``YOUR_CUDA_STEP_ARGUMENTS`` name must be replaced with real code before
    the class can be used.
    """

    def get_data_dictionary(self):
        """Create a ``DataFeed``, fill it (placeholder), and return it."""
        data_dict = DataFeed()
        ...  # placeholder: add entries to data_dict
        return data_dict

    def get_tensor_dictionary(self):
        """Create a ``DataFeed``, fill it (placeholder), and return it."""
        tensor_dict = DataFeed()
        ...  # placeholder: add entries to tensor_dict
        return tensor_dict

    def reset(self):
        """Reset the CPU version of the environment (placeholder body)."""
        # reset for CPU environment
        ...

    def step(self, actions=None):
        """Advance the environment by one step.

        When ``self.use_cuda`` is truthy, the step is delegated to
        ``self.cuda_step`` and ``None`` is returned. Otherwise the CPU path
        (still to be written) returns the tuple ``(obs, rew, done, info)``.

        Args:
            actions: Actions for this step; not used by the template body.
        """
        # Placeholder: replace with the list of arguments for the CUDA step.
        args = [YOUR_CUDA_STEP_ARGUMENTS]

        if self.use_cuda:
            self.cuda_step(
                *self.cuda_step_function_feed(args),
                block=self.cuda_function_manager.block,
                grid=self.cuda_function_manager.grid,
            )
            return None
        else:
            ...  # placeholder: CPU step logic that defines obs, rew, done, info
            # Fixed: the last element was the truncated name `inf`.
            return obs, rew, done, info

In [None]:
# Experiment configuration for the PyTorch Lightning run, grouped into
# environment, trainer, per-policy network, and checkpoint/logging sections.
run_config = {
    "name": "tag_continuous",
    # --- Environment settings ---
    "env": {
        "num_taggers": 5,  # taggers in the environment
        "num_runners": 100,  # runners in the environment
        "grid_length": 20.0,  # side length of the (square) playing grid
        "episode_length": 200,  # timesteps per episode
        "max_acceleration": 0.1,  # maximum acceleration
        "min_acceleration": -0.1,  # minimum acceleration
        "max_turn": 2.35,  # 3*pi/4 radians
        "min_turn": -2.35,  # -3*pi/4 radians
        "num_acceleration_levels": 10,  # discretized accelerate actions
        "num_turn_levels": 10,  # discretized turn actions
        "skill_level_tagger": 1.0,  # skill level for the tagger
        "skill_level_runner": 1.0,  # skill level for the runner
        # whether each agent sees full or only partial information
        "use_full_observation": False,
        # whether a runner leaves the game once it has been tagged
        "runner_exits_game_after_tagged": True,
        "num_other_agents_observed": 10,  # other agents each agent can see
        "tag_reward_for_tagger": 10.0,  # tagger's reward for tagging a runner
        "tag_penalty_for_runner": -10.0,  # runner's penalty for getting tagged
        # end-of-game reward for a runner that was never tagged
        "end_of_game_reward_for_runner": 1.0,
        # tagger-runner margin within which the runner counts as 'tagged'
        "tagging_distance": 0.02,
    },
    # --- Trainer settings ---
    "trainer": {
        # number of environment replicas (number of GPU blocks used)
        "num_envs": 50,
        # total batch size per training iteration (across all the environments)
        "train_batch_size": 10000,
        # total number of episodes to train for (can be arbitrarily high!)
        "num_episodes": 500,
    },
    # --- Policy network settings (one entry per policy name) ---
    "policy": {
        "runner": {
            "to_train": True,  # whether this model needs to be trained
            "algorithm": "A2C",  # training algorithm for this policy
            "gamma": 0.98,  # discount rate
            "lr": 0.005,  # learning rate
            # policy model settings
            "model": {
                "type": "fully_connected",
                "fc_dims": [256, 256],
                "model_ckpt_filepath": "",
            },
        },
        "tagger": {
            "to_train": True,
            "algorithm": "A2C",
            "gamma": 0.98,
            "lr": 0.002,
            "model": {
                "type": "fully_connected",
                "fc_dims": [256, 256],
                "model_ckpt_filepath": "",
            },
        },
    },
    # --- Checkpoint saving settings ---
    "saving": {
        "metrics_log_freq": 10,  # print the metrics every N iterations
        "model_params_save_freq": 5000,  # save model parameters every N iterations
        "basedir": "/tmp",  # base folder used for saving
        "name": "continuous_tag",  # experiment name
        "tag": "example",  # experiment tag
    },
}

# %% [markdown]
# # Instantiate the WarpDrive Module

# %% [markdown]
# In order to instantiate the WarpDrive module,
# we first use an environment wrapper to specify that the environment needs to
# be run on the GPU (via the `env_backend` flag).
# Also, agents in the environment can share policy models;
# so we specify a dictionary to map each policy network model to the list of agent ids using that model.

# %%
# Wrap the Tag environment with EnvWrapper.
# env_backend must be "pycuda" for the environment to run on the GPU.
# WarpDrive v2 also supports the JIT numba backend: with Numba installed,
# "numba" can be used in place of "pycuda".
env_wrapper = EnvWrapper(
    TagContinuous(**run_config["env"]),
    num_envs=run_config["trainer"]["num_envs"],
    env_backend="pycuda",
)

# Policy models can be shared between agents, so map each policy model name
# to the ids of the agents that use it.
policy_tag_to_agent_id_map = dict(
    tagger=list(env_wrapper.env.taggers),
    runner=list(env_wrapper.env.runners),
)

# Build the WarpDrive module from the wrapped environment, the run
# configuration and the policy-to-agent mapping.
wd_module = WarpDriveModule(
    config=run_config,
    env_wrapper=env_wrapper,
    policy_tag_to_agent_id_map=policy_tag_to_agent_id_map,
    verbose=True,
)

In [22]:
from pytorch_lightning import Trainer as tr

In [23]:
# Number of epochs = total timesteps (episodes x episode length) divided by
# the training batch size.
num_episodes = run_config["trainer"]["num_episodes"]
episode_length = run_config["env"]["episode_length"]
training_batch_size = run_config["trainer"]["train_batch_size"]
num_epochs = int(num_episodes * episode_length / training_batch_size)

# This notebook uses just a single GPU.
num_gpus = 1

# Callbacks handed to the PyTorch Lightning trainer.
log_freq = run_config["saving"]["metrics_log_freq"]
cuda_callback = CUDACallback(module=wd_module)
perf_stats_callback = PerfStatsCallback(
    batch_size=wd_module.training_batch_size,
    num_iters=wd_module.num_iters,
    log_freq=log_freq,
)

# Instantiate the PyTorch Lightning trainer with the callbacks.
# reload_dataloaders_every_n_epochs=1 makes train_dataloader() get invoked
# each epoch.
trainer = tr(
    max_epochs=num_epochs,
    accelerator="gpu",
    devices=num_gpus,
    callbacks=[cuda_callback, perf_stats_callback],
    reload_dataloaders_every_n_epochs=1,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Run training: fit the WarpDrive module with the PyTorch Lightning trainer.
trainer.fit(wd_module)

In [1]:
from juliacall import Main as jl, convert as jlconvert

In [2]:
# Add the Julia package RadiiPolynomial by evaluating a Pkg.add call in the
# Julia session exposed by juliacall.
jl.seval('Pkg.add("RadiiPolynomial")')
#jl.seval('Pkg.add("GraphPlot")')

    Updating registry at `~/.julia/registries/General.toml`
   Resolving package versions...
   Installed IntervalArithmetic ─ v0.22.17
   Installed RadiiPolynomial ──── v0.8.15
    Updating `~/miniconda3/julia_env/Project.toml`
  [f2081a94] + RadiiPolynomial v0.8.15
    Updating `~/miniconda3/julia_env/Manifest.toml`
  [d1acc4aa] + IntervalArithmetic v0.22.17
  [f2081a94] + RadiiPolynomial v0.8.15
  [5eaf0fd0] + RoundingEmulator v0.2.1
  [4e9b3aee] + CRlibm_jll v1.0.1+0
Precompiling project...
[32m  ✓ [39m[90mIntervalArithmetic[39m
[32m  ✓ [39m[90mIntervalArithmetic → IntervalArithmeticLinearAlgebraExt[39m
[32m  ✓ [39mRadiiPolynomial
  3 dependencies successfully precompiled in 5 seconds. 103 already precompiled.
