In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import pyspiel
import open_spiel.python.games
from open_spiel.python.observation import make_observation
from open_spiel.python.examples.ubc_decorators import TakeSingleActionDecorator
from open_spiel.python.examples.straightforward_agent import StraightforwardAgent
from open_spiel.python.examples.ppo_utils import EnvParams
from auctions.webutils import *
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"


In [67]:
# Load a stored checkpoint and build an environment for the game it belongs to.
seed = 1234

# Two positional name strings plus the keyword t=6656 select the checkpoint.
equilibrium_solver_run_checkpoint = get_checkpoint_by_name(
    'confused_national_wide_test_with_gpu',
    'confused_national_with_undersell-aug11ppo_potential_length-100',
    t=6656,
)

# Parameters handed to the checkpoint loader.
env_params = EnvParams(
    track_stats=True,
    seed=seed,
    num_envs=1,
    history_prefix=[0, 0],
    sync=True,
)
env_and_policy = ppo_db_checkpoint_loader(
    equilibrium_solver_run_checkpoint,
    env_params=env_params,
)

# `env` is built from a second, separate EnvParams that does not reuse the
# seed / history_prefix settings above.
# NOTE(review): confirm that this difference is intentional.
inspection_env_params = EnvParams(
    normalize_rewards=True,
    observer_params={'normalize': False},
)
env = inspection_env_params.make_env(env_and_policy.game)

In [None]:
# Display the first element of `env.envs`.
env.envs[0]

In [None]:
# Displays the `reset` attribute itself; it is not called (no parentheses).
# NOTE(review): looks like leftover introspection — confirm whether a call was intended.
env.envs[0].reset

In [68]:
# Replay a fixed sequence of four action ids from a fresh initial state, then
# print whether the resulting state is terminal and its rewards.
state = env_and_policy.game.new_initial_state()
for action_id in (3, 0, 9 * 2 + 1, 3 * 2 + 1):
    state.apply_action(action_id)
print(state.is_terminal())
print(state.rewards())

True
[12.0, 8780.0]


In [69]:
# Same replay as a fresh initial state with action ids 3, 0, 1 and 7 applied in
# order, followed by the terminal flag and the rewards.
state = env_and_policy.game.new_initial_state()
for action_id in (3, 0, 1, 3 * 2 + 1):
    state.apply_action(action_id)
print(state.is_terminal())
print(state.rewards())

True
[325.0, 8780.0]


'Bid for [0 2 1] licenses @ $1640.00 with activity 21'

In [48]:
# Print the string form of action ids 19 (= 9*2 + 1) and 1 for the current `state`.
for action_id in (9 * 2 + 1, 1):
    print(state.action_to_string(action_id))

Bid for [2 0 1] licenses @ $1200.00 with activity 15
Bid for [0 0 1] licenses @ $400.00 with activity 5


In [30]:
# Display the repr of `state` (assigned in an earlier cell).
state

Round: 1
Player 0: EnumeratedValues: [    0   932  1585  2013  3121  3813  4823  6098  6789   556  1578  2269
  2723  3780  4485  5700  6813  7518  2931  4093  4785  5239  6343  7048
  8216  9376 10081] Budget: 10081
Player 1: EnumeratedValues: [    0   963  1533  2099  3167  3757  9113 10420 11010   740  1762  2352
  2936  3976  4572 10189 11309 11905  1268  2308  2898  3482  4528  5124
 10735 11861 12457] Budget: 12457

In [13]:
# Reset `env` and keep the returned value; other cells index it as `time_step[0]`.
time_step = env.reset()

In [14]:
# Call `current_player()` on the first entry of `time_step` and display the result.
time_step[0].current_player()

0

In [16]:
# Display entry 0 of the 'info_dict' observation for the first time step.
# Presumably this is player 0's view (the displayed 'player' field is [1., 0.]) — TODO confirm.
time_step[0].observations['info_dict'][0]

{'player': array([1., 0.], dtype=float32),
 'bidder_type': array([0., 1., 0., 0., 0.], dtype=float32),
 'activity': array([36.], dtype=float32),
 'sor_exposure': array([0.], dtype=float32),
 'submitted_demand_history': array([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=float32),
 'processed_demand_history': array([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=float32),
 'sor_profits': array([   0.,  532.,  785., 1393., 2101., 2393., 3583., 4458., 4749.,
         156.,  778., 1069., 1703., 2360., 2665., 4060., 4773., 5078.,
        2131., 2893., 3185., 3819., 4523., 4828., 6176., 6936., 7241.],
       dtype=float32),
 'clock_profits': array([   0.,  532.,  785., 1393., 2101., 2393., 3583., 4458., 4749.,
         156.,  778., 1069., 1703., 2360., 2665., 4060., 4773., 5078.,
        2131., 2893., 3185., 3819., 4523., 4828., 6176., 6936., 7241.],
       dtype=float32),
 'round': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [4]:
# Display entry 1 of the 'info_dict' observation for the first time step.
# Presumably this is player 1's view (the displayed 'player' field is [0., 1.]) — TODO confirm.
time_step[0].observations['info_dict'][1]

{'player': array([0., 1.], dtype=float32),
 'bidder_type': array([0., 0., 0., 1., 0.], dtype=float32),
 'activity': array([36.], dtype=float32),
 'sor_exposure': array([0.], dtype=float32),
 'submitted_demand_history': array([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=float32),
 'processed_demand_history': array([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=float32),
 'sor_profits': array([   0.,  739., 1394., 1355., 2704., 3565., 2517., 4213., 5075.,
         365., 1477., 2339., 2257., 3472., 4402., 3766., 5098., 6028.,
         993., 2306., 3167., 3085., 4367., 5297., 4595., 5993., 6923.],
       dtype=float32),
 'clock_profits': array([   0.,  739., 1394., 1355., 2704., 3565., 2517., 4213., 5075.,
         365., 1477., 2339., 2257., 3472., 4402., 3766., 5098., 6028.,
         993., 2306., 3167., 3085., 4367., 5297., 4595., 5993., 6923.],
       dtype=float32),
 'round': array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [10]:
# Replace the agent in slot 1 of `env_and_policy.agents` with `straightforward`.
# NOTE(review): `straightforward` is not assigned in any visible cell. It is
# presumably a StraightforwardAgent instance (that class is imported at the top).
# Define it before this cell so the notebook survives Restart & Run All.
env_and_policy.agents[1] = straightforward

In [11]:
# Step every agent through the environment in turn, printing each chosen action
# and the done flags after every environment step, until the episode counter
# reaches `num_episodes`.
num_episodes = 10
agents = env_and_policy.agents
time_step = env.reset()
# The counter starts at len(env) rather than 0.
# NOTE(review): presumably this counts episodes already in flight — confirm.
episode_counter = len(env)

while episode_counter < num_episodes:
    for acting_agent in agents:
        agent_output = acting_agent.step(time_step, is_evaluation=True)
        for step_output in agent_output:
            print(acting_agent.player_id, step_output.action)
        time_step, rewards, dones, _ = env.step(agent_output, reset_if_done=True)
        print(dones)
    # Only the done flags from the last agent's step of this pass are counted.
    episode_counter = episode_counter + sum(dones)



0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 5
[True]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 5
[True]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 5
[True]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[False]
0 3
[False]
1 5
[True]
0 3
[False]
1 8
[False]
0 3
[False]
1 8
[Fal

In [None]:
# Check the straightforward agent's choice on the first time step only.
# The original looped over `time_step` and broke unconditionally after the first
# element; indexing [0] says the same thing directly and matches the other cells.
ts = time_step[0]
d = ts.observations['info_dict'][0]

# 'bidder_type' is displayed as a one-hot float array in the cells above.
# nonzero()[0] is a 1-D index array, so take its first element explicitly:
# int() on a 1-element (ndim > 0) array is deprecated since NumPy 1.25.
bt = int(d['bidder_type'].nonzero()[0][0])

# NOTE(review): `straightforward` is not assigned in any visible cell — make
# sure it is defined before running this on a fresh kernel.
print(bt, straightforward.step(ts).action)
print(d['sor_profits'])

In [12]:
# Display `all_bids` from the game's auction parameters.
# NOTE(review): `game` is not assigned in any visible cell — confirm which game
# object this refers to, and define it so the cell runs on a fresh kernel.
game.auction_params.all_bids

array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [1, 1],
       [1, 2],
       [2, 0],
       [2, 1],
       [2, 2]])

In [None]:
# Display the opening prices wrapped in a NumPy array.
# NOTE(review): neither `np` nor `game` is imported/assigned in a visible cell
# (`np` may arrive via the star import) — confirm both are in scope.
# The trailing comment on the next line is a disabled scratch expression.
np.array(game.auction_params.opening_prices)# @@ np.array([2,2])

In [None]:
# Display `max_budget` from the game's auction parameters.
# NOTE(review): `game` is not assigned in any visible cell — confirm it is in scope.
game.auction_params.max_budget

In [None]:
# Display `max_activity` from the game's auction parameters.
# NOTE(review): `game` is not assigned in any visible cell — confirm it is in scope.
game.auction_params.max_activity

In [None]:
# Display `activity` from the game's auction parameters.
# NOTE(review): `game` is not assigned in any visible cell — confirm it is in scope.
game.auction_params.activity

In [None]:
# Action 7 is the way to go (per the straightforward strategy).
# Actual played action is: TODO — this note was left unfinished; record the observed action here.