In [117]:
import glob, os, sys
import numpy as np
import pandas as pd
import plotnine as pn
from random import sample
# Root directory for experiment data. The SPINUP_DATA_DIR environment variable
# overrides the default so the notebook is not tied to one machine's layout.
DATA_DIR = os.environ.get(
    'SPINUP_DATA_DIR', '/Users/kurtsmith/research/spinningup/data'
)
# Never truncate columns when displaying DataFrames.
pd.set_option('display.max_columns', None)

In [7]:
from spinup.my_algos.ultimatum_agents import *
from spinup.environments.dual_ultimatum_env import *

In [148]:
def assign_matches(num_agents):
    """Randomly partition the agent indices ``0 .. num_agents - 1`` into pairs.

    Parameters
    ----------
    num_agents : int
        Number of agents to pair up. Must be even and non-negative so that
        every match has exactly two participants.

    Returns
    -------
    list of list of int
        ``num_agents // 2`` two-element lists; each index appears exactly once.

    Raises
    ------
    ValueError
        If ``num_agents`` is odd or negative.
    """
    # An odd count would leave a one-element "match" that breaks callers
    # indexing m[0] / m[1], so reject it up front.
    if num_agents < 0 or num_agents % 2:
        raise ValueError(
            f'num_agents must be a non-negative even number, got {num_agents}'
        )
    # Sampling the full population without replacement is a random permutation.
    shuffled = sample(range(num_agents), k=num_agents)
    return [shuffled[i:i + 2] for i in range(0, num_agents, 2)]

In [149]:
# Roster of four constant-strategy bots, described as (offer, threshold) pairs.
constant_bot_params = [(0.5, 1.0), (0.6, 0.), (0.8, 0.), (0.2, 0.)]
agent_list = [
    ConstantBot(offer=bot_offer, threshold=bot_threshold)
    for bot_offer, bot_threshold in constant_bot_params
]
# Keep the individual handles available alongside the list.
agent_1, agent_2, agent_3, agent_4 = agent_list

In [184]:
# Roster: one constant bot plus three distribution bots that share the same
# means (0.5) and differ only in the spread of their offer/threshold.
agent_1 = ConstantBot(offer=0.2, threshold=0.8)
agent_2, agent_3, agent_4 = (
    StaticDistribBot(
        mean_offer=0.5,
        std_offer=spread,
        mean_threshold=0.5,
        std_threshold=spread,
    )
    for spread in (0.1, 1., 2.)
)
agent_list = [agent_1, agent_2, agent_3, agent_4]

In [185]:
# Tournament: every round the agents are randomly re-paired, each pair plays
# `round_length` turns in its own environment, and every agent's reward is
# accumulated into `score_list` (indexed by position in `agent_list`).
num_rounds = 10
round_length = 2
score_list = np.zeros(len(agent_list))
for round_idx in range(num_rounds):  # not named `round`: that shadows the builtin
    # Pair up all agents and create one fresh environment per match.
    matches = assign_matches(len(agent_list))
    env_list = [DualUltimatum() for _ in matches]

    # Lookup tables: agent index -> opponent index, agent index -> match index.
    # (agent_opponent_dict is not read in this cell; kept for inspection.)
    agent_opponent_dict = dict()
    agent_match_dict = dict()
    for i, m in enumerate(matches):
        agent_opponent_dict[m[0]] = m[1]
        agent_opponent_dict[m[1]] = m[0]
        agent_match_dict[m[0]] = i
        agent_match_dict[m[1]] = i

    # Generate initial obs for each match and hand it to both participants.
    env_obs_list = [env.reset() for env in env_list]
    ag_obs_list = [env_obs_list[agent_match_dict[ag]] for ag in range(len(agent_list))]
    # Separate list object so writing next observations never touches the
    # observations currently being acted on.
    ag_obs_next_list = list(ag_obs_list)

    for _ in range(round_length):
        # Get each agent's actions
        agent_acts = [ag.act(o) for ag, o in zip(agent_list, ag_obs_list)]

        # Rearrange actions to each match
        match_acts = [
            np.array([agent_acts[i] for i in match])
            for match in matches]

        # Pass actions to each match env
        env_output = [env.step(acts) for acts, env in zip(match_acts, env_list)]

        # Update scores and next observations based on env steps.
        # NOTE(review): both agents of a match receive the same observation
        # vector here, while a later exploratory cell rolls it by 2 for the
        # second agent — confirm which layout the agents expect.
        # NOTE(review): the third element `d` of the step result is ignored.
        for agents, output in zip(matches, env_output):
            o, r, d, _ = output
            for ag, r_ag in zip(agents, r):
                ag_obs_next_list[ag] = o
                score_list[ag] += r_ag

        ag_obs_list = list(ag_obs_next_list)
print(f'matches {matches}')
print(f'scores {score_list}')

matches [[0, 3], [2, 1]]
scores [0.         4.92670313 2.16138606 2.91191081]


In [186]:
# First element of the first match's step result, printed as-is and then
# circularly shifted by two positions.
first_match_obs = env_output[0][0]
print(first_match_obs)
print(np.roll(first_match_obs, 2))

[2.00000000e-01 8.00000000e-01 7.18261354e-04 6.68076748e-01]
[7.18261354e-04 6.68076748e-01 2.00000000e-01 8.00000000e-01]


In [187]:
# For every match, show its participants and observation, then store the
# observation for the first participant and a copy shifted by two positions
# for the second participant.
for pair, step_result in zip(matches, env_output):
    match_obs = step_result[0]
    print(pair)
    print(match_obs)
    print('===')
    ag_obs_next_list[pair[0]] = match_obs
    ag_obs_next_list[pair[1]] = np.roll(match_obs, 2)

[0, 3]
[2.00000000e-01 8.00000000e-01 7.18261354e-04 6.68076748e-01]
===
[2, 1]
[0.55248612 0.62624217 0.51905748 0.40240445]
===


In [188]:
# Display the per-agent next-observation list (position = agent index).
ag_obs_next_list

[array([2.00000000e-01, 8.00000000e-01, 7.18261354e-04, 6.68076748e-01]),
 array([0.51905748, 0.40240445, 0.55248612, 0.62624217]),
 array([0.55248612, 0.62624217, 0.51905748, 0.40240445]),
 array([7.18261354e-04, 6.68076748e-01, 2.00000000e-01, 8.00000000e-01])]

In [88]:
# Fixed pairing for a manual walk-through: agent 0 vs 1 and agent 2 vs 3,
# with one environment per match and empty lookup tables to fill in later.
matches = [(0,1), (2,3)]
env_list = [DualUltimatum() for _ in matches]
agent_opponent_dict, agent_match_dict = {}, {}



In [89]:
# Fill the lookup tables in place: each agent maps to its opponent and to the
# index of the match it plays in.
for match_idx, pairing in enumerate(matches):
    first, second = pairing[0], pairing[1]
    agent_opponent_dict.update({first: second, second: first})
    agent_match_dict.update({first: match_idx, second: match_idx})

In [92]:
# Reset every match environment, keeping the value each reset() returns
# as that match's starting observation.
env_obs_list = [match_env.reset() for match_env in env_list]

In [93]:
# Display the value returned by reset() for each match environment.
env_obs_list

[array([0.5, 0.5, 0.5, 0.5]), array([0.5, 0.5, 0.5, 0.5])]

In [103]:
# Give every agent the initial observation of the match it plays in.
ag_obs_list = [env_obs_list[agent_match_dict[ag]] for ag in range(len(agent_list))]
# Shallow copy rather than a second name for the same list, so writing an
# agent's next observation does not overwrite its current one.
ag_obs_next_list = list(ag_obs_list)

In [107]:
def one_turn(verbose=True):
    """Play one turn of every match, using and updating notebook globals.

    Reads ``agent_list``, ``matches``, ``env_list`` and ``ag_obs_list``;
    adds each agent's reward to ``score_list`` in place, writes each agent's
    new observation into ``ag_obs_next_list`` and finally rebinds the global
    ``ag_obs_list`` to that list.

    Parameters
    ----------
    verbose : bool, optional
        When true (the default), print each match's new observation between
        ``----`` and ``****`` marker lines.
    """
    # Without this declaration the assignment at the bottom would make
    # `ag_obs_list` a local name and the read just below would raise
    # UnboundLocalError.
    global ag_obs_list

    # Get each agent's actions
    agent_acts = [ag.act(o) for ag, o in zip(agent_list, ag_obs_list)]

    # Rearrange actions to each match
    match_acts = [
        np.array([agent_acts[i] for i in match])
        for match in matches]

    # Pass actions to each match env
    env_output = [env.step(acts) for acts, env in zip(match_acts, env_list)]

    # Update scores based on env steps
    for agents, output in zip(matches, env_output):
        o, r, d, _ = output
        if verbose:
            print('----')
            print(o)
            print('****')
        # Both participants receive the same observation vector.
        for ag, r_ag in zip(agents, r):
            ag_obs_next_list[ag] = o
            score_list[ag] += r_ag

    ag_obs_list = ag_obs_next_list
    
    

In [112]:

# Play `num_turns` turns of the fixed matches, printing the running scores
# before the first turn and after every turn.
num_turns = 10
print(score_list)
for _ in range(num_turns):
    # Get each agent's actions
    agent_acts = [ag.act(o) for ag, o in zip(agent_list, ag_obs_list)]

    # Rearrange actions to each match
    match_acts = [
        np.array([agent_acts[i] for i in match])
        for match in matches]

    # Pass actions to each match env
    env_output = [env.step(acts) for acts, env in zip(match_acts, env_list)]

    # Update scores based on env steps. Only matches and their step results
    # are zipped; no other list is needed here.
    for agents, output in zip(matches, env_output):
        o, r, d, _ = output
        for ag, r_ag in zip(agents, r):
            ag_obs_next_list[ag] = o
            score_list[ag] += r_ag

    ag_obs_list = ag_obs_next_list
    print(score_list)

[0. 0. 0. 0.]
[1.1 0.9 0.  0. ]
[2.2 1.8 0.  0. ]
[3.3 2.7 0.  0. ]
[4.4 3.6 0.  0. ]
[5.5 4.5 0.  0. ]
[6.6 5.4 0.  0. ]
[7.7 6.3 0.  0. ]
[8.8 7.2 0.  0. ]
[9.9 8.1 0.  0. ]
[11.  9.  0.  0.]


In [56]:
# Show the current pairing and the accumulated scores.
# NOTE(review): `scores` is not assigned in any visible cell (the other cells
# accumulate into `score_list`) — confirm it exists on a fresh kernel.
print(matches)
print(scores)

[(0, 1), (2, 3)]
[2.2 1.8 0.  0. ]


In [50]:
# Add the first reward entry to the score of the first agent in `agents`.
# NOTE(review): `agents` and `r` are only bound as loop variables in other
# cells, and `scores` is not assigned in any visible cell — this line relies
# on leftover kernel state; confirm before re-running.
scores[agents[0]] += r[0]

array([0, 0])

In [48]:
# Inspect one step result (the 4-tuple other cells unpack as `o, r, d, _`).
# NOTE(review): `output` is only bound as a loop variable in other cells, so
# this shows whatever the last loop iteration left behind.
print(output)

(array([0.8, 0.7, 0.2, 0.1]), array([0, 0]), False, {})


In [35]:
# For each match environment, print its index and the result of calling
# step() with that match's stacked actions.
# NOTE(review): this calls env.step, so it presumably advances every
# environment by one extra turn; `match_acts` must already exist from an
# earlier cell.
for i, env in enumerate(env_list):
    print(i)
    print(env.step(match_acts[i]))


0
(array([0.5, 0.5, 0.6, 0.5]), array([1.1, 0.9]), False, {})
1
(array([0.8, 0.7, 0.2, 0.1]), array([0, 0]), False, {})


In [6]:
# Display `obs` together with the result of one env.step(act) call.
# NOTE(review): `obs` and `act` are not assigned in any visible cell and `env`
# is only bound as a loop variable elsewhere — confirm these exist on a fresh
# kernel.
obs, env.step(act)

(array([0.5, 0.5, 0.7, 0.1]), array([1.2, 0.8]), False, {})