# 0. Notebook Setup

The goal of this notebook will be to (1) validate the output of the ZSG results and (2) evaluate their added value.

First, we will validate the ZSG results by checking that their win probabilities are similar to those from the non-strategic policy evaluation. It will also be interesting to see whether the ZSG optimization benefits the weaker or the stronger player more.

Second, we will examine how much the optimal policy changes when the ZSG approach is used.

In [3]:
import function_tool as ft

import pandas as pd

import init_simple_mdp as imdp

In [4]:
# Show the action labels exposed by init_simple_mdp as `a_list`.
# NOTE(review): this prints the entire list; slicing (e.g. a_list[:12]) plus len() would keep the output readable.
print(imdp.a_list)

['SI20-c-o', 'SI20-c-m', 'SI20-c-i', 'SO20-c-o', 'SO20-c-m', 'SO20-c-i', 'D20-c-o', 'D20-c-m', 'D20-c-i', 'T20-c-o', 'T20-c-m', 'T20-c-i', 'SI20-cc-o', 'SI20-cc-m', 'SI20-cc-i', 'SO20-cc-o', 'SO20-cc-m', 'SO20-cc-i', 'D20-cc-o', 'D20-cc-m', 'D20-cc-i', 'T20-cc-o', 'T20-cc-m', 'T20-cc-i', 'SI20-cw-o', 'SI20-cw-m', 'SI20-cw-i', 'SO20-cw-o', 'SO20-cw-m', 'SO20-cw-i', 'D20-cw-o', 'D20-cw-m', 'D20-cw-i', 'T20-cw-o', 'T20-cw-m', 'T20-cw-i', 'SI1-c-o', 'SI1-c-m', 'SI1-c-i', 'SO1-c-o', 'SO1-c-m', 'SO1-c-i', 'D1-c-o', 'D1-c-m', 'D1-c-i', 'T1-c-o', 'T1-c-m', 'T1-c-i', 'SI1-cc-o', 'SI1-cc-m', 'SI1-cc-i', 'SO1-cc-o', 'SO1-cc-m', 'SO1-cc-i', 'D1-cc-o', 'D1-cc-m', 'D1-cc-i', 'T1-cc-o', 'T1-cc-m', 'T1-cc-i', 'SI1-cw-o', 'SI1-cw-m', 'SI1-cw-i', 'SO1-cw-o', 'SO1-cw-m', 'SO1-cw-i', 'D1-cw-o', 'D1-cw-m', 'D1-cw-i', 'T1-cw-o', 'T1-cw-m', 'T1-cw-i', 'SI18-c-o', 'SI18-c-m', 'SI18-c-i', 'SO18-c-o', 'SO18-c-m', 'SO18-c-i', 'D18-c-o', 'D18-c-m', 'D18-c-i', 'T18-c-o', 'T18-c-m', 'T18-c-i', 'SI18-cc-o', 'SI18-cc

# 1. Win Probability Validation

This section compares the ZSG and non-strategic win probabilities for a stronger player with $\epsilon=1$ against a weaker player with $\epsilon=2$.

In [None]:
# Load the stored ZSG value results for the weaker-e2 / stronger-e1 run (per the file name).
# NOTE(review): ft.load_pickle presumably unpickles the file — only load pickles from a trusted source.
# NOTE(review): `zsg_e1e2_value` is not read by the visible test-case cell, which uses
# `zsg_101_e1e2value_pw` instead — confirm how that array is derived from this result.
zsg_e1e2_value = ft.load_pickle('result/zsg_value_W_player10e2_S_player10e1__optboth.pkl')
# Disabled: the equivalent load for the weaker-e9 / stronger-e1 run.
#zsg_e1e9_value = ft.load_pickle('result/zsg_value_W_player10e9_S_player10e1__optboth.pkl')

In [34]:
# Load the non-strategic win-probability table (stronger player at e1, per the file name).
# Later cells read the columns epsilon_weaker, epsilon_stronger, score_weaker, score_stronger,
# current_turn and win_probability_weaker from this frame.
df_ns_win_probs = pd.read_csv('player10_win_probability_results_stronger_e1.csv')

In [35]:
# Keep only the rows for the matchup under study: stronger epsilon 1 vs. weaker epsilon 2.
df_ns_win_probs_e1e2 = df_ns_win_probs[
    (df_ns_win_probs['epsilon_stronger'] == 1)
    & (df_ns_win_probs['epsilon_weaker'] == 2)
]

The values printed below are the probability that the weaker player wins, given that it is the weaker player's turn at the starting state. They are printed for each of the two policies being compared (ZSG and non-strategic).

In [19]:
# Test Cases
#
# For each (weaker score, stronger score) pair, print the weaker player's win
# probability from the ZSG result next to the non-strategic one, restricted to
# states where it is the weaker player's turn.
#
# NOTE(review): `zsg_101_e1e2value_pw` is not defined in any visible cell; it is
# presumably derived from the ZSG value pickle in an earlier/deleted cell —
# confirm it exists after Restart & Run All. It is indexed here as
# [:, weaker score, stronger score].

separator = "-----------------------------------------------------------------------------------------------"

# (weaker score, stronger score) pairs to spot-check.
test_cases = [(2, 2), (7, 7), (75, 100), (100, 75), (75, 75)]

# Row filter shared by every case: it is the weaker player's turn.
weaker_to_move = df_ns_win_probs_e1e2.current_turn == 'weaker'

for case_number, (score_weaker, score_stronger) in enumerate(test_cases, start=1):
    at_scores = (
        (df_ns_win_probs_e1e2.score_weaker == score_weaker)
        & (df_ns_win_probs_e1e2.score_stronger == score_stronger)
    )
    ns_val = df_ns_win_probs_e1e2[at_scores & weaker_to_move].win_probability_weaker.values

    print(separator)
    # Number each case and always label it "weaker X vs. stronger Y" so the
    # printed header matches the values that follow.
    print(f"Case {case_number}: weaker {score_weaker} vs. stronger {score_stronger}")
    print(f"ZSG Values: {zsg_101_e1e2value_pw[:,score_weaker,score_stronger]}")
    print(f"NonStrategic Values: {ns_val}")

-----------------------------------------------------------------------------------------------
Case 3: weaker 2 vs. stronger 2
ZSG Values: [0.55092268 1.         1.         1.         1.         1.
 1.         1.         1.         1.        ]
NonStrategic Values: [0.55092268 1.         1.         1.         1.         1.
 1.         1.         1.         1.        ]
-----------------------------------------------------------------------------------------------
Case 3: weaker 7 vs. stronger 7
ZSG Values: [0.56210404 0.49659474 1.         1.         1.         1.
 1.         1.         1.         1.        ]
NonStrategic Values: [0.5590919  0.96926162 1.         1.         1.         1.
 1.         1.         1.         1.        ]
-----------------------------------------------------------------------------------------------
Case 3: weaker 75 vs. stronger 100
ZSG Values: [0.50811969 0.31045129 0.31986119 0.52260567 0.52446767 0.52457973
 0.52457977 0.52457977 0.52457977 0.52457977]
No

In [38]:
# Ad-hoc lookup of the non-strategic win probabilities for one score pair,
# restricted to states where it is the weaker player's turn.
score_weak, score_strong = 8, 3

ns_val = df_ns_win_probs_e1e2.loc[
    (df_ns_win_probs_e1e2['score_weaker'] == score_weak)
    & (df_ns_win_probs_e1e2['score_stronger'] == score_strong)
    & (df_ns_win_probs_e1e2['current_turn'] == 'weaker'),
    'win_probability_weaker',
].values

ns_val

array([0.74102472, 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ])

# 3.0 Policy Exploration

In [4]:
# ZSG Policy
zsg_e1e2 = ft.load_pickle('result/zsg_W_player10e2_S_player10e1__optboth.pkl')

In [5]:
# Pull both optimal-action tables out of the ZSG result in one step.
# Presumably '_pw' is the weaker (e2) player and '_ps' the stronger (e1) player — TODO confirm.
zsg_e1e2_e2_policy, zsg_e1e2_e1_policy = (
    zsg_e1e2['optimal_action_index_dic_pw'],
    zsg_e1e2['optimal_action_index_dic_ps'],
)

In [None]:
# Non-strategic policy
e1_ns_result_dic = ft.load_pickle('result/singlegame_player10_e1_turn_tokens.pkl')
e2_ns_result_dic =ft.load_pickle('result/singlegame_player10_e2_turn_tokens.pkl')
e9_ns_result_dic =ft.load_pickle('result/singlegame_player10_e9_turn_tokens.pkl')

In [13]:
# Inspect the entry of the e2 policy table at [4][2].
# NOTE(review): the meaning of the two indices is not visible here — presumably
# score-related; TODO confirm. The full entry is displayed, which is long.
zsg_e1e2_e2_policy[4][2]

[None,
 array([[294, 226,  56, ...,  -1,  -1,  -1],
        [759,   0, 752, ...,  -1,  -1,  -1],
        [759,   0, 752, ...,  -1,  -1,  -1],
        ...,
        [759,   0, 752, ...,  -1,  -1,  -1],
        [759,   0, 752, ...,  -1,  -1,  -1],
        [759,   0, 752, ...,  -1,  -1,  -1]]),
 array([[294,  40,  42,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1],
        [759,  40, 752,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
          -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1],
      

In [33]:
# Look up the action label stored at index 752 of the action list
# (752 appears among the action indices in the policy arrays displayed in this section).
imdp.a_list[752]

'D1'

In [29]:
# Print row 9 of element 2 of the [4][2] policy entry.
# NOTE(review): what the last two indices denote is not visible here — TODO confirm.
print(zsg_e1e2_e2_policy[4][2][2][9])

[730 730 752  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1]


In [30]:
# Print row 9 of element 1 of the [4][2] policy entry, for comparison with the
# same row of element 2.
# NOTE(review): what the last two indices denote is not visible here — TODO confirm.
print(zsg_e1e2_e2_policy[4][2][1][9])

[759   0 752  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1]
