# **WARNING**

Before running this notebook, it is recommended to first run the notebook `2_0_generate_turn_data.ipynb`, which saves the singleplayer game results as .pkl files. This speeds up the runtime, because loading those files is much faster than completely re-solving the singleplayer game (i.e. to get the non-strategic policies that are used to initialize the twoplayer model). 

# 1.0 Notebook Parameters

Please specify the following variables:
1. `project_directory`: your project directory path
2. `epsilon_list`: the list of execution error multipliers (epsilon) that you want to consider in your analysis.
3. `player_id`: the players (i.e. skill model) you want to consider in your analysis; use one base model and scale it with different epsilons; this should match whatever was used in `2_0_generate_turn_data.ipynb`.
4. `starting_score_default`: the default starting score that will be used for all solves (i.e. will only solve the MDP up to this score state for both players).

In [1]:

# Absolute path to the project root on this machine (edit this for your computer)
project_directory = '/Users/rachael/Desktop/darts-thesis'

# Execution-error multipliers (epsilon) considered in this analysis: 1 through 9
epsilon_list = list(range(1, 10))

# ID of the base player skill model to use
#   10 - symmetric DB skill model that we were using up until now
#   ?  - "average pro" skill model generated by H & W
player_id = 10

# Default starting score used for every run in this notebook
starting_score_default = 25

# 2.0 Notebook Setup

Import libraries and run prerequisites.

In [2]:
# Import and configure modules from this project

import os
import time

# The project-local modules below are imported relative to the project directory,
# so the working directory must be switched before importing them.
os.chdir(project_directory)

import function_init_simple_mdp as imdp
import function_init_board as fb
from function_helpers_twoplayer import solve_zsg_optW_fixNS, solve_zsg_optS_fixNS, solve_zsg_optboth

  from .autonotebook import tqdm as notebook_tqdm


In [None]:



# import function_init_board as fb
# import function_tool as ft
# import function_init_simple_mdp as imdp
# import function_helpers_singleplayer as h

# # Import original code files for baseline
# sys.path.insert(1, './archive/original_code/')
# import function_evaluate_policy as fep
# import function_get_aiming_grid as fgag
# import function_solve_dp as fsdp
# import function_solve_zsg_gpu as function_solve_zsg

# data_parameter_dir = fb.data_parameter_dir
# result_dir = './result'  
# name_pa = 'player{}'.format(10) 


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import warnings

# Install a blanket "ignore" filter so no Python warnings are shown in later cells
warnings.filterwarnings(action="ignore")


# 3.0 Solve BR for PW 

Solve the best response for the weaker player.

## 3.1 BR for PW Sandbox

Solve for single instance of the game (good for testing).

In [4]:
################ PARAMETERS ################

# Epsilon (execution-error multiplier) for the weaker (pw) and stronger (ps) player
epsilon_pw = 1
epsilon_ps = 1

starting_score = starting_score_default

############################################

# Both players are built from the same base skill model; only epsilon differs
name_pw = f'player{player_id}'
name_ps = f'player{player_id}'

# NOTE(review): result_dir here has no 'result/' prefix, whereas the PS sandbox
# writes to 'result/twoplayer_br_results' - confirm which location is intended.
# The call is kept as the cell's last expression so its return value is displayed.
solve_zsg_optW_fixNS(
    name_pw,
    name_ps,
    epsilon_pw,
    epsilon_ps,
    max_tokens_optimize=9,
    game_begin_score_502=starting_score,
    data_parameter_dir=fb.data_parameter_dir,
    dp_policy_folder='result/singlegame_results/',
    result_dir='twoplayer_br_results',
    postfix='',
    gpu_device=None,
)

W_player10e1_S_player10e1_optW
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load weaker player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
load stronger player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
solve_zsg_opt_player10e1_fix_player10e1 in 14.947728157043457 seconds
t_policy_evaluation  = 1.6771411895751953 seconds
t_policy_improvement = 13.228193998336792 seconds
t_other = 0 seconds
save twoplayer_br_results/zsg_W_player10e1_S_player10e1__optW.pkl
save twoplayer_br_results/zsg_value_W_player10e1_S_player10e1__optW.pkl


'save'

## 3.2 BR for PW Solve Loop

Iterate through the epsilon list and solve every stronger-weaker epsilon combination in which the weaker player's epsilon is at least the stronger player's. 

In [11]:
################ PARAMETERS ################

# Can override the default if needed
starting_score = starting_score_default

############################################

# Both players are built from the same base skill model; the epsilons come from
# the loop variables below (es = stronger player, ew = weaker player).
name_pw = f'player{player_id}'
name_ps = f'player{player_id}'

start_time = time.time()

for es in epsilon_list:

    for ew in epsilon_list:

        # Only solve pairings where the weaker player's epsilon is at least the stronger player's
        if ew < es:
            continue

        # NOTE(review): dp_policy_folder/result_dir differ from the sandbox cell
        # ('result/singlegame_results/' and 'twoplayer_br_results') - confirm intended.
        result_dic = solve_zsg_optW_fixNS(
            name_pw,
            name_ps,
            ew,
            es,
            max_tokens_optimize=9,
            game_begin_score_502=starting_score,
            data_parameter_dir=fb.data_parameter_dir,
            dp_policy_folder='result',
            result_dir='result_zsg',
            postfix='',
            gpu_device=None,
        )

        # Elapsed wall-clock seconds since the loop started
        print(f'e_stronger={es}', f'e_weaker={ew}','completed at:',time.time() - start_time)

print('Full loop completed at ', time.time() - start_time)

W_player10e1_S_player10e1_optW
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load weaker player policy result/singlegame_player10_e1_turn_tokens.pkl
load stronger player policy result/singlegame_player10_e1_turn_tokens.pkl
solve_zsg_opt_player10e1_fix_player10e1 in 11.780767917633057 seconds
t_policy_evaluation  = 1.50980544090271 seconds
t_policy_improvement = 10.23784589767456 seconds
t_other = 0 seconds
save result_zsg/zsg_W_player10e1_S_player10e1__optW.pkl
save result_zsg/zsg_value_W_player10e1_S_player10e1__optW.pkl
e_stronger=1 e_weaker=1 completed at: 12.02187705039978
W_player10e2_S_player10e1_optW
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_t

# 4.0 Solve BR for PS

Optimize the best response for the stronger player to the weaker player's independent (non-strategic) policy. This function was not used for any analysis, so there is only a sandbox here in case some investigation is needed. 

In [4]:
################ PARAMETERS ################

# Epsilon (execution-error multiplier) for the weaker (pw) and stronger (ps) player
epsilon_pw = 1
epsilon_ps = 1

starting_score = starting_score_default

############################################

# Both players are built from the same base skill model; only epsilon differs
name_pw = f'player{player_id}'
name_ps = f'player{player_id}'

# Solve the stronger player's best response and keep the returned result
result_dic = solve_zsg_optS_fixNS(
    name_pw,
    name_ps,
    epsilon_pw,
    epsilon_ps,
    game_begin_score_502=starting_score,
    data_parameter_dir=fb.data_parameter_dir,
    dp_policy_folder='result/singlegame_results/',
    result_dir='result/twoplayer_br_results',
    postfix='',
    gpu_device=None,
)

W_player10e1_S_player10e1_optS
W_player10e1_S_player10e1_optW
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load weaker player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
load stronger player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
solve_zsg_opt_player10e1_fix_player10e1 in 1.034317970275879 seconds
t_policy_evaluation  = 0.12665963172912598 seconds
t_policy_improvement = 0.8940823078155518 seconds
t_other = 0 seconds
save result/twoplayer_br_results/zsg_W_player10e1_S_player10e1__optS.pkl
save result/twoplayer_br_results/zsg_value_W_player10e1_S_player10e1__optS.pkl


# 5.0 Full ZSG

## 5.1 ZSG Sandbox

Solve for single instance of the game (good for testing).

In [5]:
################# PARAMETERS #################

# Epsilon (execution-error multiplier) for the weaker (pw) and stronger (ps) player
epsilon_pw = 1
epsilon_ps = 1

starting_score = starting_score_default

##############################################

# Both players are built from the same base skill model; only epsilon differs
name_pw = f'player{player_id}'
name_ps = f'player{player_id}'

# Folder holding the saved singleplayer policies, and folder for the ZSG results
dp_policy_folder = 'result/singlegame_results/'
result_dir = 'result/twoplayer_zsg_results'
postfix = ''
gpu_device = None

# Pass the variables defined above (rather than repeating the literals) so that
# editing postfix / gpu_device in this cell actually takes effect.
# The call is kept as the cell's last expression so its return value is displayed.
solve_zsg_optboth(
    name_pw,
    name_ps,
    epsilon_pw,
    epsilon_ps,
    game_begin_score_502=starting_score,
    data_parameter_dir=fb.data_parameter_dir,
    dp_policy_folder=dp_policy_folder,
    result_dir=result_dir,
    postfix=postfix,
    gpu_device=gpu_device,
)

W_player10e1_S_player10e1_optboth
player W is player10 e1 and player S is player10 e1
optimize both players
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load weaker player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
load stronger player policy result/singlegame_results//singlegame_player10_e1_turn_tokens.pkl
solve_zsg_opt_player10e1_fix_player10e1 in 19.83309817314148 seconds
t_policy_evaluation  = 2.4668238162994385 seconds
t_policy_improvement = 17.218329668045044 seconds
t_other = 0 seconds
save result/twoplayer_zsg_results/zsg_W_player10e1_S_player10e1__optboth.pkl
save result/twoplayer_zsg_results/zsg_value_W_player10e1_S_player10e1__optboth

'save'

## 5.2 ZSG Solve Loop

Iterate through the epsilon list and solve every stronger-weaker epsilon combination in which the weaker player's epsilon is at least the stronger player's. 

In [9]:
################# PARAMETERS #################

starting_score = starting_score_default

##############################################

# Both players are built from the same base skill model; the epsilons come from
# the loop variables below (es = stronger player, ew = weaker player).
name_pw = f'player{player_id}'
name_ps = f'player{player_id}'

start_time = time.time()

for es in epsilon_list:

    for ew in epsilon_list:

        # Skip pairings where the "weaker" epsilon would be below the stronger one
        if ew < es:
            continue

        # Elapsed wall-clock seconds since the loop started
        print(f'e_stronger={es}', f'e_weaker={ew}','started at:',time.time() - start_time)

        # NOTE(review): dp_policy_folder/result_dir differ from the sandbox cell
        # ('result/singlegame_results/' and 'result/twoplayer_zsg_results'); the
        # captured output shows the NS policies being re-solved - confirm intended.
        result_dic = solve_zsg_optboth(
            name_pw,
            name_ps,
            ew,
            es,
            game_begin_score_502=starting_score,
            data_parameter_dir=fb.data_parameter_dir,
            dp_policy_folder='result',
            result_dir='result',
            postfix='',
            gpu_device=None,
        )

print('Full loop completed at ', time.time() - start_time)

e_stronger=3 e_weaker=3 started at: 5.6743621826171875e-05
W_player10e3_S_player10e3_optboth
player W is player10 e3 and player S is player10 e3
optimize both players
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e3_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e3_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
solve weaker player NS policy


solve prob_policy_transit in 4.330611944198608 seconds
solve dp_turn_policyiter in 29.381951093673706 seconds
[[0.     0.     3.0946 ... 8.938  8.9526 8.9673]
 [0.     0.     1.     ... 7.3119 7.3263 7.3409]
 [0.     0.     1.     ... 6.7576 6.772  6.7866]
 ...
 [0.     0.     1.     ... 4.1578 4.1653 4.1713]
 [0.     0.     1.     ... 3.7995 3.7591 3.7597]
 [0.     0.     1.     ... 3.     3.     3.    ]]
solve stronger player NS policy
solve prob_policy_transit in 4.334450006484985 seconds
solve dp_turn_policyiter in 27.905056953430176 seconds
[[0.     0.     3.0946 ... 8.938  8.9526 8.9673]
 [0.     0.     1.     ... 7.3119 7.3263 7.3409]
 [0.     0.     1.     ... 6.7576 6.772  6.7866]
 ...
 [0.     0.     1.     ... 4.1578 4.1653 4.1713]
 [0.     0.     1.     ... 3.7995 3.7591 3.7597]
 [0.     0.     1.     ... 3.     3.     3.    ]]
solve_zsg_opt_player10e3_fix_player10e3 in 18.391089916229248 seconds
t_policy_evaluation  = 2.690549612045288 seconds
t_policy_improvement = 15.578