# 1.0 Notebook Parameters

Please specify the following variables:
1. `project_directory`: your project directory path
2. `playerID_list`: the list of players (i.e. skill models) you want to consider in your analysis; usually use one base model and scale it with different epsilons
3. `epsilon_list`: the list of execution error multipliers (epsilon) that you want to consider in your analysis.

### <font color="darkred">**Warning: Player IDs** </font> 
It is recommended that you only use one player ID in the `playerID_list` to generate the dataset. Otherwise, you will need to go through later analysis files and add a filter for the player you want. 

In [6]:
# Project directory path on this computer -- edit this for your own machine
project_directory = '/Users/rachael/Desktop/darts-thesis'

# ID(s) of the player skill models to use:
#   10 - symmetric DB skill model that we were using up until now
#   ?  - "average pro" skill model generated by H & W
playerID_list = [10]

# Execution error multipliers (epsilon) to consider.
# The active list gives full coverage (including policy heatmaps).
epsilon_list = list(range(1, 10)) + [25, 50, 100]
#epsilon_list = [1,2,3,4,5,6,7,8,9] # This will suffice for most analysis files
#epsilon_list = [1,2,3,4,5,6,7,8,9,10,12,14,16,18,20,32,64,128,25,50,100] #Full list tried

# 2.0 Notebook Setup

Import libraries and run prerequisites.

In [7]:
# Import and configure modules from this project

import os

# Make the project directory the working directory before importing the
# project modules below; relative paths such as `result_dir` are resolved
# against it.
# NOTE(review): presumably the project modules are importable only from the
# project directory -- confirm before moving the imports above this call.
os.chdir(project_directory)

import function_helpers_singleplayer as h 
import function_init_board as fb 
import function_init_simple_mdp as imdp
import function_tool as ft 

# Parameter-data directory as defined by `function_init_board`; passed to the
# grid loaders in later cells.
data_parameter_dir = fb.data_parameter_dir
# Output directory for the single-game results (relative to the project directory)
result_dir = './result/singlegame_results'  


In [8]:
# Import and configure external modules

import pandas as pd

import numpy as np
# Compact array printing: 4 decimals, 300-character lines, summarise large arrays
np.set_printoptions(precision=4, linewidth=300, threshold=300)

import torch
# Same print settings for tensors
torch.set_printoptions(precision=4, linewidth=300, threshold=300)

# Silence warnings so the cell output is easier to read;
# comment out the filter below if you want to see warnings
import warnings
warnings.filterwarnings("ignore")


### <font color="darkred">**Notice: Loading Skill Models** </font> 

If you have not run the file `analysis/1_analysis_singleplayer_noturn/1_0_generate_noturn_data.ipynb`, then you will need to run the two cells below to generate player skill models. Otherwise, you can skip these cells. 

In [None]:
# Integrate Skill Models for Desired Players to Get Transition Probabilities
#
# For every (player, epsilon) pair, generate the full probability grid unless
# its pickle file already exists on disk.

for player in playerID_list:

    for epsilon in epsilon_list:

        # Path of data file for this player / epsilon combination
        data_path = project_directory + '/data_parameter/player_gaussin_fit/grid_full/e{}_player{}_gaussin_prob_grid.pkl'.format(epsilon, player)

        # Generate if it does not yet exist.
        # This check must sit inside the epsilon loop: when it is placed one
        # level out, only the last epsilon of each player is ever checked and
        # generated.
        if not os.path.exists(data_path):

            h.evaluate_score_probability([player], epsilon=epsilon, f_density_grid_pixel_per_mm=0.5)

In [None]:
# Reconfigure and Save Transition Probabilities
#
# Builds the custom token / no-token probability grids, skipping any grid
# whose pickle file is already present on disk.

#################################
## Token Transition Probabilities 
#################################

# Path of data file 
# NOTE(review): the load messages elsewhere in this notebook read
# 't_gaussin_prob_grid_custom_tokens.pkl' -- confirm that
# `h.save_aiming_grid_custom_tokens()` writes the file name checked here,
# otherwise this guard never skips the regeneration.
data_path = project_directory + '/data_parameter/player_gaussin_fit/grid_custom_tokens/player10_gaussin_prob_grid_custom_tokens.pkl'

# Generate if it does not yet exist
if not os.path.exists(data_path):
    h.save_aiming_grid_custom_tokens()

#####################################
## No-Token Transition Probabilities 
#####################################

for player in playerID_list:

    for epsilon in epsilon_list:

        # Path of data file for this player / epsilon combination
        no_token_path = project_directory + '/data_parameter/player_gaussin_fit/grid_custom_no_tokens/player{}_e{}_gaussin_prob_grid_custom_no_tokens.pkl'.format(player, epsilon)

        # Generate if it does not yet exist.
        # The check must use the per-(player, epsilon) path built just above,
        # not the token-grid `data_path`; otherwise every no-token grid is
        # skipped or regenerated based on the wrong file.
        if not os.path.exists(no_token_path):

            h.save_aiming_grid_custom_no_tokens([player], epsilon=epsilon)

# 3.0 Single Player Sandbox

You can alter the parameters and examine the results for a single player here.

In [6]:
## Solve for Single Case

###################################################
# Inputs  
###################################################
player_num = 10 
epsilon = 1
tokens = 9

###################################################

# Skill-model files are looked up by a 'player<ID>' name
name_pa = 'player{}'.format(player_num)

# Load the no-token grids for the chosen player and epsilon ...
(
    aiming_grid,
    prob_grid_normalscore_nt,
    prob_grid_singlescore_nt,
    prob_grid_doublescore_nt,
    prob_grid_triplescore_nt,
    prob_grid_bullscore_nt,
) = h.load_aiming_grid(
    name_pa,
    epsilon=epsilon,
    data_parameter_dir=data_parameter_dir,
    grid_version='custom_no_tokens',
)

# ... then the token grids (this load also rebinds `aiming_grid`)
(
    aiming_grid,
    prob_grid_normalscore_t,
    prob_grid_singlescore_t,
    prob_grid_doublescore_t,
    prob_grid_triplescore_t,
    prob_grid_bullscore_t,
) = h.load_aiming_grid(
    't',
    data_parameter_dir=data_parameter_dir,
    grid_version='custom_tokens',
)
prob_grid_doublescore_dic = prob_grid_doublescore_dic_t = None

# Solve the model; the results come back in a dictionary
result_dic = h.solve_dp_turn_tokens(
    tokens,
    aiming_grid,
    prob_grid_normalscore_nt,
    prob_grid_singlescore_nt,
    prob_grid_doublescore_nt,
    prob_grid_triplescore_nt,
    prob_grid_bullscore_nt,
    prob_grid_normalscore_t,
    prob_grid_singlescore_t,
    prob_grid_doublescore_t,
    prob_grid_triplescore_t,
    prob_grid_bullscore_t,
)


load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
solve prob_policy_transit in 2.8311071395874023 seconds
solve dp_turn_policyiter in 28.98804998397827 seconds
[[0.     0.     1.8856 ... 5.5944 5.5758 5.6116]
 [0.     0.     1.     ... 4.9684 4.9631 4.9765]
 [0.     0.     1.     ... 4.7444 4.7403 4.7516]
 ...
 [0.     0.     1.     ... 3.8073 3.8027 3.8082]
 [0.     0.     1.     ... 3.5561 3.5447 3.5554]
 [0.     0.     1.     ... 3.     3.     3.    ]]


**Description of Output**

The output above contains the abbreviated value function for the solved MDP. The rows correspond to the number of dynamic credits a player has, the columns correspond to their score (from 0 on the left to 501 on the right). 

-----

You can uncomment the cell below to see the policy and value functions in more detail. Both the optimal values and policies are indexed as follows: score (from 2 to 501) -> remaining throws in turn (3, 2, 1) -> credits -> score gained in turn. 

For example, below is the code to get the optimal action at the following state:
* Player is at a score of 78
* Player has 2 throws remaining in their turn
* Player has 1 credit remaining 
* Player has a score gained of 16 in their turn so far. 

`result_dic['optimal_action_index_dic'][78][2][1][16]`

This will give you the index of the optimal action. To get the actual action name, you can look up the index in the action list object `a_list` in the MDP initialization module as is illustrated below. 

`optimal_action_index = result_dic['optimal_action_index_dic'][78][2][1][16]`

`imdp.a_list[optimal_action_index]`


In [18]:
# Can uncomment the code below to see the optimal values and policy in more detail 
# print('------------------------------------------------------------------')
# print('Optimal Values: ')
# print(result_dic['optimal_value_dic'])
# print('------------------------------------------------------------------')
# print('Optimal Policy: ')
# print(result_dic['optimal_action_index_dic'])


# 4.0 Generate Dataset

Solve model for different epsilons and store optimal values and policy in a data table.

## 4.1 Generate .csv for Analysis

In [9]:
# Helper function to flatten a solved MDP into one dataframe row per state
def create_result_dataframes(player_num,result_dic,epsilon):
    """Return the solved values and policies of `result_dic` as a DataFrame.

    Walks `result_dic['optimal_value_dic']` by score, remaining throws (1-3),
    first array axis (stored in the 'tokens' column) and second array axis
    (stored in the 'score_gained' column). Entries whose value equals 1000
    are skipped; every other entry becomes one row together with the
    matching entry of `result_dic['optimal_action_index_dic']`.
    """
    columns = ['player','epsilon','tokens','score','remaining_throws','score_gained','optimal_value','optimal_policy']
    value_dic = result_dic['optimal_value_dic']
    rows = []

    for score in value_dic.keys():
        for throws_left in (1, 2, 3):
            values = value_dic[score][throws_left]
            n_tokens, n_gained = values.shape[0], values.shape[1]
            for tok in range(n_tokens):
                for gained in range(n_gained):
                    value = values[tok][gained]
                    # 1000 marks entries that are left out of the table
                    if value == 1000:
                        continue
                    action = result_dic['optimal_action_index_dic'][score][throws_left][tok][gained]
                    rows.append([player_num, epsilon, tok, score, throws_left, gained, value, action])

    return pd.DataFrame(rows, columns=columns)


In [11]:
# Run Experiments
#
# Solve the token MDP for every (player, epsilon) pair and gather the
# flattened results in a single dataframe `df`.
col_names = ['player','epsilon','tokens','score','remaining_throws','score_gained','optimal_value','optimal_policy']
tokens = 9

# The token grids are loaded without any player or epsilon argument, so load
# them once up front rather than once per player.
[aiming_grid, prob_grid_normalscore_t, prob_grid_singlescore_t, prob_grid_doublescore_t, prob_grid_triplescore_t, prob_grid_bullscore_t] = h.load_aiming_grid('t', data_parameter_dir=data_parameter_dir, grid_version='custom_tokens')

# One dataframe per solved (player, epsilon) case; concatenated once at the
# end, which avoids re-copying the accumulated table on every iteration.
result_frames = []

for player_num in playerID_list:

    name_pa = 'player{}'.format(player_num)

    for epsilon in epsilon_list:
        # No-token grids depend on both the player and epsilon
        [aiming_grid, prob_grid_normalscore_nt, prob_grid_singlescore_nt, prob_grid_doublescore_nt, prob_grid_triplescore_nt, prob_grid_bullscore_nt] = h.load_aiming_grid(name_pa, epsilon=epsilon, data_parameter_dir=data_parameter_dir, grid_version='custom_no_tokens')
        result_dic = h.solve_dp_turn_tokens(tokens, aiming_grid, prob_grid_normalscore_nt, prob_grid_singlescore_nt, prob_grid_doublescore_nt, prob_grid_triplescore_nt, prob_grid_bullscore_nt,prob_grid_normalscore_t, prob_grid_singlescore_t, prob_grid_doublescore_t, prob_grid_triplescore_t, prob_grid_bullscore_t)
        result_frames.append(create_result_dataframes(player_num, result_dic, epsilon))

# `ignore_index=True` yields a fresh 0..n-1 index. `pd.concat` rejects an
# empty list, so fall back to an empty table with the expected columns.
if result_frames:
    df = pd.concat(result_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=col_names)

load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
solve prob_policy_transit in 2.8373711109161377 seconds
solve dp_turn_policyiter in 29.295045852661133 seconds
[[0.     0.     1.8856 ... 5.5944 5.5758 5.6116]
 [0.     0.     1.     ... 4.9684 4.9631 4.9765]
 [0.     0.     1.     ... 4.7444 4.7403 4.7516]
 ...
 [0.     0.     1.     ... 3.8073 3.8027 3.8082]
 [0.     0.     1.     ... 3.5561 3.5447 3.5554]
 [0.     0.     1.     ... 3.     3.     3.    ]]
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e2_gaussin_prob_grid_custom_no_tokens.pkl
solve prob_policy_transit in 3.7609472274780273 seconds
solve dp_turn_policyiter in 35.747639179229736 seconds
[[0.     0.     2.5371 ... 7.405  7.4122 7.4278]
 [0.     0.     1.     ... 6.2669 6.2766 6.2896]
 [0.     0.     1.

In [12]:
# Write the combined results table to a .csv file in the results directory.
# NOTE(review): the file name hard-codes 'player10' regardless of the contents
# of `playerID_list` -- confirm this is intended when using other players.
result_path = result_dir + '/player10_turn_results.csv'
df.to_csv(path_or_buf=result_path)

## 4.2 Generate .pkl files to be called by two-player models

In [26]:
# Solve and store the single-game token model for each player / epsilon pair
for player_num in playerID_list:

    name_pa = 'player{}'.format(player_num)

    # NOTE(review): the token grids loaded here are not passed to the solver
    # call below -- confirm whether this load is still needed.
    (
        aiming_grid,
        prob_grid_normalscore_t,
        prob_grid_singlescore_t,
        prob_grid_doublescore_t,
        prob_grid_triplescore_t,
        prob_grid_bullscore_t,
    ) = h.load_aiming_grid(
        't',
        data_parameter_dir=data_parameter_dir,
        grid_version='custom_tokens',
    )

    for epsilon in epsilon_list:

        h.solve_singlegame_token(
            name_pa,
            epsilon,
            tokens=9,
            data_parameter_dir=fb.data_parameter_dir,
            result_dir='result',
            postfix='',
        )


load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e1_gaussin_prob_grid_custom_no_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_tokens/t_gaussin_prob_grid_custom_tokens.pkl
runing solve_dp_turn with credits
solve prob_policy_transit in 2.823310136795044 seconds
solve dp_turn_policyiter in 29.23418664932251 seconds
[[0.     0.     1.8856 ... 5.5944 5.5758 5.6116]
 [0.     0.     1.     ... 4.9684 4.9631 4.9765]
 [0.     0.     1.     ... 4.7444 4.7403 4.7516]
 ...
 [0.     0.     1.     ... 3.8073 3.8027 3.8082]
 [0.     0.     1.     ... 3.5561 3.5447 3.5554]
 [0.     0.     1.     ... 3.     3.     3.    ]]
dump_pickle to result/singlegame_results/singlegame_player10_e1_turn_tokens.pkl
load_pickle from ./data_parameter/player_gaussin_fit/grid_custom_no_tokens/player10_e2_gaussin_prob_grid_custom_no_tokens.pkl
load_