# Create params_with_metrics
```
input: paramsMatrix.csv
output: params_with_metrics.csv
```
Add these columns to params matrix:
- Feature drop rate
- Heuristic quality
- Planning depth
- ELO rating

In [1]:
import numpy as np
import pandas as pd
from fourinarowfunctions import *

%load_ext autoreload
%autoreload 2

Provide the directory where the data is kept

In [2]:
# Data folder. Paths below are built by plain string concatenation
# (direc + 'file'), so the trailing slash is required.
direc = "../data/"

Read parameters and calculate feature drop rate and heuristic quality

In [3]:
# Load the parameter table and discard the "Unnamed: 0" column
# (presumably a saved row index -- confirm against how paramsMatrix.csv is written).
df = pd.read_csv(direc + 'paramsMatrix.csv').drop(columns = "Unnamed: 0")

# The last 10 columns are cast to float and every row is passed through expand_params.
fitted_rows = df.values[:, -10:].astype(float)
params = np.array(list(map(expand_params, fitted_rows)))

# Feature drop rate is read from the last entry of each expanded parameter row;
# heuristic quality is computed row by row from the full expanded row.
feature_drop_rate = params[:, -1]
df['feature drop rate'] = feature_drop_rate
df['heuristic quality'] = np.apply_along_axis(get_heuristic_quality, 1, params)

We compute planning depth on the cluster. First, export the parameters

In [4]:
# Write the expanded parameters as text, one row of `params` per line,
# for the planning-depth job that runs on the cluster.
params_txt_path = direc + 'params.txt'
np.savetxt(params_txt_path, params)

Create a directory on the cluster and move params.txt into it. Edit the compute_planning_depth.sh shell script so that it points to this directory, and set the number of jobs to the number of lines in params.txt (in this case 370). Run the script, zip the resulting depth folder with ```zip -r depth.zip depth```, move the archive back to your local computer, and unzip it in the directory where paramsMatrix.csv is stored.

In [5]:
# One depth file per row of `params`. Row i maps to file "<i // 5>_<i % 5 + 1>",
# i.e. presumably 5 consecutive rows per subject with a 1-based second index --
# TODO confirm against the cluster script.
depth_files = [direc + f"depth/depth_fourinarow-dev_{i // 5}_{i % 5 + 1}.txt" for i in range(len(params))]
loaded_depths = [np.loadtxt(depth_file) for depth_file in depth_files]

# Stack in row order so the values line up with the rows of df.
depth = np.vstack(loaded_depths)
df['planning depth'] = depth

# Calculate Elo
Run only one of the two cells below, depending on how your game data is stored. Then continue with the "Finish adding elo" section.

## Matlab style *.tsv
Run the cell below only if you have *.tsv files in the format Matlab uses

In [6]:
games = load_tabular(direc + "generated_games/*.tsv")

# Build one record per loaded table: subject, opponent level, player color, outcome.
# The first row of each table supplies player_name and player_color; the level is
# the part of player_name after "_vs_".
# NOTE(review): the full player_name (including the "_vs_<level>" suffix) is used
# as the subject, so each subject/level pairing counts as its own subject -- confirm
# this is intended.
forEloData = [
    [
        header.player_name,
        header.player_name.split("_vs_")[1],
        header.player_color.lower(),
        get_tabular_outcome(table),
    ]
    for table, header in ((g, g.iloc[0]) for g in games)
]
elo_columns = ['subject', 'level', 'user_color', 'outcome']
forElo = pd.DataFrame(data = forEloData, columns = elo_columns)

n_subjects = len(pd.unique(forElo['subject']))
print(f"Elo data for {n_subjects} subjects")

Loaded 37 games from ../data/generated_games\Age0Sub200_vs_Level110.tsv
Loaded 37 games from ../data/generated_games\Age0Sub200_vs_Level118.tsv
Loaded 37 games from ../data/generated_games\Age0Sub200_vs_Level92.tsv
Loaded 37 games from ../data/generated_games\Age1Sub117_vs_Level110.tsv
Loaded 37 games from ../data/generated_games\Age1Sub117_vs_Level118.tsv
Loaded 37 games from ../data/generated_games\Age1Sub117_vs_Level92.tsv
Loaded 37 games from ../data/generated_games\Age2Sub1099_vs_Level110.tsv
Loaded 37 games from ../data/generated_games\Age2Sub1099_vs_Level118.tsv
Loaded 37 games from ../data/generated_games\Age2Sub1099_vs_Level92.tsv
Elo data for 9 subjects


## Psiturk style *.csv
Run the cell below only if you have a *.csv from Psiturk (probably "trialdata.csv")

In [None]:
# Load the Psiturk export. The result is iterated with .items() below as
# username -> list of event dicts.
trialdata_path = direc + "trialdata.csv"
data = load_data(trialdata_path)

# Compute an array with 1 row for every game.
# Columns are username, game nr, opponent category, opponent level, player color, result
# The result column is 1 = player won, -1 = opponent won, 0 = draw
def categoryHist(username, data):
    """Build one row per finished game from a single user's event log.

    Parameters
    ----------
    username : str
        Identifier written into the first column of every row.
    data : iterable of dict
        The user's events in order. Each event has an 'event_type' and an
        'event_info' dict; only 'user move', 'adjust level' and 'end game'
        events are read.

    Returns
    -------
    numpy.ndarray
        String array of shape (n_games, 6) with columns username, game number,
        opponent category, opponent level, player color, result
        (1 = player won, -1 = opponent won, 0 = draw). Every value is stored
        as text, so numeric columns must be cast back by the caller.
        When there is no 'end game' event, an empty float array of shape
        (0, 6) is returned.

    Notes
    -----
    Rows are collected in a list and converted once. Stacking each row onto a
    float (0, 6) array with np.vstack re-copies the table for every game and
    relies on number-to-string dtype promotion, which NumPy has deprecated.
    """
    rows = []
    category = 2  # category in effect until the first 'adjust level' event
    for event in data:
        event_type = event['event_type']
        info = event['event_info']
        if event_type == 'user move':
            # The color of the most recent user move is the one recorded at
            # 'end game'. An 'end game' before any 'user move' raises
            # UnboundLocalError, exactly as before.
            color = info['user_color']
        elif event_type == 'adjust level':
            category = info['category']
        elif event_type == 'end game':
            result = info['result']
            if result == 'win':
                resultCode = 1
            elif result == 'opponent win':
                resultCode = -1
            else:
                resultCode = 0
            rows.append([username, info['game_num'], int(category), info['level'], color, resultCode])
    if not rows:
        return np.empty([0,6])
    # Explicit string dtype: the columns mix text and numbers.
    return np.array(rows, dtype=str)

# create dataframe
# Gather every user's per-game table first and stack once. Stacking inside the
# loop re-copies all earlier rows on each iteration, and starting from a float
# (0, 6) array forces number-to-string dtype promotion, which NumPy has deprecated.
user_tables = [categoryHist(username, userdata) for username, userdata in data.items()]
# Users without any finished game contribute an empty table; skip those.
user_tables = [user_table for user_table in user_tables if len(user_table) > 0]
forEloData = np.vstack(user_tables) if user_tables else np.empty([0,6])
forElo = pd.DataFrame(data = forEloData, columns = ['subject', 'gameNumber', 'category', 'level', 'user_color', 'outcome'])
# The stacked rows mix text and numbers, so outcome presumably arrives as text;
# cast it back to int so it can be negated later.
forElo["outcome"] = forElo["outcome"].astype(int)
print(f"Elo data for {len(pd.unique(forElo['subject']))} subjects")

## Finish adding elo
Run the cells below regardless of how you loaded your game data. From this cell down the processing of matlab and psiturk files is the same.

In [7]:
# Turn each forElo row into one game record: [black player ID, white player ID, winner].
# A player ID is the subject ID for the human player and the level for the AI player.
# Winner is 1 = black player won, -1 = white player won, 0 = draw.
def to_pgn(row):
    """Convert one forElo row into [black ID, white ID, outcome for black].

    `row` must provide 'subject', 'level', 'user_color' and 'outcome'.
    If the subject contains ':', only the piece between the first and second
    ':' is kept. The outcome is stored from the subject's point of view, so
    it is negated when the subject did not play black.
    """
    raw_subject = row['subject']
    if ":" in raw_subject:
        human = raw_subject.split(':')[1]
    else:
        human = raw_subject
    opponent = str(row['level'])

    if row['user_color'] == 'black':
        return [human, opponent, row['outcome']]
    # Any color other than 'black' is treated as white.
    return [opponent, human, -row['outcome']]
# One [black, white, outcome] record per game, in the row order of forElo.
results = list(map(to_pgn, (game_row for _, game_row in forElo.iterrows())))

# Hand the records to create_bayeselo_input together with the target games.pgn path.
games_pgn_path = direc + 'games.pgn'
create_bayeselo_input(results, games_pgn_path)

In [8]:
# Directory handed to run_bayeselo as its first argument.
bayeselo_direc = './'

# Rate the tournament games and the subjects' games together.
pgn_files = [direc + 'tournament_results_short.pgn', direc + 'games.pgn']
all_ratings = run_bayeselo(bayeselo_direc, pgn_files)

# Drop every player whose name is all digits
# (presumably the numbered AI levels -- verify against the tournament file).
elo = {player: rating for player, rating in all_ratings.items() if not player.isdigit()}

# Look ratings up by the same subject ID that to_pgn writes:
# the part after the first ':' when the subject contains one.
subject_ids = df["subject"].apply(lambda x: x.split(":")[1] if ":" in x else x)
df["elo"] = subject_ids.map(elo)

All the metrics are in the dataframe now. Let's save it.

In [9]:
# Write the parameter table with all added metric columns, without the row index.
output_path = direc + 'params_with_metrics.csv'
df.to_csv(output_path, index = False)