In [1]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedGroupKFold

***

In [2]:
# Load the raw training table and derive the two helper columns used below,
# in a single chained expression:
#   * `Game`                - the text between the first pair of double quotes
#                             in `LudRules` (e.g. '(game "Zuz Mel (7x7)" ...').
#   * `utility_agent1_rank` - dense rank of `utility_agent1` (ties share a
#                             rank, ranks start at 1), cast to int so it can be
#                             used as a discrete label for stratification.
df_train = (
    pd.read_csv('../data/raw/train.csv')
    .assign(
        Game=lambda d: d['LudRules'].apply(lambda rules: rules.split('"')[1]),
        utility_agent1_rank=lambda d: (
            d["utility_agent1"].rank(method='dense', ascending=True).astype(int)
        ),
    )
)

***
### Group by `GameRulesetName` (stratified on `utility_agent1_rank`)

In [None]:
# CV configuration for this section.
# Rows sharing a `GameRulesetName` are kept together in one fold (groups);
# the dense-ranked utility is the label whose distribution is stratified.
group_col = "GameRulesetName"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 2112  # seed for split #1 of this grouping

gkf1 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# Materialise the generator into a list of (train, valid) integer index arrays
# that refer to row positions of `df_train`.
split1 = list(gkf1.split(
    df_train,
    y=df_train[y_col],
    groups=df_train[group_col],
))

# Make sure the output directory exists so a fresh checkout does not fail with
# FileNotFoundError after the split has already been computed.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv1_GameRulesetName.pkl', 'wb') as f:
    pickle.dump(split1, f)

In [None]:
# Re-declare the CV configuration so this cell does not depend on which
# configuration cell was executed last: `group_col` is rebound to "Game"
# further down the notebook, and running this cell afterwards would otherwise
# silently write a Game-grouped split under the GameRulesetName file name.
group_col = "GameRulesetName"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 19  # seed for split #2 of this grouping

gkf2 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# List of (train, valid) integer index arrays referring to row positions of
# `df_train`.
split2 = list(gkf2.split(
    df_train,
    y=df_train[y_col],
    groups=df_train[group_col],
))

# Make sure the output directory exists before writing.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv2_GameRulesetName.pkl', 'wb') as f:
    pickle.dump(split2, f)

In [None]:
# Re-declare the CV configuration so this cell does not depend on which
# configuration cell was executed last: `group_col` is rebound to "Game"
# further down the notebook, and running this cell afterwards would otherwise
# silently write a Game-grouped split under the GameRulesetName file name.
group_col = "GameRulesetName"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 23  # seed for split #3 of this grouping

gkf3 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# List of (train, valid) integer index arrays referring to row positions of
# `df_train`.
split3 = list(gkf3.split(
    df_train,
    y=df_train[y_col],
    groups=df_train[group_col],
))

# Make sure the output directory exists before writing.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv3_GameRulesetName.pkl', 'wb') as f:
    pickle.dump(split3, f)

***
### Group by `Game` (stratified on `utility_agent1_rank`)

In [3]:
# Number of rows per game, most frequent first. In the recorded output one
# game ('Ludus Coriovalli') has far more rows than any other.
df_train['Game'].value_counts(sort=True, ascending=False)

Game
Ludus Coriovalli       24626
Ludus Latrunculorum     5390
58 Holes                4550
Ratio                   2516
Senet                   1688
                       ...  
Dice Chess                76
Puluc                     74
Lange Puff                74
Bheri Bakhri              72
Faraday                    4
Name: count, Length: 1030, dtype: int64

In [4]:
# Index labels of every 'Ludus Coriovalli' row. These rows are removed before
# the grouped split is computed and are later appended to the training side of
# every fold (see the cells below).
common_idx = df_train.index[df_train['Game'] == 'Ludus Coriovalli']

# Everything except those rows; the original index labels are preserved.
_df_train = df_train.drop(index=common_idx)
_df_train

Unnamed: 0,Id,GameRulesetName,agent1,agent2,Properties,Format,Time,Discrete,Realtime,Turns,...,PlayoutsPerSecond,MovesPerSecond,EnglishRules,LudRules,num_wins_agent1,num_draws_agent1,num_losses_agent1,utility_agent1,Game,utility_agent1_rank
0,0,00Y,MCTS-ProgressiveHistory-0.1-MAST-false,MCTS-ProgressiveHistory-0.6-Random200-false,1,1,1,1,0,1,...,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",4,0,11,-0.466667,00'Y',18
1,1,00Y,MCTS-ProgressiveHistory-0.1-MAST-false,MCTS-UCB1GRAVE-0.6-NST-true,1,1,1,1,0,1,...,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333,00'Y',23
2,2,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1-0.1-NST-false,1,1,1,1,0,1,...,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",7,0,8,-0.066667,00'Y',31
3,3,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1-0.6-NST-false,1,1,1,1,0,1,...,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333,00'Y',23
4,4,00Y,MCTS-ProgressiveHistory-0.1-MAST-true,MCTS-UCB1GRAVE-1.41421356237-NST-false,1,1,1,1,0,1,...,298.07,18877.17,Goal: Connect all three edge colors with a sin...,"(game ""00'Y'"" (players 2) (equipment { (board ...",5,0,10,-0.333333,00'Y',23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233229,233229,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-NST-false,MCTS-ProgressiveHistory-1.41421356237-Random20...,1,1,1,1,0,1,...,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",2,0,13,-0.733333,Zuz Mel (7x7),10
233230,233230,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1-0.6-MAST-false,1,1,1,1,0,1,...,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",9,1,5,0.266667,Zuz Mel (7x7),43
233231,233231,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1GRAVE-1.41421356237-NST-false,1,1,1,1,0,1,...,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",11,3,1,0.666667,Zuz Mel (7x7),57
233232,233232,Zuz_Mel_7x7,MCTS-UCB1Tuned-1.41421356237-Random200-false,MCTS-UCB1GRAVE-1.41421356237-NST-true,1,1,1,1,0,1,...,157.52,157174.58,7x7 board. 24 pieces per player. Pieces begin ...,"(game ""Zuz Mel (7x7)"" (players 2) (equipment {...",24,2,4,0.666667,Zuz Mel (7x7),57


In [None]:
# CV configuration for this section.
# Rows sharing a `Game` are kept together in one fold (groups); the
# dense-ranked utility is the label whose distribution is stratified.
group_col = "Game"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 1992  # seed for split #1 of this grouping

gkf1 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# The split is computed on `_df_train` (the frame without the
# 'Ludus Coriovalli' rows), so the returned arrays are row positions within
# `_df_train`, not within `df_train`.
_split1 = list(gkf1.split(
    _df_train,
    y=_df_train[y_col],
    groups=_df_train[group_col],
))

split1 = list()
for _train_idx, _valid_idx in _split1:

    # Translate positions in `_df_train` into index labels of `df_train`.
    # Indexing the label array directly gives the same result as
    # `_df_train.iloc[pos].index.values` without copying a row subset of the
    # whole frame.
    # NOTE(review): the labels equal row positions of `df_train` only while it
    # keeps the default integer index from `read_csv` - confirm how the
    # consumers of these pickles index the frame.
    train_idx = _df_train.index.values[_train_idx]
    valid_idx = _df_train.index.values[_valid_idx]

    # The held-out 'Ludus Coriovalli' rows go into the training side of every
    # fold and are never validated on (presumably because that game is far
    # larger than any other - confirm intent).
    train_idx = np.concatenate([train_idx, common_idx.values])
    split1.append((train_idx, valid_idx))

# Make sure the output directory exists so a fresh checkout does not fail with
# FileNotFoundError after the split has already been computed.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv1_Game.pkl', 'wb') as f:
    pickle.dump(split1, f)

In [None]:
# Re-declare the CV configuration so this cell is self-contained and does not
# depend on which configuration cell was executed last (`group_col` is set to
# "GameRulesetName" earlier in the notebook).
group_col = "Game"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 1984  # seed for split #2 of this grouping

gkf2 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# Computed on `_df_train`, so the returned arrays are row positions within
# `_df_train`, not within `df_train`.
_split2 = list(gkf2.split(
    _df_train,
    y=_df_train[y_col],
    groups=_df_train[group_col],
))

split2 = list()
for _train_idx, _valid_idx in _split2:

    # Translate positions in `_df_train` into index labels of `df_train`
    # (same result as `_df_train.iloc[pos].index.values`, without copying a
    # row subset of the whole frame).
    train_idx = _df_train.index.values[_train_idx]
    valid_idx = _df_train.index.values[_valid_idx]

    # The rows in `common_idx` are added to the training side of every fold
    # and never appear in validation.
    train_idx = np.concatenate([train_idx, common_idx.values])
    split2.append((train_idx, valid_idx))

# Make sure the output directory exists before writing.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv2_Game.pkl', 'wb') as f:
    pickle.dump(split2, f)

In [None]:
# Re-declare the CV configuration so this cell is self-contained and does not
# depend on which configuration cell was executed last (`group_col` is set to
# "GameRulesetName" earlier in the notebook).
group_col = "Game"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 2113  # seed for split #3 of this grouping

gkf3 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# Computed on `_df_train`, so the returned arrays are row positions within
# `_df_train`, not within `df_train`.
_split3 = list(gkf3.split(
    _df_train,
    y=_df_train[y_col],
    groups=_df_train[group_col],
))

split3 = list()
for _train_idx, _valid_idx in _split3:

    # Translate positions in `_df_train` into index labels of `df_train`
    # (same result as `_df_train.iloc[pos].index.values`, without copying a
    # row subset of the whole frame).
    train_idx = _df_train.index.values[_train_idx]
    valid_idx = _df_train.index.values[_valid_idx]

    # The rows in `common_idx` are added to the training side of every fold
    # and never appear in validation.
    train_idx = np.concatenate([train_idx, common_idx.values])
    split3.append((train_idx, valid_idx))

# Make sure the output directory exists before writing.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv3_Game.pkl', 'wb') as f:
    pickle.dump(split3, f)


In [6]:
# CV configuration, declared locally so the cell can be run on its own once
# `df_train`, `_df_train` and `common_idx` exist.
group_col = "Game"
y_col = "utility_agent1_rank"
num_folds = 5

cv_random_state = 3112  # seed for split #4 of this grouping

gkf4 = StratifiedGroupKFold(
    n_splits=num_folds,
    shuffle=True,
    random_state=cv_random_state,
)
# Computed on `_df_train`, so the returned arrays are row positions within
# `_df_train`, not within `df_train`.
_split4 = list(gkf4.split(
    _df_train,
    y=_df_train[y_col],
    groups=_df_train[group_col],
))

split4 = list()
for _train_idx, _valid_idx in _split4:

    # Translate positions in `_df_train` into index labels of `df_train`
    # (same result as `_df_train.iloc[pos].index.values`, without copying a
    # row subset of the whole frame).
    train_idx = _df_train.index.values[_train_idx]
    valid_idx = _df_train.index.values[_valid_idx]

    # The rows in `common_idx` are added to the training side of every fold
    # and never appear in validation.
    train_idx = np.concatenate([train_idx, common_idx.values])
    split4.append((train_idx, valid_idx))

# Make sure the output directory exists so a fresh checkout does not fail with
# FileNotFoundError after the split has already been computed.
os.makedirs('./splits', exist_ok=True)
with open('./splits/cv4_Game.pkl', 'wb') as f:
    pickle.dump(split4, f)



***