In [None]:
#Assumptions that will break if we use MCTS to generate the play traces in this code:
#1. Turn won't be restricted to either 0 or 1
#2. We can't just copy the final two rows to extend the round number to maxNoOfRounds

#Comments: play traces for a given strategy will change depending on the opponent's strategy, e.g. the evolution of a
#bigmoney deck is different if it is played vs another bigmoney strategy as opposed to, say, a witch strategy, which will
#introduce curse cards into the bigmoney trace

In [2]:
import pandas as pd
import pdb
import math
import matplotlib.pyplot as plt
from sklearn.manifold import MDS 
import itertools
#show up to 100 rows when a data frame is printed
pd.set_option('display.max_rows', 100)
#number of players per game (this is a two-player tournament)
noPlayers = 2

In [3]:
#load the tab-separated feature log file
data  = pd.read_csv("data/featureslogfile.txt", sep = '\t')

In [4]:
#list the columns available in the feature log
print(data.columns)

Index(['GameID', 'Player', 'Round', 'Turn', 'CurrentScore', 'SCORE',
       'SCORE_ADV', 'ORDINAL', 'OUR_TURN', 'HAS_WON', 'FINAL_ORD', 'ROUND',
       'CURSE', 'ESTATE', 'DUCHY', 'PROVINCE', 'COPPER', 'SILVER', 'GOLD',
       'CELLAR', 'CHAPEL', 'MOAT', 'HARBINGER', 'MERCHANT', 'VASSAL',
       'VILLAGE', 'WORKSHOP', 'BUREAUCRAT', 'GARDENS', 'MILITIA',
       'MONEYLENDER', 'POACHER', 'REMODEL', 'SMITHY', 'THRONE_ROOM', 'BANDIT',
       'COUNCIL_ROOM', 'FESTIVAL', 'LABORATORY', 'LIBRARY', 'MARKET', 'MINE',
       'SENTRY', 'WITCH', 'ARTISAN', 'PlayerCount', 'TotalRounds',
       'ActionScore', 'Win', 'Ordinal', 'FinalScore'],
      dtype='object')


In [5]:
#check number of games: list the distinct game IDs present in the log
print(data['GameID'].unique())

[ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18. 19.
 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36. 37.
 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51.]


In [6]:
#Note that the Player field swaps round halfway through the tournament when there are two players: in the first
#noGamesPerMatchUp games player 0 is BigMoneyWithGardens, in the remaining games player 1 is.
#Also the GameID starts at 2 for some reason (see firstGameID below).
noGamesPerMatchUp = 25
def assignName(gameID, player, noGamesPerMatchUp, firstGameID = 2):
    """Return the strategy name of `player` in game `gameID`.

    Parameters
    ----------
    gameID : number
        ID of the game; IDs are assumed to run consecutively from `firstGameID`.
    player : number
        Seat of the player in that game (0 or 1).
    noGamesPerMatchUp : int
        Number of games played before the seats of the two strategies are swapped.
    firstGameID : int, optional
        ID of the first game in the log. Defaults to 2, the value observed in the log file.

    Returns
    -------
    str
        'BigMoneyWithGardens' or 'DoubleWitch'.
    """
    #BigMoneyWithGardens sits in seat 0 before the swap and in seat 1 after it
    inFirstHalf = gameID < (noGamesPerMatchUp + firstGameID)
    bigMoneySeat = 0 if inFirstHalf else 1
    return 'BigMoneyWithGardens' if player == bigMoneySeat else 'DoubleWitch'

In [7]:
#add label for player types: map each row's (GameID, Player) pair to a strategy name via assignName
data['PlayerName'] = data.apply(lambda df: assignName(df['GameID'], df['Player'], noGamesPerMatchUp), axis = 1)

In [8]:
#we can only record events each turn so we need to use the final turn in each round for each player to determine their
#deck at the end of the round
#NOTE: `data` is overwritten with the filtered frame, so the unfiltered log is no longer available after this cell
print(data['Turn'].unique())
data = data[data['Turn'] == 1]
print(data['Turn'].unique())

[0. 1.]
[1.]


In [9]:
#extract columns of interest
#card names must match the column names of the feature log exactly (e.g. 'THRONE_ROOM', with an underscore)
card_types_in_supply = ['ARTISAN', 'BANDIT', 'BUREAUCRAT', 'CHAPEL', 'FESTIVAL', 'GARDENS', 'SENTRY',
                        'THRONE_ROOM', 'WITCH', 'CURSE','WORKSHOP','PROVINCE', 'DUCHY', 'ESTATE', 'GOLD', 'SILVER', 'COPPER']
#card types that are tracked in the play traces for this match-up
card_types_BigMoneyWithGardens_vs_DoubleWitch = ['GARDENS', 'WITCH', 'CURSE', 'PROVINCE', 'DUCHY', 'ESTATE', 'GOLD', 'SILVER', 'COPPER']
#identifying columns first, followed by one count column per tracked card type
cols = ['PlayerName', 'Player', 'GameID', 'Round', 'CurrentScore'] + card_types_BigMoneyWithGardens_vs_DoubleWitch

In [10]:
#all play traces in a single data frame, restricted to the columns of interest
traces = data.loc[:, cols]
print(traces)

               PlayerName  Player  GameID  Round  CurrentScore  GARDENS  \
2     BigMoneyWithGardens     0.0     2.0    0.0           3.0      0.0   
3             DoubleWitch     1.0     2.0    0.0           3.0      0.0   
6     BigMoneyWithGardens     0.0     2.0    1.0           3.0      0.0   
7             DoubleWitch     1.0     2.0    1.0           3.0      0.0   
10    BigMoneyWithGardens     0.0     2.0    2.0           3.0      0.0   
...                   ...     ...     ...    ...           ...      ...   
4345  BigMoneyWithGardens     1.0    51.0   16.0           4.0      0.0   
4348          DoubleWitch     0.0    51.0   17.0          21.0      0.0   
4349  BigMoneyWithGardens     1.0    51.0   17.0          10.0      0.0   
4352          DoubleWitch     0.0    51.0   18.0          27.0      0.0   
4353  BigMoneyWithGardens     1.0    51.0   18.0          15.0      0.0   

      WITCH  CURSE  PROVINCE  DUCHY  ESTATE  GOLD  SILVER  COPPER  
2       0.0    0.0       0.0   

In [11]:
#we need to make sure all games have the same number of rounds in them
#gameLengths holds, for each game, the largest Round value that appears in its trace
gameLengths = traces.groupby(['GameID'])['Round'].max()
maxNoOfRounds = int(gameLengths.max()) + 1 #round counter starts at zero
print(maxNoOfRounds)

25


In [12]:
def copy_final_deck_at_game_end(group, roundMax, noPlayers):
    """Pad the play trace of a single game so that it covers rounds 0 .. roundMax-1.

    The last `noPlayers` rows of `group` (taken to be the players' decks in the final round that was played) are
    repeated once for every missing round, and the Round counter is incremented by one for each repeated block.

    Parameters
    ----------
    group : pd.DataFrame
        Rows of one game with a 'Round' column. The last `noPlayers` rows are assumed to be the final round,
        one row per player.
    roundMax : int
        Number of rounds the returned trace should contain (rounds are counted from zero).
    noPlayers : int
        Number of rows (players) recorded per round.

    Returns
    -------
    pd.DataFrame
        `group` itself if it already reaches round roundMax-1 (or beyond), otherwise a new, re-indexed frame
        consisting of `group` followed by the padding rows.
    """
    final_round = int(group['Round'].max())
    missing_rounds = (roundMax - 1) - final_round
    if missing_rounds <= 0:
        #in this case we dont need to extend the play trace
        return group
    final_decks = group.iloc[-noPlayers:]
    padding = pd.concat([final_decks] * missing_rounds, ignore_index=True)
    #the Round counter increments by one after every block of noPlayers rows
    padding['Round'] = [final_round + 1 + i // noPlayers for i in range(missing_rounds * noPlayers)]
    return pd.concat([group, padding], ignore_index=True)

In [13]:
#pad every game's trace to maxNoOfRounds rounds, using the notebook-wide player count rather than a literal 2
extended_traces = traces.groupby('GameID').apply(copy_final_deck_at_game_end, maxNoOfRounds, noPlayers = noPlayers).reset_index(drop = True)
#sanity check: after padding, every game must contribute one row per player per round
assert (extended_traces.groupby('GameID').size() == maxNoOfRounds * noPlayers).all(), "padded traces have unequal lengths"
print(extended_traces)

               PlayerName  Player  GameID  Round  CurrentScore  GARDENS  \
0     BigMoneyWithGardens     0.0     2.0    0.0           3.0      0.0   
1             DoubleWitch     1.0     2.0    0.0           3.0      0.0   
2     BigMoneyWithGardens     0.0     2.0    1.0           3.0      0.0   
3             DoubleWitch     1.0     2.0    1.0           3.0      0.0   
4     BigMoneyWithGardens     0.0     2.0    2.0           3.0      0.0   
...                   ...     ...     ...    ...           ...      ...   
2495  BigMoneyWithGardens     1.0    51.0   22.0          15.0      0.0   
2496          DoubleWitch     0.0    51.0   23.0          27.0      0.0   
2497  BigMoneyWithGardens     1.0    51.0   23.0          15.0      0.0   
2498          DoubleWitch     0.0    51.0   24.0          27.0      0.0   
2499  BigMoneyWithGardens     1.0    51.0   24.0          15.0      0.0   

      WITCH  CURSE  PROVINCE  DUCHY  ESTATE  GOLD  SILVER  COPPER  
0       0.0    0.0       0.0   

In [14]:
#next we need to flatten this dataframe so that each trace is a single row.
#Note we drop the current score as we dont need it for now. We also drop the round label as it is redundant
#and it will get reintroduced when flattening through the revised column names
traces_tmp = extended_traces.drop(['CurrentScore', 'Round'], axis = 1)
#NOTE: this rebinds `cols`, which earlier held the column selection used to build `traces`.
#The names are generated round by round (all cards for R0, then all cards for R1, ...), which is the order in which
#.values.flatten() lays out the rows of each group below - assumes the rows of a group are ordered by round
cols = [card_types_BigMoneyWithGardens_vs_DoubleWitch[i] + "_R" + str(r) 
        for r in range(0, maxNoOfRounds) for i in range(0, len(card_types_BigMoneyWithGardens_vs_DoubleWitch))] 

#one flat array of card counts per (PlayerName, Player, GameID) group
extended_traces_flat = traces_tmp.groupby(['PlayerName', 'Player', 'GameID']).apply(lambda df: df[card_types_BigMoneyWithGardens_vs_DoubleWitch].values.flatten())
extended_traces_flat = pd.DataFrame(extended_traces_flat, columns = ['Trace']).reset_index()
#expand each array in 'Trace' into one column per entry and put the identifying columns back in front
extended_traces_flat = pd.concat([extended_traces_flat[['PlayerName', 'Player', 'GameID']], extended_traces_flat['Trace'].apply(pd.Series)], axis=1)
extended_traces_flat.columns = ['PlayerName', 'Player', 'GameID'] + cols
print(extended_traces_flat)

             PlayerName  Player  GameID  GARDENS_R0  WITCH_R0  CURSE_R0  \
0   BigMoneyWithGardens     0.0     2.0         0.0       0.0       0.0   
1   BigMoneyWithGardens     0.0     3.0         0.0       0.0       0.0   
2   BigMoneyWithGardens     0.0     4.0         0.0       0.0       0.0   
3   BigMoneyWithGardens     0.0     5.0         0.0       0.0       0.0   
4   BigMoneyWithGardens     0.0     6.0         0.0       0.0       0.0   
5   BigMoneyWithGardens     0.0     7.0         0.0       0.0       0.0   
6   BigMoneyWithGardens     0.0     8.0         0.0       0.0       0.0   
7   BigMoneyWithGardens     0.0     9.0         0.0       0.0       0.0   
8   BigMoneyWithGardens     0.0    10.0         0.0       0.0       0.0   
9   BigMoneyWithGardens     0.0    11.0         0.0       0.0       0.0   
10  BigMoneyWithGardens     0.0    12.0         0.0       0.0       0.0   
11  BigMoneyWithGardens     0.0    13.0         0.0       0.0       0.0   
12  BigMoneyWithGardens  

In [15]:
#check a couple of traces to make sure flattened and extended trace dataframes agree
#the two printouts below should show the same sequence of values for the chosen player, game and card
player = 1
game = 48
card = 'GARDENS'
#names of the flattened per-round columns for the chosen card
card_cols = [card + "_R" + str(r) for r in range(0, maxNoOfRounds)]
#long format: one row per round
trace_1 = extended_traces[(extended_traces['Player'] == player) & (extended_traces['GameID'] == game)]
print(trace_1[card])
#flat format: one row per trace, one column per round
trace_2 = extended_traces_flat[(extended_traces_flat['Player'] == player) & (extended_traces_flat['GameID'] == game)]
print(trace_2[card_cols])

2301    0.0
2303    0.0
2305    0.0
2307    0.0
2309    0.0
2311    0.0
2313    0.0
2315    0.0
2317    0.0
2319    0.0
2321    0.0
2323    0.0
2325    0.0
2327    0.0
2329    0.0
2331    0.0
2333    0.0
2335    0.0
2337    1.0
2339    1.0
2341    1.0
2343    1.0
2345    1.0
2347    1.0
2349    1.0
Name: GARDENS, dtype: float64
    GARDENS_R0  GARDENS_R1  GARDENS_R2  GARDENS_R3  GARDENS_R4  GARDENS_R5  \
46         0.0         0.0         0.0         0.0         0.0         0.0   

    GARDENS_R6  GARDENS_R7  GARDENS_R8  GARDENS_R9  ...  GARDENS_R15  \
46         0.0         0.0         0.0         0.0  ...          0.0   

    GARDENS_R16  GARDENS_R17  GARDENS_R18  GARDENS_R19  GARDENS_R20  \
46          0.0          0.0          1.0          1.0          1.0   

    GARDENS_R21  GARDENS_R22  GARDENS_R23  GARDENS_R24  
46          1.0          1.0          1.0          1.0  

[1 rows x 25 columns]


In [16]:
#Count the trace pairs we expect to compare.
#For every pair of two different games ('off diagonal') there are 4 combinations of player strategies
#(e.g. for games 2 and 3: bigmoney vs bigmoney, bigmoney vs doublewitch, doublewitch vs bigmoney, ...).
#For a game paired with itself ('diagonal') there is just the one trace comparison available.
noOfGames = len(extended_traces['GameID'].unique())
noOfOffDiagGameCombos = noOfGames * (noOfGames - 1) // 2
noOfPairWiseDistances = 4 * noOfOffDiagGameCombos + noOfGames
print(noOfPairWiseDistances)

4950


In [None]:
#next we need to collect together all pairwise combinations of traces so that we can compute a distance measure

strategy_names = ['BigMoneyWithGardens', 'DoubleWitch']

def _trace_with_suffix(game_id, player_name, side):
    """Return the padded trace of `player_name` in game `game_id`, with its columns labelled for `side` (1 or 2).

    PlayerName, Player and GameID get the suffix '_<side>'; CurrentScore and the card count columns get '_P<side>'.
    'Round' is left untouched so that it can serve as the merge key. The columns Opponent_<side> and
    OpponentName_<side> are appended for future reference.
    """
    trace = extended_traces[(extended_traces['GameID'] == game_id) & (extended_traces['PlayerName'] == player_name)]
    id_suffix = "_" + str(side)
    deck_suffix = "_P" + str(side)
    new_names = {col: col + id_suffix for col in ['PlayerName', 'Player', 'GameID']}
    new_names.update({col: col + deck_suffix
                      for col in ['CurrentScore'] + card_types_BigMoneyWithGardens_vs_DoubleWitch})
    trace = trace.rename(columns = new_names)
    #two-player games: the opponent is the other seat
    playerNo = int(trace['Player' + id_suffix].unique()[0])
    opponentNo = 1 if playerNo == 0 else 0
    trace['Opponent' + id_suffix] = opponentNo
    #game and opponent seat are constant within a trace, so the name only needs to be looked up once
    trace['OpponentName' + id_suffix] = assignName(game_id, opponentNo, noGamesPerMatchUp)
    return trace

# Step 1: list every 'off diagonal' combination of two different games, plus each game paired with itself
unique_values = extended_traces['GameID'].unique()
offdiag_combinations = list(itertools.combinations(unique_values, 2))
diag_combos = [(i,i) for i in unique_values]
combos = offdiag_combinations + diag_combos

# Step 2: prepare each (game, strategy, side) trace once, instead of once for every pair it takes part in
prepared_traces = {(game, name, side): _trace_with_suffix(game, name, side)
                   for game in unique_values for name in strategy_names for side in (1, 2)}

# Step 3: align each pair of traces round by round
list_dfs = []
for GameID_1, GameID_2 in combos:
    if GameID_1 != GameID_2:
        #all four strategy pairings between the two games
        name_pairs = list(itertools.product(strategy_names, repeat = 2))
    else:
        #within a single game only the two opposing players can be compared
        name_pairs = [('BigMoneyWithGardens', 'DoubleWitch')]

    for name_1, name_2 in name_pairs:
        #align play traces by linking the common round column
        list_dfs.append(pd.merge(prepared_traces[(GameID_1, name_1, 1)],
                                 prepared_traces[(GameID_2, name_2, 2)], on = 'Round'))

#finally we need to append together all the dataframes in our list
sym_trace_combos = pd.concat(list_dfs, ignore_index=True)

In [None]:
#the no of rows in our symmetric trace combos should be given by noOfPairWiseDistances * maxNoOfRounds
#(the printed number should equal the first entry of the shape displayed below)
print(noOfPairWiseDistances * maxNoOfRounds)
sym_trace_combos.shape

In [None]:
#next step is to calculate the distance between the decks at each round for each given pair of play traces
def deck_dist_measure(playTracePairForSingleRound, cardTypes):
    """Root-mean-square difference between the card counts of two decks in a single round.

    Parameters
    ----------
    playTracePairForSingleRound : mapping (e.g. a row of the merged trace-pair frame)
        Holds the two card counts for every card type, under the columns '<CARD>_P1' and '<CARD>_P2' as produced
        by the trace merge. The older '<CARD>_1' / '<CARD>_2' naming is accepted as a fallback.
    cardTypes : list of str
        Card types to include in the distance.

    Returns
    -------
    float
        sqrt(sum of squared count differences) / sqrt(number of card types); 0.0 when `cardTypes` is empty.
    """
    if len(cardTypes) == 0:
        #nothing to compare; also avoids a division by zero below
        return 0.0
    squared_dist = 0
    for card in cardTypes:
        counts = []
        for side in (1, 2):
            col = card + "_P" + str(side)
            if col not in playTracePairForSingleRound:
                col = card + "_" + str(side)
            counts.append(playTracePairForSingleRound[col])
        squared_dist += (counts[0] - counts[1])**2
    return math.sqrt(squared_dist)/math.sqrt(len(cardTypes))

In [None]:
#next we need to define the distance measure between our trace vectors
def trace_dist_measure(playTracePairForAllRounds, maxRounds):
    """Return the sum of the 'dist' column of one trace pair divided by `maxRounds`."""
    total_dist = playTracePairForAllRounds['dist'].sum()
    return total_dist / maxRounds

In [None]:
#now compute all the pairwise trace distances
#The per-round deck distance column 'dist' is not created by any earlier cell, so add it here if it is missing:
#root-mean-square difference of the card counts of the two decks in each round
if 'dist' not in sym_trace_combos.columns:
    squared_diffs = sum((sym_trace_combos[card + '_P1'] - sym_trace_combos[card + '_P2'])**2
                        for card in card_types_BigMoneyWithGardens_vs_DoubleWitch)
    sym_trace_combos['dist'] = squared_diffs.pow(0.5) / math.sqrt(len(card_types_BigMoneyWithGardens_vs_DoubleWitch))

#the trace distance of a pair is its per-round distance summed over all rounds (not divided by the number of rounds)
pair_keys = ['GameID_1','GameID_2','PlayerName_1','PlayerName_2']
pairwiseTraceDistances = sym_trace_combos.groupby(pair_keys)['dist'].sum()
df_pairwiseTraceDistances = pairwiseTraceDistances.reset_index()

In [None]:
#one row per trace pair: the two game IDs, the two strategy names and the summed distance
print(df_pairwiseTraceDistances)

In [None]:
#next look at average distances between similar and different strategies as an initial sense check
first_name = df_pairwiseTraceDistances['PlayerName_1']
second_name = df_pairwiseTraceDistances['PlayerName_2']
all_dists = df_pairwiseTraceDistances['dist']

#masks for same-strategy pairs and for mixed pairs (in either order)
both_BM = (first_name == 'BigMoneyWithGardens') & (second_name == 'BigMoneyWithGardens')
both_DW = (first_name == 'DoubleWitch') & (second_name == 'DoubleWitch')
mixed = (((first_name == 'BigMoneyWithGardens') & (second_name == 'DoubleWitch')) |
         ((first_name == 'DoubleWitch') & (second_name == 'BigMoneyWithGardens')))

BMBM = all_dists[both_BM]
BMDW = all_dists[mixed]
DWDW = all_dists[both_DW]

#the three groups together should account for every trace pair
print(len(BMBM) + len(BMDW) + len(DWDW))
dist_groups = [("BMBM", BMBM), ("DWDW", DWDW), ("BMDW", BMDW)]
for tag, dists in dist_groups:
    print("Mean " + tag + ": " + str(round(dists.mean(),2)))
for tag, dists in dist_groups:
    print("Std " + tag + ": " + str(round(dists.std(),2)))

In [None]:
#plot distances along the real line and colour coded; each group gets its own small vertical offset
scatter_groups = [(0, BMBM, 'Big Money With Gardens - Big Money With Gardens'),
                  (0.01, DWDW, 'Double Witch - Double Witch'),
                  (0.02, BMDW, 'Big Money With Gardens - Double Witch')]
for y_offset, dists, legend_label in scatter_groups:
    plt.scatter(dists, [y_offset] * dists.shape[0], s = 2, label = legend_label)
plt.yticks([])
plt.xlabel('Pairwise distances')
plt.legend(loc = (1.1,0.77))

In [None]:
#compute again our pairwise traces but this time also breakdown by opponent type
#(summed per-round distance for every combination of games, strategies and opponents)
granular_keys = ['GameID_1','GameID_2','PlayerName_1', 'OpponentName_1', 'PlayerName_2', 'OpponentName_2']
pairTraceDistsGranular = sym_trace_combos.groupby(granular_keys)['dist'].sum()
df_pairTraceDistsGranular = pairTraceDistsGranular.reset_index()
print(df_pairTraceDistsGranular)

In [None]:
#mean and standard deviation of the summed trace distances for every (strategy, opponent) pairing
#pairTraceDistsGranular is a Series indexed by its grouping keys, so the names below refer to index levels
results_means = pairTraceDistsGranular.groupby(['PlayerName_1', 'OpponentName_1', 'PlayerName_2', 'OpponentName_2']).mean()
print(results_means)
results_std = pairTraceDistsGranular.groupby(['PlayerName_1', 'OpponentName_1', 'PlayerName_2', 'OpponentName_2']).std()
print(results_std)

In [None]:
#what is happening with the large distance value for BM vs BM? i.e. why the large amount of variance?
#look at pairwise traces for smallest and largest distance values
BMBM_data = df_pairwiseTraceDistances[(df_pairwiseTraceDistances['PlayerName_1'] == 'BigMoneyWithGardens') &
                                        (df_pairwiseTraceDistances['PlayerName_2'] == 'BigMoneyWithGardens')]

#rows of the BM-vs-BM pairs with the minimum and the maximum summed distance
smallest_pairwise = BMBM_data.loc[BMBM_data['dist'].idxmin()]
largest_pairwise = BMBM_data.loc[BMBM_data['dist'].idxmax()]
print(smallest_pairwise)
print(largest_pairwise)

In [None]:
def deck_evol_comparison(pairwise_deck_evols, cardTypes):
    """Plot, for each card type, how the number of cards evolves over the rounds for both traces of a pair.

    Parameters
    ----------
    pairwise_deck_evols : pd.DataFrame
        Rows of one trace pair with a 'Round' column and, for every card type, the columns '<CARD>_P1' and
        '<CARD>_P2' as produced by the trace merge (the older '<CARD>_1' / '<CARD>_2' naming is accepted as a
        fallback).
    cardTypes : list of str
        Card types to plot, one panel each. Panels are laid out in three columns and filled column by column,
        which reproduces the 3x3 layout for nine card types.

    Returns
    -------
    None
    """
    noCardTypes = len(cardTypes)
    if noCardTypes == 0:
        return
    noOfCols = 3
    noOfRows = math.ceil(noCardTypes / noOfCols)
    #squeeze = False keeps axs two-dimensional even when there is only a single row of panels
    fig, axs = plt.subplots(noOfRows, noOfCols, squeeze = False)
    for cardIndex, cardType in enumerate(cardTypes):
        ax = axs[cardIndex % noOfRows, cardIndex // noOfRows]
        for side in (1, 2):
            card_col = cardType + "_P" + str(side)
            if card_col not in pairwise_deck_evols:
                card_col = cardType + "_" + str(side)
            ax.plot(pairwise_deck_evols['Round'], pairwise_deck_evols[card_col], label = 'Player_' + str(side))
        ax.set_title(cardType)
        ax.set_xlabel('Round')
    #hide the panels that are left over when the grid is not completely filled
    for unusedIndex in range(noCardTypes, noOfRows * noOfCols):
        axs[unusedIndex % noOfRows, unusedIndex // noOfRows].set_visible(False)
    #the line labels are the same in every panel, so a legend in the first panel is enough
    axs[0, 0].legend(fontsize = 'small')
    fig.tight_layout()

In [None]:
#need to look at deck evolutions for each of these cases
def _rows_for_pair(pair_summary):
    """Return the rows of sym_trace_combos that belong to the trace pair described by `pair_summary`."""
    pair_keys = ['GameID_1', 'GameID_2', 'PlayerName_1', 'PlayerName_2']
    in_pair = sym_trace_combos[pair_keys[0]] == pair_summary[pair_keys[0]]
    for key in pair_keys[1:]:
        in_pair = in_pair & (sym_trace_combos[key] == pair_summary[key])
    return sym_trace_combos[in_pair]

largest_pairwise_decks = _rows_for_pair(largest_pairwise)
smallest_pairwise_decks = _rows_for_pair(smallest_pairwise)
#these totals should reproduce the largest and smallest summed distances found above
print(largest_pairwise_decks['dist'].sum())
print(smallest_pairwise_decks['dist'].sum())

In [None]:
#deck evolution of the BM-vs-BM pair with the largest summed distance
deck_evol_comparison(largest_pairwise_decks, card_types_BigMoneyWithGardens_vs_DoubleWitch)

In [None]:
#deck evolution of the BM-vs-BM pair with the smallest summed distance
deck_evol_comparison(smallest_pairwise_decks, card_types_BigMoneyWithGardens_vs_DoubleWitch)

In [None]:
#Note: is the distance metric getting artificially inflated by games that end early? All traces are padded to
#maxNoOfRounds rounds, so if the difference in decks is quite large and the game ends at, say, round 20, this distance
#is copied for each of the remaining rounds.
#Compare the smallest and largest differences to see what is driving this.
#Do games end quicker when the deck difference is bigger, which then inflates the distance metric?