## Data Processing

### Reading in Engineered Data

In [1]:
# imports for processing

import pandas as pd
import numpy as np
from collections import Counter

In [2]:
# Load the engineered hearts data. The CSV carries a saved row index that
# pandas reads in as the column "Unnamed: 0"; that column is dropped here.
scores = (
    pd.read_csv('data/hearts_anon_processed.csv')
    .drop(columns = ["Unnamed: 0"])
)
scores.head()

Unnamed: 0,game_id,hand_id,player,received_cards_from,total_score,points_per_hand,percent_points_per_hand,queen_spades,moon_shooter,best_player_of_hand,best_player_of_game,game_winner
0,1,1,player_1,player_4,6,6.0,6.12,0,none,player_2,player_1,player_2
1,1,2,player_1,player_2,6,0.0,0.0,0,none,player_1,player_1,player_2
2,1,3,player_1,player_3,10,4.0,4.08,0,none,player_2,player_1,player_2
3,1,4,player_1,none,26,16.0,16.33,1,none,player_4,player_1,player_2
4,1,5,player_1,player_4,26,0.0,0.0,0,none,player_1,player_1,player_2


### Summary Stats About the `scores` DataFrame

In [3]:
### Dictionary of the number of hands in each game

# Distinct game ids in order of first appearance. Despite its name this is an
# array of ids, not a count; len(num_games) is the number of games played.
num_games = scores['game_id'].unique()

# Distinct player names in ascending order.
player_list = scores['player'].sort_values(ascending = True).unique()

# Maps each game id (as a string key) to the number of distinct hand_id values
# recorded for that game.
games_and_hands_dict = {}
for game in num_games:
    hands_per_game = scores.loc[scores['game_id'] == game, 'hand_id'].unique()
    games_and_hands_dict[str(game)] = len(hands_per_game)

# Sanity checks on the name columns: expect the four players, plus "none" in
# received_cards_from, and no misspelled variants.
print(scores['received_cards_from'].unique())
print(player_list)

games_and_hands_dict

['player_4' 'player_2' 'player_3' 'none' 'player_1']
['player_1' 'player_2' 'player_3' 'player_4']


{'1': 10,
 '2': 11,
 '3': 8,
 '4': 12,
 '5': 8,
 '6': 12,
 '7': 7,
 '8': 9,
 '9': 5,
 '10': 8,
 '11': 9,
 '12': 8,
 '13': 12,
 '14': 7}

In [8]:
# End-of-game summary: one row per player with the highest, lowest and average
# final score, plus the mean of percent_points_per_hand over final-hand rows.

# Collect the rows of each game's last hand, i.e. rows whose hand_id equals the
# number of distinct hands in that game, so that total_score on these rows is
# the final score. (Assumes hand_id runs 1..N within a game -- TODO confirm.)
last_hands_list = []
for game, last_hand in games_and_hands_dict.items():
    last_hand_df = scores.query(f'game_id == {game} & hand_id == {last_hand}')
    last_hands_list.append(last_hand_df)

# Concatenate once, after the loop, instead of rebuilding the frame on every
# iteration. pd.concat raises on an empty list, so fall back to an empty frame
# with the same columns when there are no games.
if last_hands_list:
    all_last_hands = pd.concat(last_hands_list)
else:
    all_last_hands = pd.DataFrame(columns = scores.columns.tolist())

player_score_mean = [] # list of player avg total scores at end of game
player_score_max = [] # list of player max total scores at end of game
player_score_min = [] # list of player min total scores at end of game
player_mean_ppg = [] # list of player avg percent_points_per_hand (final-hand rows only)

for player_name in player_list:
    # Filter once per player and reuse the subset for every statistic.
    player_last_hands = all_last_hands.query(f'player == "{player_name}"')
    final_scores = player_last_hands['total_score']

    # NOTE(review): this mean is taken over final-hand rows only, not over all
    # hands -- confirm that is the intended meaning of the column label below.
    player_mean_ppg.append(round(player_last_hands['percent_points_per_hand'].mean(), 2))
    player_score_max.append(round(final_scores.max(), 2))
    player_score_min.append(round(final_scores.min(), 2))
    player_score_mean.append(round(final_scores.mean(), 2))

# dict of summary stats of each player in last hand; key order sets column order
stats_dict = {
    'Player': player_list,
    'Highest Final Score': player_score_max,
    'Lowest Final Score': player_score_min,
    'Average Final Score': player_score_mean,
    'Average Percentage of Points Per Hand': player_mean_ppg,
}

stats_df = pd.DataFrame(stats_dict)
stats_df.sort_values(by = 'Lowest Final Score', ascending = True)

Unnamed: 0,Player,Highest Final Score,Lowest Final Score,Average Final Score,Average Percentage of Points Per Hand
3,player_4,121,15,76.71,10.36
0,player_1,121,22,79.71,18.86
1,player_2,117,31,67.57,12.05
2,player_3,109,36,68.64,9.51


### Average Points When Passed Cards From Each Player

In [5]:
# Average points taken per hand, split by who passed the cards.
#
# Orientation of the resulting table: each "Avg Points from Player N" column
# holds player_N's mean points_per_hand, and each row ("Player") is the player
# that player_N received cards from. Hands where received_cards_from is "none"
# are not included because "none" is not in player_list.

def _mean_points(receiver, passer):
    """Mean points_per_hand of `receiver` over rows where cards came from `passer`."""
    selected = (scores['player'] == receiver) & (scores['received_cards_from'] == passer)
    return scores.loc[selected, 'points_per_hand'].mean()

points_from_p1 = []
points_from_p2 = []
points_from_p3 = []
points_from_p4 = []

for player in player_list:
    points_from_p1.append(_mean_points("player_1", player))
    points_from_p2.append(_mean_points("player_2", player))
    points_from_p3.append(_mean_points("player_3", player))
    points_from_p4.append(_mean_points("player_4", player))

passed_points_dict = {
    'Player': player_list,
    'Avg Points from Player 1': points_from_p1,
    'Avg Points from Player 2': points_from_p2,
    'Avg Points from Player 3': points_from_p3,
    'Avg Points from Player 4': points_from_p4,
}

# Combinations with no matching rows give NaN means; they are shown as 0 here.
passed_points_df = pd.DataFrame(passed_points_dict).fillna(0)
passed_points_df.round(2)

Unnamed: 0,Player,Avg Points from Player 1,Avg Points from Player 2,Avg Points from Player 3,Avg Points from Player 4
0,player_1,0.0,8.77,6.81,9.12
1,player_2,6.75,0.0,7.74,10.23
2,player_3,10.81,7.22,0.0,8.69
3,player_4,6.29,8.03,8.19,0.0
