In [11]:
import os
import pandas as pd

### Combine Titled Tuesday data 

In [12]:
folder_path = '../output/Chess.com/Titled Tuesday Pairings'

df_list = []
len_files = [] 

for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        temp_df = pd.read_csv(os.path.join(folder_path, filename))

        temp_df['filename'] = filename.split('.')[0]
        temp_df['date'] = temp_df['filename'].str.extract(r'([a-zA-Z]+-\d{2}-\d{4})')
        
        temp_df['day'] = temp_df['date'].str.extract(r'(\d{2})')
        temp_df['month'] = temp_df['date'].str.extract(r'([a-zA-Z]+)')
        temp_df['year'] = temp_df['date'].str.extract(r'(\d{4})')

        temp_df['white_result'] = temp_df['result'].str.split(' - ').str[0]
        temp_df['black_result'] = temp_df['result'].str.split(' - ').str[1]
        
        temp_df['type'] = temp_df['filename'].str.extract(r'^(early|late)')
        len_files.append(len(temp_df))
        df_list.append(temp_df)

final_df = pd.concat(df_list, ignore_index=True).drop_duplicates()

In [13]:
len(final_df)

41250

In [14]:
final_df.to_csv('../output/Chess.com/final_df.csv', index=False)

### Summary Stats

In [15]:
len(set(final_df.white_username) and set(final_df.black_username))

2233

In [16]:
username_rank = dict(zip(final_df.white_username, final_df.white_rank))
username_rank.update(dict(zip(final_df.black_username, final_df.black_rank)))

In [17]:
white_wins = final_df[final_df['white_result'] == '1'].groupby('white_username').size()
black_wins = final_df[final_df['black_result'] == '1'].groupby('black_username').size()

win_counts = pd.concat([white_wins, black_wins], axis=1, keys=['white_wins', 'black_wins']).fillna(0)
win_counts['total_wins'] = win_counts['white_wins'] + win_counts['black_wins']
win_counts = win_counts.reset_index().rename(columns={'index': 'username'})

display(win_counts)

Unnamed: 0,username,white_wins,black_wins,total_wins
0,0gZPanda,32.0,27.0,59.0
1,13MiRacLe,1.0,0.0,1.0
2,1800_strength,5.0,5.0,10.0
3,1977Ivan,2.0,1.0,3.0
4,1mbl4,2.0,0.0,2.0
...,...,...,...,...
1360,x-2509604246,0.0,1.0,1.0
1361,x-9616587394,0.0,2.0,2.0
1362,ylr5000,0.0,1.0,1.0
1363,zajka-molotok,0.0,1.0,1.0


In [18]:
win_counts.sort_values('total_wins', ascending=False)

Unnamed: 0,username,white_wins,black_wins,total_wins
328,Hikaru,479.0,414.0,893.0
376,Jospem,451.0,396.0,847.0
233,FairChess_on_YouTube,444.0,373.0,817.0
545,Oleksandr_Bortnyk,415.0,369.0,784.0
1000,mishanick,423.0,343.0,766.0
...,...,...,...,...
839,chessajedrezz2020,1.0,0.0,1.0
838,chessS1r,1.0,0.0,1.0
118,Budisavljevic,1.0,0.0,1.0
831,caropawn10,1.0,0.0,1.0


In [19]:
win_counts = pd.concat([white_wins, black_wins], axis=1, keys=['white_wins', 'black_wins']).fillna(0)

# Sum the wins from both white and black perspectives
win_counts['total_wins'] = win_counts['white_wins'] + win_counts['black_wins']

# Count total games played by each player as white and black
white_games = final_df.groupby('white_username').size()
black_games = final_df.groupby('black_username').size()

# Combine the total games counts into one DataFrame
total_games = pd.concat([white_games, black_games], axis=1, keys=['white_games', 'black_games']).fillna(0)

# Sum the total games from both white and black perspectives
total_games['total_games'] = total_games['white_games'] + total_games['black_games']

# Combine win counts and total games into one DataFrame
stats = pd.concat([win_counts['total_wins'], total_games['total_games']], axis=1)

# Calculate the proportion of wins
stats['win_proportion'] = stats['total_wins'] / stats['total_games']

# Reset the index to make the usernames a column
stats = stats.reset_index().rename(columns={'index': 'username'})

stats.shape

(2564, 4)

In [20]:
stats.to_csv('../output/Chess.com/player_stats.csv', index=False)