# The World Map of Chess.com

In [5]:
import re
import numpy as np
import pandas as pd
from datetime import datetime
import folium
from folium.plugins import MarkerCluster
from geopy.geocoders import Nominatim
import plotly.express as px
import plotly as plty
from folium.features import DivIcon
from folium.plugins import FastMarkerCluster

## Dataset: Chess.com

In [1]:
def read_chess_dataset(fname):
    """Load a players CSV into a DataFrame.

    The first CSV column becomes the index, and object columns are
    soft-converted to more specific dtypes with ``infer_objects``.

    Args:
        fname: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    players = pd.read_csv(fname, index_col=0).infer_objects()
    # Cleanup by player name is currently disabled:
    #     players.drop_duplicates(subset=['name'], keep='last', inplace=True)
    #     players.dropna(subset=['name'], inplace=True)
    return players

In [7]:
def rename_country(df, country, country_by):
    """Replace one country name with another in ``df['country_name']``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a ``country_name`` column.
        country: Name to look for (exact match).
        country_by: Name written in place of every match.

    Returns:
        The same ``df`` object, after the replacement.
    """
    # Select rows with a boolean mask rather than by index label: label-based
    # selection would also rewrite unrelated rows whenever the index contains
    # duplicate labels.
    matches = df['country_name'] == country
    df.loc[matches, 'country_name'] = country_by
    return df

In [8]:
def get_chess_countries(df):
    """Return the distinct country names found in ``df``.

    Prints the number of distinct names and returns them as a one-column
    DataFrame (``country_name``), in order of first appearance.
    """
    distinct_names = df['country_name'].unique()
    countries = pd.DataFrame({'country_name': distinct_names})
    print('Total %d countries' % len(countries))
    return countries

In [9]:
def get_countries_count(df):
    """Count rows per (country, country_name, title) combination.

    Returns:
        A DataFrame with one row per distinct combination and its number
        of occurrences in an ``nPlayers`` column, in the order produced by
        ``value_counts``.
    """
    key_columns = ['country', 'country_name', 'title']
    counts = df[key_columns].value_counts()
    return counts.reset_index(name='nPlayers')

## Chess.com Countries

In [10]:
def plot_chess_countries_info(df, x_col, y_col, chess_title, color_col='', orient='h'):
    """Show a stacked bar chart of player counts with plotly express.

    The chart title reports the sum of ``df['nPlayers']`` and the number of
    distinct ``df['country_name']`` values.

    Args:
        df: DataFrame with ``country_name`` and ``nPlayers`` columns plus
            the columns named by ``x_col``, ``y_col`` and ``color_col``.
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis.
        chess_title: Group label inserted into the chart title.
        color_col: Column used to colour the bars; falls back to ``x_col``
            when empty.
        orient: Bar orientation passed to ``px.bar``.
    """
    country_total = len(df['country_name'].unique())
    player_total = df['nPlayers'].sum()
    chart_title = '%d %s from %d Countries @ Chess.com' % (
        player_total, chess_title, country_total)
    bar_color = color_col if color_col else x_col

    fig = px.bar(
        df,
        x=x_col,
        y=y_col,
        orientation=orient,
        color=bar_color,
        barmode='stack',  # alternative: 'group'
        title=chart_title,
        height=700,
    )
    fig.update_layout(xaxis_tickangle=-45, xaxis_title='')
    fig.show()
    # HTML export is currently disabled:
    #     plty.offline.plot(fig, filename='%s.html' % chess_title)

In [11]:
# Load the titled-players export and rename 'United States' to
# 'United States of America' (presumably to match the country names in the
# world GeoJSON used for the choropleth below — TODO confirm).
fname = './chess_datasets/chesscom_titled_players_02_12_2020.csv'
df_data = read_chess_dataset(fname) 
df_data = rename_country(df_data, 'United States', 'United States of America')
# Display (rows, columns) of the loaded data.
df_data.shape

(6961, 86)

In [12]:
# Per-country counts of players holding a women's title (WGM/WIM/WFM/WCM),
# plotted as vertical bars coloured by title.
df_women = get_countries_count(df_data[df_data['title'].isin(['WGM','WIM','WFM','WCM'])])
plot_chess_countries_info(df_women, 'country_name', 'nPlayers', 'Women', color_col='title', orient='v')

In [13]:
# Per-country counts of players holding GM/IM/FM/CM titles, plotted as
# vertical bars coloured by title.
df_men = get_countries_count(df_data[df_data['title'].isin(['GM','IM','FM','CM'])])
plot_chess_countries_info(df_men, 'country_name', 'nPlayers', 'Men', color_col='title', orient='v')

In [14]:
# Per-country counts of grandmasters (WGM and GM together), plotted as
# vertical bars coloured by title.
df_gms = get_countries_count(df_data[df_data['title'].isin(['WGM','GM'])])
plot_chess_countries_info(df_gms, 'country_name', 'nPlayers', 'Grandmasters', color_col='title', orient='v')

## Geolocator

In [15]:
# Shared Nominatim geocoder; get_latitude_longitude reads this global.
geolocator = Nominatim(user_agent="chess_countries")

In [16]:
def get_latitude_longitude(country):
    """Geocode a country name to ``[latitude, longitude]``.

    The lookup goes through the module-level ``geolocator``. 'Georgia' is
    queried as 'GE' instead of by name (presumably to avoid an ambiguous
    match — TODO confirm). Any exception raised during the lookup is
    reported on stdout and ``[np.nan, np.nan]`` is returned instead.
    """
    query_overrides = {'Georgia': 'GE'}
    try:
        if country in ['Georgia']:
            country = query_overrides[country]
        place = geolocator.geocode(country)
        coords = [place.latitude, place.longitude]
    except Exception as details:
        # Best effort: report the failing query and fall back to NaNs.
        print('>> ERROR getting country:%s \n%s' % (country, details))
        coords = [np.nan, np.nan]
    return coords

In [17]:
def get_geoloc_data(df):
    """Add ``latitude`` and ``longitude`` columns geocoded from ``country_name``.

    Every value of ``df['country_name']`` is passed to
    ``get_latitude_longitude``. ``df`` is modified in place and returned.
    """
    # Keep the [lat, lon] pairs in a scratch column while they are unpacked.
    df['geoloc'] = df['country_name'].apply(get_latitude_longitude)
    for position, column in enumerate(['latitude', 'longitude']):
        df[column] = df['geoloc'].apply(lambda pair, i=position: pair[i])
    del df['geoloc']
    return df

In [18]:
def get_chess_titled_geoloc(df, title, geoinfo):
    """Count players holding ``title`` per country and attach coordinates.

    Args:
        df: Players DataFrame with ``country``, ``country_name`` and
            ``title`` columns.
        title: Title to keep (exact match against ``df['title']``).
        geoinfo: DataFrame indexed by country name whose last two columns
            are latitude and longitude, in that order.

    Returns:
        The result of ``get_countries_count`` for the selected players with
        ``latitude`` and ``longitude`` columns appended.

    Raises:
        KeyError: If a country name is missing from ``geoinfo``'s index.
    """
    df_geo = get_countries_count(df[df['title'] == title])
    # Use .iloc for the positional lookup: plain ``row[-2]`` on a row indexed
    # by column names relies on the deprecated integer fallback of
    # Series.__getitem__.
    df_geo['latitude'] = df_geo['country_name'].apply(
        lambda name: geoinfo.loc[name, :].iloc[-2])
    df_geo['longitude'] = df_geo['country_name'].apply(
        lambda name: geoinfo.loc[name, :].iloc[-1])
    return df_geo

In [19]:
# Distinct country names among the titled players (also prints the total).
chess_countries = get_chess_countries(df_data)

Total 189 countries


In [20]:
# Country coordinates are read from a CSV file, with its first column as the
# index. The commented-out lines compute the coordinates with
# get_geoloc_data and write them to this file.
fname = './chess_datasets/chesscom_countries_geoloc.csv'
# data_geoloc = get_geoloc_data(chess_countries)
# data_geoloc.to_csv(fname,index=False)
# data_geoloc.index = data_geoloc['country_name']
data_geoloc = pd.read_csv(fname,index_col=0) 

In [21]:
# Preview the first rows of the coordinate table.
data_geoloc.head()

Unnamed: 0_level_0,latitude,longitude
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Vietnam,13.290403,108.426511
Greece,38.995368,21.987713
Kazakhstan,47.228609,65.20932
Netherlands,52.50017,5.748082
Armenia,40.769627,44.673665


## World Map

In [22]:
#Setting up the world countries data URL
# URL of the world-countries GeoJSON in the folium example data; it is passed
# to create_chess_world_map as the choropleth geometry.
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
country_geodata = f'{url}/world-countries.json'

In [47]:
def create_chess_world_map(df, geoinfo, title, startloc=None):
    """Build a folium world map of player counts per country.

    Every row of ``df`` gets a small text marker showing its player count,
    and a choropleth layer shades the countries by ``nPlayers``.

    Args:
        df: One row per country with ``country_name`` and ``nPlayers``
            columns; the last two columns must be latitude and longitude,
            in that order.
        geoinfo: Country outlines handed to ``folium.Choropleth`` as
            ``geo_data``; features are matched to ``df['country_name']``
            through ``feature.properties.name``.
        title: Label used in the layer name and the legend.
        startloc: Optional ``[latitude, longitude]`` map centre. A falsy
            value (``None`` or an empty list) selects the built-in default.

    Returns:
        The ``folium.Map`` with markers and choropleth added.
    """
    # Quantile bins for the choropleth are currently disabled:
    #     bins = list(df['nPlayers'].quantile([0, 0.25, 0.5, 0.75, 1]))
    if not startloc:
        startloc = [2.8894434, -73.783892]
    m = folium.Map(location=startloc, zoom_start=2)

    for _, row in df.iterrows():
        lat, lon = row.iloc[-2], row.iloc[-1]
        # Rows whose geocoding failed carry NaN coordinates; a marker cannot
        # be placed for them, so they only appear in the choropleth.
        if pd.isna(lat) or pd.isna(lon):
            continue
        label = '<div style="font-size: 6pt;  color:black" >%d</div>' % row['nPlayers']
        folium.map.Marker([lat, lon], icon=DivIcon(html=label)).add_to(m)
        # Alternative marker style (disabled):
        #     folium.CircleMarker([lat, lon], radius=5, popup='blabla',
        #                         color='green', fill=True, fill_color='green',
        #                         fill_opacity=0.7).add_to(m)

    choropleth = folium.Choropleth(
        geo_data=geoinfo,
        name='World Chess %s' % title,
        data=df,
        columns=['country_name', 'nPlayers'],
        key_on='feature.properties.name',
        fill_color='YlGnBu',  # alternatives tried: 'YlGn', 'GnBu'
        fill_opacity=0.7,
        line_opacity=0.3,
        nan_fill_color='lightgrey',
        legend_name='Number of %s' % title,
        # bins=bins,
        reset=True,
        highlight=True,
    ).add_to(m)

    # Show the country name when hovering over a shape.
    choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(['name'], labels=False)
    )

    # Optional extras (disabled):
    #     FastMarkerCluster(data=list(zip(df['latitude'].values, df['longitude'].values))).add_to(m)
    #     folium.LayerControl().add_to(m)

    return m

## Woman Grandmasters (WGM)

In [24]:
# Per-country WGM counts with coordinates; each row is one country, so the
# row count is reported as the number of countries.
WGM_countries = get_chess_titled_geoloc(df_data,'WGM',data_geoloc)
print ('Total %d WGM from %d countries' % (WGM_countries['nPlayers'].sum(), len(WGM_countries)))

Total 154 WGM from 51 countries


In [25]:
# Preview the ten countries with the most WGMs.
WGM_countries.head(n=10)

Unnamed: 0,country,country_name,title,nPlayers,latitude,longitude
0,RU,Russia,WGM,24,64.686314,97.745306
1,US,United States of America,WGM,10,39.78373,-100.445882
2,UA,Ukraine,WGM,9,49.487197,31.271832
3,IN,India,WGM,7,22.351115,78.667743
4,GE,Georgia,WGM,6,41.680971,44.028738
5,AZ,Azerbaijan,WGM,6,40.393629,47.787251
6,PL,Poland,WGM,6,52.018991,19.195586
7,VN,Vietnam,WGM,5,13.290403,108.426511
8,DE,Germany,WGM,5,51.08342,10.423447
9,CN,China,WGM,5,35.000074,104.999927


In [26]:
# Build the WGM world map and display it as the cell output.
WGM_map = create_chess_world_map(WGM_countries, country_geodata, 'Woman Grandmasters')
WGM_map

In [27]:
# Write the WGM map to a standalone HTML file.
WGM_map.save('./plots/WGM_chess_worldmap.html') 

## Grandmasters (GM)

In [28]:
# Per-country GM counts with coordinates; each row is one country, so the
# row count is reported as the number of countries.
GM_countries = get_chess_titled_geoloc(df_data,'GM',data_geoloc)
print ('Total %d GM from %d countries' % (GM_countries['nPlayers'].sum(), len(GM_countries)))

Total 999 GM from 86 countries


In [29]:
# Build the GM world map and display it as the cell output.
GM_map = create_chess_world_map(GM_countries, country_geodata, 'Grandmasters')
GM_map

In [30]:
# Write the GM map to a standalone HTML file.
GM_map.save('./plots/GM_chess_worldmap.html')

# ChessKids

In [31]:
# Load the ChessKid players export. It is read with plain pd.read_csv (default
# index) instead of read_chess_dataset, which uses the first column as index.
fname_kids = './chess_datasets/chesskid_players_02_12_2020.csv'
# df_data_kids = read_chess_dataset(fname_kids) 
df_data_kids = pd.read_csv(fname_kids)
df_data_kids = rename_country(df_data_kids, 'United States', 'United States of America')
# Display (rows, columns) of the loaded data.
df_data_kids.shape

(5807, 87)

In [32]:
# Distinct country names among the ChessKid players (also prints the total).
kids_countries = get_chess_countries(df_data_kids)
kids_countries.shape

Total 172 countries


(172, 1)

In [33]:
# ChessKid country coordinates are read from a CSV file, with its first column
# as the index. The commented-out lines compute the coordinates with
# get_geoloc_data and write them to this file.
fname = './chess_datasets/chesscom_countries_geoloc_chesskids.csv'
# kids_geoloc = get_geoloc_data(kids_countries)
# kids_geoloc.to_csv(fname,index=False)
# kids_geoloc.index = kids_geoloc['country_name']
kids_geoloc = pd.read_csv(fname,index_col=0) 

In [34]:
# Display (rows, columns) of the ChessKid coordinate table.
kids_geoloc.shape

(172, 2)

In [35]:
# Count ChessKid players per (country, country_name) pair; unlike
# get_countries_count, no 'title' column is involved here.
df_kids = df_data_kids[['country','country_name']].value_counts().reset_index(name='nPlayers')

In [36]:
# df_kids = get_countries_count(df_data_kids)
# plot_chess_countries_info(df_kids, 'country_name', 'nPlayers', 'Chess Kids', color_col='is_streamer', orient='v')

In [37]:
# Display (rows, columns) of the per-country ChessKid counts.
df_kids.shape

(171, 3)

In [38]:
# Preview the countries with the most ChessKid players.
df_kids.head()

Unnamed: 0,country,country_name,nPlayers
0,US,United States of America,1259
1,IN,India,545
2,RU,Russia,316
3,VN,Vietnam,215
4,CA,Canada,197


In [39]:
# Attach coordinates from kids_geoloc, whose last two columns are latitude
# and longitude. The positional lookup goes through .iloc because plain
# ``row[-2]`` on a row indexed by column names relies on the deprecated
# integer fallback of Series.__getitem__.
df_kids['latitude'] = df_kids['country_name'].apply(lambda x: kids_geoloc.loc[x,:].iloc[-2])
df_kids['longitude'] = df_kids['country_name'].apply(lambda x: kids_geoloc.loc[x,:].iloc[-1])
df_kids.head()

Unnamed: 0,country,country_name,nPlayers,latitude,longitude
0,US,United States of America,1259,39.78373,-100.445882
1,IN,India,545,22.351115,78.667743
2,RU,Russia,316,64.686314,97.745306
3,VN,Vietnam,215,13.290403,108.426511
4,CA,Canada,197,61.066692,-107.991707


In [40]:
# Build the ChessKids world map and display it as the cell output.
df_kids_map = create_chess_world_map(df_kids, country_geodata, 'ChessKids')
df_kids_map

In [41]:
# Write the ChessKids map (df_kids_map, not GM_map) to its own HTML file.
df_kids_map.save('./plots/ChessKids_worldmap.html')

In [48]:
# Rebuild the ChessKids map under a different title; this rebinds df_kids_map.
# NOTE(review): 'ChessKids_cicle' looks like a typo for 'circle'. The string
# is shown in the layer name and legend — confirm before changing it.
df_kids_map = create_chess_world_map(df_kids, country_geodata, 'ChessKids_cicle')
df_kids_map