In [75]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
import scipy.stats as st
import os.path
from os import path
import json

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# presumably the destination for titled-player details — TODO confirm against the cell that writes it
output_data_players_Titled = "output_data/players_Titled.csv"

# CSV that is loaded into country_df; the country loop reads its "Code" and "Country" columns.
country_filepath = "resources/countries.csv"

# Title codes; get_titledplayers_api appends each one to
# https://api.chess.com/pub/titled/ to request the player list for that title.
titled_players_type = ["GM","WGM","IM","WIM","FM","WFM","NM","WNM","CM","WCM"]

# Range of latitudes and longitudes
# lat_range = (-90, 90)
# lng_range = (-180, 180)

In [2]:
# function get usernames by country, pass country code and output file
# returns list of usernames for specific country and saves output file to csv
def get_usernames_country(country, output_file):
    """Fetch the chess.com usernames for one country and save them to CSV.

    Requests ``https://api.chess.com/pub/country/{country}/players``, puts the
    ``"players"`` list of the JSON reply into a one-column DataFrame
    (``username``) and writes it to ``output_file`` with the row index stored
    under the ``player_index`` label.

    Args:
        country: Country code inserted into the endpoint URL, e.g. ``"RU"``
            (code list: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2).
        output_file: Path of the CSV file to write.

    Returns:
        The DataFrame of usernames, or ``None`` when the request, the JSON
        decoding, the ``"players"`` lookup or the CSV write fails. Failures
        are reported with ``print`` instead of being raised, so a caller that
        loops over many countries keeps going.
    """
    # ***********************************
    # LIMIT run once per 24 hours to get
    # username list by country
    # ***********************************
    query_url = f"https://api.chess.com/pub/country/{country}/players"

    print(f"Fetching player list for country {country}")

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        username_response = requests.get(query_url, timeout=30)
        # Turn HTTP error replies into an exception here instead of a
        # confusing KeyError on the missing "players" key below.
        username_response.raise_for_status()
        usernames = list(username_response.json()["players"])

        chessplayers_df = pd.DataFrame({"username": usernames})
        chessplayers_df.to_csv(output_file, index=True, index_label="player_index")
    except Exception as exc:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # (kernel interrupt) still stops a long-running download loop.
        print(f"Unexpected error. ({type(exc).__name__}: {exc})")
        return None

    return chessplayers_df

In [11]:
# Load the country lookup table used by the per-country download loop.
# pandas' default NA parsing turns the literal string "NA" into NaN, which
# would corrupt a country code spelled NA (ISO 3166-1 alpha-2 assigns it to
# Namibia). Default NA strings are therefore disabled; only blank cells in
# "File Path" are still read as missing.
# NOTE(review): assumes blank "File Path" cells should stay NaN — confirm.
country_df = pd.read_csv(
    country_filepath,
    keep_default_na=False,
    na_values={"File Path": [""]},
)
country_df

Unnamed: 0,Code,Country,File Path
0,AD,Andorra,players_AD.csv
1,AE,United Arab Emirates,players_AE.csv
2,AF,Afghanistan,players_AF.csv
3,AG,Antigua and Barbuda,players_AG.csv
4,AI,Anguilla,players_AI.csv
...,...,...,...
244,YE,Yemen,
245,YT,Mayotte,
246,ZA,South Africa,
247,ZM,Zambia,


In [4]:
def get_titledplayers_api(playertypes):
    """Download the chess.com titled-player list for each requested title.

    Args:
        playertypes: Iterable of title codes; each one is appended to
            ``https://api.chess.com/pub/titled/`` to form the request URL.

    Returns:
        list: The decoded JSON reply for every title, in request order.
    """
    base_url = "https://api.chess.com/pub/titled/"
    replies = []

    for position, playertype in enumerate(playertypes):
        print(f"Index {position} fetching player list for titled player type {playertype}")

        # Fetch and decode the player list for this title.
        reply = requests.get(f"{base_url}{playertype}")
        replies.append(reply.json())

        # Wait one second before the next request.
        time.sleep(1)

    return replies

In [5]:
def get_playerdata_api(player_usernames):
    """Fetch the chess.com profile JSON for each username.

    Each username is appended to ``https://api.chess.com/pub/player/``. A
    username whose request fails (network error, HTTP error status, invalid
    JSON) is reported with ``print`` and left out of the result, so the
    returned list may be shorter than the input.

    Args:
        player_usernames: Iterable of chess.com usernames.

    Returns:
        list: Decoded profile JSON for every username fetched successfully,
        in input order.
    """
    url = "https://api.chess.com/pub/player/"
    player_data = []

    for i, player_username in enumerate(player_usernames):
        print(f"Index {str(i)} fetching player data for player {player_username}")

        try:
            # A timeout keeps one stalled connection from hanging the run.
            player_response = requests.get(f"{url}{player_username}", timeout=30)
            # Skip HTTP error replies instead of storing their error payload
            # as if it were a player record.
            player_response.raise_for_status()
            player_data.append(player_response.json())
        except Exception as exc:
            # `Exception` rather than a bare `except:` so that a kernel
            # interrupt still stops the loop.
            print(f"unexpected error ({type(exc).__name__}: {exc})")

        # Pause after every request, including failed ones, so that errors
        # do not cause back-to-back requests.
        time.sleep(1)

    return player_data

In [69]:
def get_player_dataframe(playerdata):
    """Build a DataFrame of selected profile fields from player JSON records.

    Every record must provide ``username``, ``player_id``, ``followers``,
    ``country``, ``last_online``, ``joined`` and ``status``. The optional
    ``title``, ``location`` and ``is_streamer`` fields default to ``""``. The
    ``https://api.chess.com/pub/country/`` prefix is stripped from
    ``country``.

    A record that lacks a required field (or is not a mapping) is reported
    with ``print`` and skipped as a whole. Each row is assembled completely
    before it is stored, so a half-processed record can never leave the
    columns with different lengths.

    Args:
        playerdata: Iterable of player dicts as returned by the player API.

    Returns:
        pandas.DataFrame with the columns ``player_id``, ``username``,
        ``title``, ``followers``, ``country``, ``location``, ``last_online``,
        ``joined``, ``status`` and ``is_streamer``; one row per valid record.
    """
    columns = [
        "player_id",
        "username",
        "title",
        "followers",
        "country",
        "location",
        "last_online",
        "joined",
        "status",
        "is_streamer",
    ]
    country_url_prefix = "https://api.chess.com/pub/country/"
    records = []

    for i, player in enumerate(playerdata):
        try:
            username = player["username"]
            print(f"Index {i} processing username {username}")
            record = {
                "player_id": player["player_id"],
                "username": username,
                "title": player.get("title", ""),
                "followers": player["followers"],
                "country": player["country"].replace(country_url_prefix, ""),
                "location": player.get("location", ""),
                "last_online": player["last_online"],
                "joined": player["joined"],
                "status": player["status"],
                "is_streamer": player.get("is_streamer", ""),
            }
        except (KeyError, TypeError, AttributeError) as exc:
            print(f"unexpected error (index {i} skipped: {exc!r})")
            continue
        # Only complete rows reach this point.
        records.append(record)

    # Passing `columns` fixes the column order and keeps the columns present
    # even when no record was valid.
    chessplayer_details_df = pd.DataFrame(records, columns=columns)

    return chessplayer_details_df

In [7]:
# Download the username list of every country in country_df, one CSV per
# country under output_data/.
for index, row in country_df.iterrows():
    try:
        country_name = row["Country"]
        country_code = row["Code"]
        print(f"Fetching usernames for {country_name} ({country_code})")
        output_file_country = f"output_data/players_{country_code}.csv"

        get_usernames_country(country_code, output_file_country)
    except Exception as exc:
        # `Exception` rather than a bare `except:` so that interrupting the
        # kernel really stops this long-running loop.
        print(f"unexpected error/skipping country: {exc!r}")

    # Pause between countries, also after a failure, so errors do not cause
    # back-to-back requests.
    time.sleep(1)

Fetching usernames for Andorra (AD)
Fetching player list for country AD
Fetching usernames for United Arab Emirates (AE)
Fetching player list for country AE
Fetching usernames for Afghanistan (AF)
Fetching player list for country AF
Fetching usernames for Antigua and Barbuda (AG)
Fetching player list for country AG
Fetching usernames for Anguilla (AI)
Fetching player list for country AI
Fetching usernames for Albania (AL)
Fetching player list for country AL
Fetching usernames for Armenia (AM)
Fetching player list for country AM
Fetching usernames for Angola (AO)
Fetching player list for country AO
Fetching usernames for Antarctica (AQ)
Fetching player list for country AQ
Unexpected error.
Fetching usernames for Argentina (AR)
Fetching player list for country AR
Fetching usernames for American Samoa (AS)
Fetching player list for country AS
Fetching usernames for Austria (AT)
Fetching player list for country AT
Fetching usernames for Australia (AU)
Fetching player list for country AU
Fet

Unexpected error.
Fetching usernames for Iraq (IQ)
Fetching player list for country IQ
Fetching usernames for Iran (Islamic Republic of) (IR)
Fetching player list for country IR
Fetching usernames for Iceland (IS)
Fetching player list for country IS
Fetching usernames for Italy (IT)
Fetching player list for country IT
Fetching usernames for Jersey (JE)
Fetching player list for country JE
Fetching usernames for Jamaica (JM)
Fetching player list for country JM
Fetching usernames for Jordan (JO)
Fetching player list for country JO
Fetching usernames for Japan (JP)
Fetching player list for country JP
Fetching usernames for Kenya (KE)
Fetching player list for country KE
Fetching usernames for Kyrgyzstan (KG)
Fetching player list for country KG
Fetching usernames for Cambodia (KH)
Fetching player list for country KH
Fetching usernames for Kiribati (KI)
Fetching player list for country KI
Fetching usernames for Comoros (KM)
Fetching player list for country KM
Fetching usernames for Saint Kitt

Fetching usernames for Eswatini (SZ)
Fetching player list for country SZ
Fetching usernames for Turks and Caicos Islands (TC)
Fetching player list for country TC
Fetching usernames for Chad (TD)
Fetching player list for country TD
Fetching usernames for French Southern Territories (TF)
Fetching player list for country TF
Unexpected error.
Fetching usernames for Togo (TG)
Fetching player list for country TG
Fetching usernames for Thailand (TH)
Fetching player list for country TH
Fetching usernames for Tajikistan (TJ)
Fetching player list for country TJ
Fetching usernames for Tokelau (TK)
Fetching player list for country TK
Unexpected error.
Fetching usernames for Timor-Leste (TL)
Fetching player list for country TL
Fetching usernames for Turkmenistan (TM)
Fetching player list for country TM
Fetching usernames for Tunisia (TN)
Fetching player list for country TN
Fetching usernames for Tonga (TO)
Fetching player list for country TO
Fetching usernames for Turkey (TR)
Fetching player list f

In [None]:
# Fetch (and re-save) the username list for whatever country_code /
# output_file_country currently hold.
# NOTE(review): both names are only assigned inside the per-country loop above,
# so this presumably targets the last country that loop processed — confirm intent.
usernames_ = get_usernames_country(country_code,output_file_country)

In [None]:
# Flatten the per-title API replies into one list of usernames.
# NOTE(review): playerslist_data is assigned by the get_titledplayers_api cell
# that appears below this one; on a fresh kernel that cell has to run first.
player_usernames = [
    player
    for playerlist in playerslist_data
    for player in playerlist["players"]
]

In [None]:
# One API reply per title code in titled_players_type; the flattening cell
# reads the "players" list of each reply.
playerslist_data = get_titledplayers_api(titled_players_type)

```json
{
  "avatar": "https://images.chesscomfiles.com/uploads/v1/user/14775486.9072453d.200x200o.50bb2d10a7d5.jpeg",
  "player_id": 14775486,
  "@id": "https://api.chess.com/pub/player/140mariam",
  "url": "https://www.chess.com/member/140mariam",
  "name": "Mariam Avetisyan",
  "username": "140mariam",
  "title": "WFM",
  "followers": 56,
  "country": "https://api.chess.com/pub/country/AM",
  "last_online": 1609780423,
  "joined": 1385829847,
  "status": "premium",
  "is_streamer": false
}
```

In [None]:
# Reuse the cached player details when the CSV already exists; otherwise call
# the API once per username, build the details table and cache it.
# The original cell used `output_data_players`, a name no visible cell defines;
# the titled-player output path from the setup cell is used instead.
# NOTE(review): confirm output_data_players_Titled is the intended cache file.
if path.exists(output_data_players_Titled):
    # ****************
    # pull previously generated and saved output data from csv
    chessplayer_df = pd.read_csv(output_data_players_Titled, index_col="player_index")
else:
    # run api calls for each chess.com username
    # this step is skipped if the output file already exists
    # !!!!! DELETE CSV OUTPUT FILE to generate new playerdata
    playerdata = get_playerdata_api(player_usernames)
    chessplayer_df = get_player_dataframe(playerdata)
    # Written with the same index label the read branch uses as index_col, so
    # both branches yield the same columns.
    chessplayer_df.to_csv(output_data_players_Titled, index=True, index_label="player_index")
    
    
    
    

In [20]:
# Display the per-country username table.
# NOTE(review): chessplayers_country_df is assigned in the sampling cell further
# down, so on a fresh kernel this cell only works after that one has run.
chessplayers_country_df

Unnamed: 0,player_index,username
0,0,0bladi0blada
1,1,0lychess
2,2,11madonnaimpestata11
3,3,1steef
4,4,2good4uspn
...,...,...
763,763,zezoup
764,764,ziplla
765,765,zodby
766,766,zork1975


In [70]:
## Get sample of users from csv file of one country
country = "AD"
players_country_file = f"output_data/players_{country}.csv"
sample_fraction = .15
sample_seed = 42  # fixed so a re-run draws the same players

# keep_default_na=False / dtype=str: usernames such as "null", "nan" or "NA"
# must stay strings instead of being parsed as missing values.
chessplayers_country_df = pd.read_csv(
    players_country_file,
    dtype={"username": str},
    keep_default_na=False,
)
chessplayers_count = len(chessplayers_country_df)
sample_count = int(round(chessplayers_count * sample_fraction, 0))
#
print(f"{sample_count} is the sample size ({sample_fraction:.0%}) of {country} population ({chessplayers_count})\n")

# Sample WITHOUT replacement: drawing random row numbers with
# np.random.randint could pick the same player several times, which shrinks
# the effective sample and repeats API calls.
sample_country_usernames = (
    chessplayers_country_df["username"]
    .sample(n=sample_count, random_state=sample_seed)
    .tolist()
)

playerdata = get_playerdata_api(sample_country_usernames)



115 is the sample size (15%) of AD population (768)

Index 0 fetching player data for player mtorresad100
Index 1 fetching player data for player rusacat
Index 2 fetching player data for player urisking
Index 3 fetching player data for player corpusdelictis
Index 4 fetching player data for player aaronquerque
Index 5 fetching player data for player buttcheekzz
Index 6 fetching player data for player manolobs
Index 7 fetching player data for player lars_kent
Index 8 fetching player data for player llorencdurgell
Index 9 fetching player data for player xogori
Index 10 fetching player data for player gjosep657
Index 11 fetching player data for player danimaia
Index 12 fetching player data for player picapedrero48
Index 13 fetching player data for player ericrocha
Index 14 fetching player data for player horuszk
Index 15 fetching player data for player pastalle
Index 16 fetching player data for player andreyb01
Index 17 fetching player data for player oliugamer
Index 18 fetching player dat

In [71]:
# Turn the profile JSON fetched for the sampled players into a DataFrame.
player_AD_df = get_player_dataframe(playerdata)

Index 0 processing username mtorresad100
Index 1 processing username rusacat
Index 2 processing username urisking
Index 3 processing username corpusdelictis
Index 4 processing username aaronquerque
Index 5 processing username buttcheekzz
Index 6 processing username manolobs
Index 7 processing username lars_kent
Index 8 processing username llorencdurgell
Index 9 processing username xogori
Index 10 processing username gjosep657
Index 11 processing username danimaia
Index 12 processing username picapedrero48
Index 13 processing username ericrocha
Index 14 processing username horuszk
Index 15 processing username pastalle
Index 16 processing username andreyb01
Index 17 processing username oliugamer
Index 18 processing username freddy402
Index 19 processing username marcomart
Index 20 processing username noveoelhake
Index 21 processing username bplebowski
Index 22 processing username tomaigle
Index 23 processing username laiens
Index 24 processing username eoge
Index 25 processing username p

In [73]:
# Display the player details table built in the previous cell.
player_AD_df

Unnamed: 0,player_id,username,title,followers,country,location,last_online,joined,status,is_streamer
0,99881252,mtorresad100,,4,AD,,1610219534,1606478661,basic,False
1,74828540,rusacat,,0,AD,,1610127386,1585412935,basic,False
2,107507840,urisking,,1,AD,,1610197204,1609074729,basic,False
3,27872816,corpusdelictis,,0,AD,andorra,1610188171,1461398448,basic,False
4,52115428,aaronquerque,,2,AD,,1610224667,1542738728,basic,False
...,...,...,...,...,...,...,...,...,...,...
110,44431136,mikipa14,,3,AD,,1610211516,1521471164,basic,False
111,90562784,llforgoll,,20,AD,,1610218005,1599246924,basic,False
112,102771138,raysnalp,,1,AD,,1610202756,1607442428,basic,False
113,98630956,xeliptik,,7,AD,,1610123919,1606073011,basic,False


In [74]:
# Save the sampled player details; the row index is written as the
# "player_index" column.
output_data_players_AD = 'output_data/player_details_AD.csv'
player_AD_df.to_csv(output_data_players_AD, index=True,index_label="player_index") 