In [1]:
from pandas import (
    read_csv,
    DataFrame,
    merge,
)
from numpy import (
    std,
    nan,
)

In [2]:
df = read_csv(
    'ratings_fide_november_2020.csv',
    low_memory=False,
    encoding='latin-1',
)

In [3]:
"""
Check 'descriptions.txt' for details on the fields
Field names do not exactly match descriptions on the ratings page in FIDE site.
"""
df.head()

Unnamed: 0,fideid,name,country,sex,title,w_title,o_title,foa_title,rating,games,k,rapid_rating,rapid_games,rapid_k,blitz_rating,blitz_games,blitz_k,birthday,flag
0,10688862,"A Abdel Maabod, Hoda",EGY,F,,,,,,,,,,,,,,2009.0,w
1,10224084,"A B M Hasibuzzaman, Tapan",BAN,M,,,,,,,,,,,,,,1977.0,
2,10245154,"A B M Jobair, Hossain",BAN,M,,,,,,,,1599.0,0.0,20.0,,,,1998.0,
3,10243054,"A B M Mustakim, Chowdhury",BAN,M,,,,,,,,,,,,,,2013.0,
4,25121731,A C J John,IND,M,,,,,1063.0,0.0,40.0,,,,,,,1987.0,


In [4]:
df.shape

(983184, 19)

In [5]:
df['sex'].value_counts()

M    837331
F    145853
Name: sex, dtype: int64

In [6]:
"""
Drop players who have no rating
"""
df = df.dropna(
    subset=[
        'rating',
    ],
)
df.shape

(364307, 19)

In [7]:
df.head()

Unnamed: 0,fideid,name,country,sex,title,w_title,o_title,foa_title,rating,games,k,rapid_rating,rapid_games,rapid_k,blitz_rating,blitz_games,blitz_k,birthday,flag
4,25121731,A C J John,IND,M,,,,,1063.0,0.0,40.0,,,,,,,1987.0,
5,35077023,A Chakravarthy,IND,M,,,,,1151.0,0.0,40.0,,,,,,,1986.0,i
6,10207538,"A E M, Doshtagir",BAN,M,,,,,1840.0,0.0,40.0,1836.0,0.0,20.0,1860.0,0.0,20.0,1974.0,i
10,10680810,"A hamed Ashraf, Abdallah",EGY,M,,,,,1728.0,0.0,40.0,,,,1741.0,0.0,20.0,2001.0,
11,5716365,"A Hamid, Harman",MAS,M,,,NI,,1325.0,0.0,40.0,1593.0,0.0,20.0,,,,1970.0,i


In [8]:
df['sex'].value_counts()

M    325429
F     38878
Name: sex, dtype: int64

In [9]:
"""
Drop players who are inactive
"""
df = df[df['flag'].isnull()]
df.shape

(175840, 19)

In [10]:
df.head()

Unnamed: 0,fideid,name,country,sex,title,w_title,o_title,foa_title,rating,games,k,rapid_rating,rapid_games,rapid_k,blitz_rating,blitz_games,blitz_k,birthday,flag
4,25121731,A C J John,IND,M,,,,,1063.0,0.0,40.0,,,,,,,1987.0,
10,10680810,"A hamed Ashraf, Abdallah",EGY,M,,,,,1728.0,0.0,40.0,,,,1741.0,0.0,20.0,2001.0,
17,10206612,"A K M, Sourab",BAN,M,,,,,1598.0,0.0,20.0,,,,,,,,
18,5045886,"A K, Kalshyan",IND,M,,,,,1777.0,0.0,20.0,,,,,,,1964.0,
29,5716373,"A Rajah, Abdul Shukor",MAS,M,,,,,1847.0,0.0,40.0,2015.0,0.0,20.0,1873.0,0.0,20.0,,


In [11]:
df['sex'].value_counts()

M    175840
Name: sex, dtype: int64

In [12]:
"""
Sort the rows by country
"""
df = df.sort_values(
    by=[
        'country',
    ],
)

In [13]:
df.head()

Unnamed: 0,fideid,name,country,sex,title,w_title,o_title,foa_title,rating,games,k,rapid_rating,rapid_games,rapid_k,blitz_rating,blitz_games,blitz_k,birthday,flag
781825,11700580,"Sarwari, Hamidullah",AFG,M,,,,,1863.0,0.0,20.0,1840.0,0.0,20.0,1829.0,0.0,20.0,1989.0,
338981,11700718,"Habibullah, Guldoost",AFG,M,,,,,1601.0,0.0,20.0,,,,,,,,
580084,11701552,"Mohtaat, Homayoun",AFG,M,,,,AIM,1791.0,8.0,20.0,1680.0,0.0,20.0,1796.0,0.0,20.0,1972.0,
811140,11701064,Shir Ahmad Wahab Zadah,AFG,M,,,,,1292.0,0.0,40.0,1372.0,0.0,20.0,1389.0,0.0,20.0,1976.0,
965715,11702010,"Yusuf, Abdul Majid",AFG,M,,,,,1137.0,0.0,40.0,,,,,,,1958.0,


In [14]:
"""
Take players who has at least one rated game
"""
df = df[df['games'] > 0]
df.shape

(15850, 19)

In [15]:
df.head()

Unnamed: 0,fideid,name,country,sex,title,w_title,o_title,foa_title,rating,games,k,rapid_rating,rapid_games,rapid_k,blitz_rating,blitz_games,blitz_k,birthday,flag
580084,11701552,"Mohtaat, Homayoun",AFG,M,,,,AIM,1791.0,8.0,20.0,1680.0,0.0,20.0,1796.0,0.0,20.0,1972.0,
922717,4701313,"Veleshnja, Zino",ALB,M,,,,,2278.0,7.0,20.0,2104.0,0.0,20.0,2058.0,0.0,20.0,2001.0,
935875,4703529,"Vogli, Erjon",ALB,M,,,,,1557.0,5.0,40.0,1489.0,0.0,20.0,1458.0,0.0,20.0,2003.0,
66128,4700066,"Ashiku, Franc",ALB,M,IM,,"SI,SI",,2420.0,7.0,10.0,2378.0,0.0,20.0,2306.0,0.0,20.0,1993.0,
918865,4701496,"Vasili, Marinel",ALB,M,,,,,1616.0,3.0,40.0,,,,,,,2002.0,


In [16]:
df['sex'].value_counts()

M    15850
Name: sex, dtype: int64