Requirements

In [None]:
# ! python -m pip install sqlalchemy
# ! python -m pip install lxml
# ! python -m pip install psycopg2

In [35]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

# Extract

In [36]:
# Chess rank website
website_2021 = 'https://en.chessbase.com/post/the-new-fide-world-ranking-list-december-2021'
website_2022 = 'https://en.chessbase.com/post/fide-ratings-december-2022'

# Convert table to dataframe
df1 = pd.read_html(website_2021, header=0, flavor='lxml')[0]

In [37]:
df1.head()

Unnamed: 0,Rank,Name,Title,Country,Rating,Games,B-Year
0,1,"Carlsen, Magnus",g,NOR,2856,1,1990
1,2,"Firouzja, Alireza",g,FRA,2804,20,2003
2,3,"Ding, Liren",g,CHN,2799,4,1992
3,4,"Caruana, Fabiano",g,USA,2792,11,1992
4,5,"Nepomniachtchi, Ian",g,RUS,2782,0,1990


# Transform

Name

In [38]:
df1.loc[:, 'Name'] = df1['Name'].str.split(', ')
df1.loc[:, 'Name'] = df1['Name'].apply(lambda x: x[0] if len(x) == 1 \
                                       else (x[1] + ' ' + x[0]))

In [39]:
df1.head()['Name']

0        Magnus Carlsen
1      Alireza Firouzja
2            Liren Ding
3       Fabiano Caruana
4    Ian Nepomniachtchi
Name: Name, dtype: object

Country

In [40]:
# Extract
country_code_website = 'https://www.iban.com/country-codes'
country_map = pd.read_html(country_code_website, header=0)[0]

# Transform
country_map.drop(['Alpha-2 code', 'Numeric'], axis=1, inplace=True)
country_map.head(3)

Unnamed: 0,Country,Alpha-3 code
0,Afghanistan,AFG
1,Åland Islands,ALA
2,Albania,ALB


In [41]:
# Convert to python dictionary
country_map.set_index('Alpha-3 code', inplace=True)
country_map = country_map.to_dict()
country_map = country_map['Country']

# Replace the code with its name
df1.loc[:, 'Country'] = df1['Country'].map(country_map)

Title

In [42]:
df1.drop('Title', axis=1, inplace=True)

In [43]:
# We only take top 10 entries
lim = 10
df1 = df1.iloc[:lim, :]

df1

Unnamed: 0,Rank,Name,Country,Rating,Games,B-Year
0,1,Magnus Carlsen,Norway,2856,1,1990
1,2,Alireza Firouzja,France,2804,20,2003
2,3,Liren Ding,China,2799,4,1992
3,4,Fabiano Caruana,United States of America (the),2792,11,1992
4,5,Ian Nepomniachtchi,Russian Federation (the),2782,0,1990
5,6,Levon Aronian,Armenia,2772,11,1982
6,7,Anish Giri,,2772,9,1994
7,8,Wesley So,United States of America (the),2772,0,1993
8,9,Shakhriyar Mamedyarov,Azerbaijan,2767,8,1985
9,10,Alexander Grischuk,Russian Federation (the),2764,9,1983


In [33]:
df1.dtypes

Rank        int64
Name       object
Country    object
Rating      int64
Games       int64
B-Year      int64
dtype: object

# Load

In [67]:
from db_url import db_url       # import db_url string from db_url module

engine = create_engine(db_url)
df1.to_sql('chess_player', engine, if_exists='replace', schema='Schema_2', index=False)

10

In [78]:
with engine.connect() as connection:
    result = connection.execute('SELECT * FROM "Schema_2".chess_player')
    print('Rank\tRating\tName')
    for row in result:
        print(str(row[0]) + '\t' + str(row[3]) + '\t' + row[1])

Rank	Rating	Name
1	2856	Magnus Carlsen
2	2804	Alireza Firouzja
3	2799	Liren Ding
4	2792	Fabiano Caruana
5	2782	Ian Nepomniachtchi
6	2772	Levon Aronian
7	2772	Anish Giri
8	2772	Wesley So
9	2767	Shakhriyar Mamedyarov
10	2764	Alexander Grischuk


# 2022

In [85]:
# Extract
df2 = pd.read_html(website_2022, header=0, flavor='lxml')[0]

# Name
df2.loc[:, 'Name'] = df2['Name'].str.split(', ')
df2.loc[:, 'Name'] = df2['Name'].apply(lambda x: x[0] if len(x) == 1 \
                                       else (x[1] + ' ' + x[0]))

# Country
df2.loc[:, 'Country'] = df2['Country'].map(country_map)

# Title
df2.drop('Title', axis=1, inplace=True)

# Updating the last 2021 top players
filt = df2['Name'].isin(df1['Name'].unique())
df2 = df2[filt]

In [86]:
from db_url import db_url       # import db_url string from db_url module

engine = create_engine(db_url)
df2.to_sql('chess_player', engine, if_exists='replace', schema='Schema_2', index=False)

10

In [87]:
with engine.connect() as connection:
    result = connection.execute('SELECT * FROM "Schema_2".chess_player')
    print('Rank\tRating\tName')
    for row in result:
        print(str(row[0]) + '\t' + str(row[3]) + '\t' + row[1])

Rank	Rating	Name
1	2859	Magnus Carlsen
2	2811	Liren Ding
3	2793	Ian Nepomniachtchi
4	2785	Alireza Firouzja
6	2766	Fabiano Caruana
7	2764	Anish Giri
8	2760	Wesley So
12	2745	Alexander Grischuk
14	2740	Shakhriyar Mamedyarov
17	2735	Levon Aronian
