In [136]:
# import libraries
import pandas as pd
import requests as rq

In [137]:
# Load the Tokyo 2021 medal table from the local assets folder.
medals_csv_path = './assets/Tokyo Medals 2021.csv'
df = pd.read_csv(medals_csv_path)
# Preview the first five rows to confirm the file parsed as expected.
df.head(5)

Unnamed: 0,Country,Gold Medal,Silver Medal,Bronze Medal,Total,Rank By Total
0,United States of America,39,41,33,113,1
1,People's Republic of China,38,32,18,88,2
2,Japan,27,14,17,58,5
3,Great Britain,22,21,22,65,4
4,ROC,20,28,23,71,3


In [138]:
# Team names used at the 2020 Olympics that have to be translated before they
# are sent to the restcountries API. Keys are the names found in the medal
# table; values are the names used for the API lookup instead.
disputed_countries = {
    # Russia competed as ROC (Russian Olympic Committee) due to a ban from the
    # World Anti-Doping Agency.
    'ROC': 'Russian Federation',
    # Taiwan (Republic of China) competed as Chinese Taipei due to the
    # One China Policy.
    'Chinese Taipei': 'Republic of China',
    # "Hong Kong, China" does not return anything from the API, but plain
    # "Hong Kong" does.
    'Hong Kong, China': 'Hong Kong',
    # Macedonia changed its name to North Macedonia in 2019; this is still not
    # reflected on the API.
    'North Macedonia': 'Macedonia',
}

In [139]:
"""
    This function will return the field name specified from the restcountries API
    full_country_name - The country's full name
    field - Field to be retrieved
"""
def get_field_name(full_country_name, field, timeout=10):
    """
    Return one field of a country's record from the restcountries API.

    full_country_name - The country's full name as it appears in the medal
        table. Names listed in disputed_countries are replaced by their
        mapped value before the lookup.
    field - Key to read from the first match in the API response
        (for example 'region').
    timeout - Seconds to wait for each HTTP request before requests raises
        a timeout error instead of hanging indefinitely.

    Returns the value of the field, or None when neither lookup attempt
    answers with HTTP 200 and at least one match. A matched record that lacks
    the requested field raises KeyError.
    """
    # Local import so this cell does not depend on editing the imports cell.
    from urllib.parse import quote

    # Use the mapped name when one exists, otherwise the name as given.
    country_name = disputed_countries.get(full_country_name, full_country_name)

    # Percent-encode the name so characters such as '/', '?' or '#' cannot
    # change the structure of the request URL.
    # NOTE(review): confirm this host is still the one serving the v2 API.
    url = 'https://restcountries.eu/rest/v2/name/' + quote(country_name, safe='')

    # The fullText search is tried first because it yields more accurate
    # results; the plain name search is only a fallback.
    for params in ({'fullText': 'true'}, None):
        response = rq.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            continue
        matches = response.json()
        # Guard against a successful response that carries no records.
        if matches:
            return matches[0][field]

    return None

In [140]:
# create a reference to df['Country'] column for easier referencing
df_country = df['Country']

# regions holds one region per country, in the same order as the rows of df.
# Iterating over the column's values (instead of looking rows up by integer
# label) keeps this working even when df does not have a default 0..n-1 index.
regions = [get_field_name(country, 'region') for country in df_country]

In [141]:
# Add a Region column holding the regions obtained in the previous step.
# On the first run the column is inserted immediately after the first column
# (Country). DataFrame.insert raises ValueError when the column already exists,
# so on a re-run of this cell the existing column is refreshed in place instead.
if 'Region' in df.columns:
    df['Region'] = regions
else:
    df.insert(1, 'Region', regions)

In [145]:
# Preview the first five rows to confirm the Region column is in place.
df.head(5)

Unnamed: 0,Country,Region,Gold Medal,Silver Medal,Bronze Medal,Total,Rank By Total
0,United States of America,Americas,39,41,33,113,1
1,People's Republic of China,Asia,38,32,18,88,2
2,Japan,Asia,27,14,17,58,5
3,Great Britain,Europe,22,21,22,65,4
4,ROC,Europe,20,28,23,71,3


In [144]:
# Count missing values per column after the transformation; a region lookup
# that returned None would show up here as a non-zero Region count.
df.isna().sum()

Country          0
Region           0
Gold Medal       0
Silver Medal     0
Bronze Medal     0
Total            0
Rank By Total    0
dtype: int64