In [17]:
# Import dependencies
import pandas as pd
from google.colab import files

In [2]:
# Get a DataFrame that includes International Country Codes and Names
code_path = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
code_df = pd.read_csv(code_path, encoding = "UTF-8", header = 0)
code_df.head()

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,


In [3]:
# Make a new DataFrame that only includes the columns we will use.
reduced_df = code_df[["country-code", "alpha-2", "name"]]

reduced_df.head()

Unnamed: 0,country-code,alpha-2,name
0,4,AF,Afghanistan
1,248,AX,Åland Islands
2,8,AL,Albania
3,12,DZ,Algeria
4,16,AS,American Samoa


In [5]:
# Rename the columns to match the column names in the ERD.
reduced_df = reduced_df.rename(columns = {"country-code": "Country_ID", "alpha-2": "Country_ID_Alpha", "name": "Country_Name"})

reduced_df.head()

Unnamed: 0,Country_ID,Country_ID_Alpha,Country_Name
0,4,AF,Afghanistan
1,248,AX,Åland Islands
2,8,AL,Albania
3,12,DZ,Algeria
4,16,AS,American Samoa


In [6]:
# Check for NaN values.
reduced_df.isna().sum()

Country_ID          0
Country_ID_Alpha    1
Country_Name        0
dtype: int64

In [7]:
# Identify the rows that have NaN values.
reduced_df[reduced_df["Country_ID_Alpha"].isna()]

Unnamed: 0,Country_ID,Country_ID_Alpha,Country_Name
153,516,,Namibia


In [9]:
# Replace the nan values with the country code for Namibia ("NA")
reduced_df = reduced_df.fillna("NA")
reduced_df["Country_ID_Alpha"][153]

'NA'

In [11]:
# Verify the previous code
reduced_df.isna().sum()

Country_ID          0
Country_ID_Alpha    0
Country_Name        0
dtype: int64

In [13]:
# Check the number of values of the DF.
reduced_df.count()

Country_ID          249
Country_ID_Alpha    249
Country_Name        249
dtype: int64

In [16]:
# Check the data types.
reduced_df.dtypes

Country_ID           int64
Country_ID_Alpha    object
Country_Name        object
dtype: object

In [18]:
# Export the table
reduced_df.to_csv("Country_Codes.csv", index=False)
files.download("Country_Codes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>