# Citations
##### OECD (2021), "Data warehouse", OECD.Stat (database), https://doi.org/10.1787/data-00900-en (accessed on 17 January 2021).

# Obtaining a list of countries and the codes

In [1]:
# Importing the relevant dependencies
import pandas as pd

In [2]:
# Obtaining a list of all the countries and their associated three letter codes
countries = pd.read_html('https://countrycode.org/')

In [3]:
# Viewing the Dataframe
countries

[               COUNTRY COUNTRY CODE ISO CODES  POPULATION  AREA KM2  \
 0          Afghanistan           93  AF / AFG    29121286    647500   
 1              Albania          355  AL / ALB     2986952     28748   
 2              Algeria          213  DZ / DZA    34586184   2381740   
 3       American Samoa        1-684  AS / ASM       57881       199   
 4              Andorra          376  AD / AND       84000       468   
 ..                 ...          ...       ...         ...       ...   
 235  Wallis and Futuna          681  WF / WLF       16025       274   
 236     Western Sahara          212  EH / ESH      273008    266000   
 237              Yemen          967  YE / YEM    23495361    527970   
 238             Zambia          260  ZM / ZMB    13460305    752614   
 239           Zimbabwe          263  ZW / ZWE    11651858    390580   
 
           GDP $USD  
 0    20.65 Billion  
 1     12.8 Billion  
 2    215.7 Billion  
 3    462.2 Million  
 4      4.8 Billion  
 .

In [4]:
# Determing the type for the country variable
type(countries)

list

In [5]:
# Obtain the first key of the list
countries_list = countries[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million
4,Andorra,376,AD / AND,84000,468,4.8 Billion
...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,
236,Western Sahara,212,EH / ESH,273008,266000,
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion


In [6]:
# Splitting the column relating to ISO CODES
iso_code_split = countries_list['ISO CODES'].str.split("/",n=1, expand=True)
iso_code_split

Unnamed: 0,0,1
0,AF,AFG
1,AL,ALB
2,DZ,DZA
3,AS,ASM
4,AD,AND
...,...,...
235,WF,WLF
236,EH,ESH
237,YE,YEM
238,ZM,ZMB


In [7]:
# Adding the split list back to the dataframe
countries_list['ISO CODE 2L'] = iso_code_split[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD
...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF
236,Western Sahara,212,EH / ESH,273008,266000,,EH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM


In [8]:
# Adding the second three letter code list back to the original dataframe
countries_list['ISO CODE 3L'] = iso_code_split[1]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L,ISO CODE 3L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF,AFG
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL,ALB
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ,DZA
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS,ASM
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD,AND
...,...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF,WLF
236,Western Sahara,212,EH / ESH,273008,266000,,EH,ESH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE,YEM
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM,ZMB


In [9]:
# Creating a list of countries to iterate
list_of_countries = countries_list['COUNTRY'].tolist()
list_of_countries[:5]

['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra']

In [10]:
# Creating a list of the ISO codes to iterate over
_2l_ISO_CODES = countries_list['ISO CODE 2L'].tolist()
_2l_ISO_CODES[:5]

['AF ', 'AL ', 'DZ ', 'AS ', 'AD ']

In [11]:
# Removing the spaces within the list
_2l_ISO_CODES = [x.strip(' ') for x in _2l_ISO_CODES]
_2l_ISO_CODES[:5]

['AF', 'AL', 'DZ', 'AS', 'AD']

In [12]:
# Creating a list of the three letter ISO CODES to iterate over
_3l_ISO_CODES = countries_list['ISO CODE 3L'].tolist()
_3l_ISO_CODES[:5]

[' AFG', ' ALB', ' DZA', ' ASM', ' AND']

In [13]:
# Removing spaces within the list
_3l_ISO_CODES = [x.strip(' ') for x in _3l_ISO_CODES]
_3l_ISO_CODES[:5]

['AFG', 'ALB', 'DZA', 'ASM', 'AND']

In [14]:
# Creating a dictionary for the list of countries and their codes
countries_dict = {
    "Country":list_of_countries,
    "LOCATION":_3l_ISO_CODES,
    "ISO CODE 2 letter":_2l_ISO_CODES
}

In [15]:
# Converting the Dictionary to a dataframe
country_df = pd.DataFrame(countries_dict)
country_df

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter
0,Afghanistan,AFG,AF
1,Albania,ALB,AL
2,Algeria,DZA,DZ
3,American Samoa,ASM,AS
4,Andorra,AND,AD
...,...,...,...
235,Wallis and Futuna,WLF,WF
236,Western Sahara,ESH,EH
237,Yemen,YEM,YE
238,Zambia,ZMB,ZM


# Financial Sector

In [16]:
# Importing the CSV Files to tidy up the data
teller_machines = pd.read_csv('raw_data/automated_teller_machines.csv')
teller_machines

Unnamed: 0,LOCATION,TIME,"Automated teller machines (ATMs) (per 100,000 adults)"
0,AFG,1960,0.000000
1,AFG,1961,0.000000
2,AFG,1962,0.000000
3,AFG,1963,0.000000
4,AFG,1964,0.000000
...,...,...,...
16099,WLD,2016,38.330143
16100,WLD,2017,38.034455
16101,WLD,2018,40.949620
16102,WLD,2019,49.615022


In [17]:
# Renaming the columns
teller_machines1 = teller_machines.rename(columns={'Automated teller machines (ATMs) (per 100,000 adults)':'ATM_per_100000'}).copy()
teller_machines1

Unnamed: 0,LOCATION,TIME,ATM_per_100000
0,AFG,1960,0.000000
1,AFG,1961,0.000000
2,AFG,1962,0.000000
3,AFG,1963,0.000000
4,AFG,1964,0.000000
...,...,...,...
16099,WLD,2016,38.330143
16100,WLD,2017,38.034455
16101,WLD,2018,40.949620
16102,WLD,2019,49.615022


In [19]:
# Bank Loans
bank_loans = pd.read_csv('raw_data/borrowersfrombanks.csv')
bank_loans

Unnamed: 0,LOCATION,TIME,"Borrowers from commercial banks (per 1,000 adults)"
0,AFG,1960,0.000000
1,AFG,1961,0.000000
2,AFG,1962,0.000000
3,AFG,1963,0.000000
4,AFG,1964,0.000000
...,...,...,...
16099,WLD,2016,164.010187
16100,WLD,2017,164.077303
16101,WLD,2018,0.000000
16102,WLD,2019,0.000000


In [21]:
# Merging the two dataframes together
merge1 = pd.merge(teller_machines1, bank_loans, on=['LOCATION','TIME'], how='inner')
merge1

Unnamed: 0,LOCATION,TIME,ATM_per_100000,"Borrowers from commercial banks (per 1,000 adults)"
0,AFG,1960,0.000000,0.000000
1,AFG,1961,0.000000,0.000000
2,AFG,1962,0.000000,0.000000
3,AFG,1963,0.000000,0.000000
4,AFG,1964,0.000000,0.000000
...,...,...,...,...
16099,WLD,2016,38.330143,164.010187
16100,WLD,2017,38.034455,164.077303
16101,WLD,2018,40.949620,0.000000
16102,WLD,2019,49.615022,0.000000
