# Citations
##### OECD (2021), "Data warehouse", OECD.Stat (database), https://doi.org/10.1787/data-00900-en (accessed on 17 January 2021).

In [1]:
# Importing the relevant dependencies
import pandas as pd

In [2]:
# Scrape the HTML tables published on countrycode.org (country names plus
# their dialling and ISO codes). read_html returns a *list* of DataFrames,
# not a single frame. Requires network access.
countries = pd.read_html(io='https://countrycode.org/')

In [3]:
# Viewing the scraped result. `countries` is a list of DataFrames rather than
# a single DataFrame, so this displays the plain-text repr of the list.
countries

[               COUNTRY COUNTRY CODE ISO CODES  POPULATION  AREA KM2  \
 0          Afghanistan           93  AF / AFG    29121286    647500   
 1              Albania          355  AL / ALB     2986952     28748   
 2              Algeria          213  DZ / DZA    34586184   2381740   
 3       American Samoa        1-684  AS / ASM       57881       199   
 4              Andorra          376  AD / AND       84000       468   
 ..                 ...          ...       ...         ...       ...   
 235  Wallis and Futuna          681  WF / WLF       16025       274   
 236     Western Sahara          212  EH / ESH      273008    266000   
 237              Yemen          967  YE / YEM    23495361    527970   
 238             Zambia          260  ZM / ZMB    13460305    752614   
 239           Zimbabwe          263  ZW / ZWE    11651858    390580   
 
           GDP $USD  
 0    20.65 Billion  
 1     12.8 Billion  
 2    215.7 Billion  
 3    462.2 Million  
 4      4.8 Billion  
 .

In [4]:
# Determining the type of the `countries` variable returned by read_html
type(countries)

list

In [5]:
# Take the first element (index 0) of the list returned by read_html.
# Despite its name, `countries_list` is a DataFrame. No copy is made, so it is
# the same object as countries[0] and later column assignments affect both.
countries_list = countries[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million
4,Andorra,376,AD / AND,84000,468,4.8 Billion
...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,
236,Western Sahara,212,EH / ESH,273008,266000,
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion


In [6]:
# Break 'ISO CODES' (e.g. "AF / AFG") into two columns at the first "/".
# The resulting frame has integer column labels: 0 holds the two-letter part
# and 1 the three-letter part. The spaces around the slash are kept here.
iso_code_split = (
    countries_list['ISO CODES']
    .str.split(pat="/", n=1, expand=True)
)
iso_code_split

Unnamed: 0,0,1
0,AF,AFG
1,AL,ALB
2,DZ,DZA
3,AS,ASM
4,AD,AND
...,...,...
235,WF,WLF
236,EH,ESH
237,YE,YEM
238,ZM,ZMB


In [7]:
# Attach the two-letter half of the split (column 0) to the country table as
# 'ISO CODE 2L'. The assignment modifies `countries_list` in place. The values
# still carry the trailing space left over from splitting on "/" alone.
countries_list['ISO CODE 2L'] = iso_code_split[0]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD
...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF
236,Western Sahara,212,EH / ESH,273008,266000,,EH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM


In [8]:
# Attach the three-letter half of the split (column 1) to the country table as
# 'ISO CODE 3L'. The assignment modifies `countries_list` in place. The values
# still carry the leading space left over from splitting on "/" alone.
countries_list['ISO CODE 3L'] = iso_code_split[1]
countries_list

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES,POPULATION,AREA KM2,GDP $USD,ISO CODE 2L,ISO CODE 3L
0,Afghanistan,93,AF / AFG,29121286,647500,20.65 Billion,AF,AFG
1,Albania,355,AL / ALB,2986952,28748,12.8 Billion,AL,ALB
2,Algeria,213,DZ / DZA,34586184,2381740,215.7 Billion,DZ,DZA
3,American Samoa,1-684,AS / ASM,57881,199,462.2 Million,AS,ASM
4,Andorra,376,AD / AND,84000,468,4.8 Billion,AD,AND
...,...,...,...,...,...,...,...,...
235,Wallis and Futuna,681,WF / WLF,16025,274,,WF,WLF
236,Western Sahara,212,EH / ESH,273008,266000,,EH,ESH
237,Yemen,967,YE / YEM,23495361,527970,43.89 Billion,YE,YEM
238,Zambia,260,ZM / ZMB,13460305,752614,22.24 Billion,ZM,ZMB


In [9]:
# Pull the country names out of the table as a plain Python list and preview
# the first five entries.
list_of_countries = countries_list.loc[:, 'COUNTRY'].to_list()
list_of_countries[:5]

['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra']

In [10]:
# Pull the two-letter ISO codes out of the table as a plain Python list and
# preview the first five. The entries still have their trailing space here.
_2l_ISO_CODES = countries_list.loc[:, 'ISO CODE 2L'].to_list()
_2l_ISO_CODES[:5]

['AF ', 'AL ', 'DZ ', 'AS ', 'AD ']

In [11]:
# Trim the whitespace left over from splitting 'ISO CODES' on "/".
# strip() with no argument removes any surrounding whitespace (tabs,
# non-breaking spaces, ...), not only the ASCII space character.
# Non-string entries (e.g. a missing value) are passed through unchanged
# instead of raising AttributeError. Re-running this cell is harmless.
_2l_ISO_CODES = [
    code.strip() if isinstance(code, str) else code
    for code in _2l_ISO_CODES
]
_2l_ISO_CODES[:5]

['AF', 'AL', 'DZ', 'AS', 'AD']

In [12]:
# Pull the three-letter ISO codes out of the table as a plain Python list and
# preview the first five. The entries still have their leading space here.
_3l_ISO_CODES = countries_list.loc[:, 'ISO CODE 3L'].to_list()
_3l_ISO_CODES[:5]

[' AFG', ' ALB', ' DZA', ' ASM', ' AND']

In [13]:
# Trim the whitespace left over from splitting 'ISO CODES' on "/".
# These codes become the LOCATION merge key, so any stray whitespace would
# silently prevent a match. strip() with no argument removes every kind of
# surrounding whitespace, not only the ASCII space character.
# Non-string entries (e.g. None for a row that had no "/") are passed through
# unchanged instead of raising AttributeError. Re-running is harmless.
_3l_ISO_CODES = [
    code.strip() if isinstance(code, str) else code
    for code in _3l_ISO_CODES
]
_3l_ISO_CODES[:5]

['AFG', 'ALB', 'DZA', 'ASM', 'AND']

In [14]:
# Assemble the column-name -> values mapping for the country lookup table.
# The three-letter codes are stored under "LOCATION", the column name that the
# later merges join on.
countries_dict = dict(
    [
        ("Country", list_of_countries),
        ("LOCATION", _3l_ISO_CODES),
        ("ISO CODE 2 letter", _2l_ISO_CODES),
    ]
)

In [15]:
# Build the country lookup DataFrame from the dictionary; each key becomes a
# column. This frame is the left-hand side of every merge that follows.
country_df = pd.DataFrame(data=countries_dict)

In [16]:
# Load the raw house-price CSV (path is relative to the notebook's working
# directory) and display it.
real_estate = pd.read_csv(filepath_or_buffer='raw_data/worldrealestate.csv')
real_estate

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,HOUSECOST,NOMINAL,IDX2015,A,1970,2.552380,
1,AUS,HOUSECOST,NOMINAL,IDX2015,A,1971,2.851352,
2,AUS,HOUSECOST,NOMINAL,IDX2015,A,1972,3.183667,
3,AUS,HOUSECOST,NOMINAL,IDX2015,A,1973,3.837183,
4,AUS,HOUSECOST,NOMINAL,IDX2015,A,1974,4.690199,
...,...,...,...,...,...,...,...,...
1556,SAU,HOUSECOST,NOMINAL,IDX2015,A,2015,100.000000,
1557,SAU,HOUSECOST,NOMINAL,IDX2015,A,2016,95.091000,
1558,SAU,HOUSECOST,NOMINAL,IDX2015,A,2017,88.967500,
1559,SAU,HOUSECOST,NOMINAL,IDX2015,A,2018,86.462500,


In [17]:
# Give the generic 'Value' column a descriptive name. The result is bound back
# to `real_estate`; running the cell again is a no-op because rename ignores
# labels that are no longer present.
real_estate = real_estate.rename({'Value': 'Nominal_house_price'}, axis='columns')

In [18]:
# Final real-estate data: add the country name and two-letter code to each row
# by joining on the three-letter LOCATION code. With an inner join, rows whose
# LOCATION has no match in country_df are dropped.
final_real_estate_data = country_df.merge(real_estate, on='LOCATION', how='inner')
final_real_estate_data

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Nominal_house_price,Flag Codes
0,Australia,AUS,AU,HOUSECOST,NOMINAL,IDX2015,A,1970,2.552380,
1,Australia,AUS,AU,HOUSECOST,NOMINAL,IDX2015,A,1971,2.851352,
2,Australia,AUS,AU,HOUSECOST,NOMINAL,IDX2015,A,1972,3.183667,
3,Australia,AUS,AU,HOUSECOST,NOMINAL,IDX2015,A,1973,3.837183,
4,Australia,AUS,AU,HOUSECOST,NOMINAL,IDX2015,A,1974,4.690199,
...,...,...,...,...,...,...,...,...,...,...
1416,United States,USA,US,HOUSECOST,NOMINAL,IDX2015,A,2015,100.000000,
1417,United States,USA,US,HOUSECOST,NOMINAL,IDX2015,A,2016,105.730769,
1418,United States,USA,US,HOUSECOST,NOMINAL,IDX2015,A,2017,112.382036,
1419,United States,USA,US,HOUSECOST,NOMINAL,IDX2015,A,2018,119.534759,


In [19]:
# Export the merged real-estate data. The 'cleaned_data' directory must
# already exist. The row index is written as the first (unnamed) CSV column
# because index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
final_real_estate_data.to_csv(path_or_buf='cleaned_data/realestateOECD.csv')

# Unemployment Rate OECD

In [20]:
# Load the raw unemployment-rate CSV (path is relative to the notebook's
# working directory) and display it.
unemployment_df = pd.read_csv(filepath_or_buffer='raw_data/worldunemployment.csv')
unemployment_df

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,HUR,TOT,PC_LF,A,1967,1.875000,
1,AUS,HUR,TOT,PC_LF,A,1968,1.850000,
2,AUS,HUR,TOT,PC_LF,A,1969,1.800000,
3,AUS,HUR,TOT,PC_LF,A,1970,1.625000,
4,AUS,HUR,TOT,PC_LF,A,1971,1.925000,
...,...,...,...,...,...,...,...,...
1330,EU27_2020,HUR,TOT,PC_LF,A,2015,10.066670,
1331,EU27_2020,HUR,TOT,PC_LF,A,2016,9.125000,
1332,EU27_2020,HUR,TOT,PC_LF,A,2017,8.150000,
1333,EU27_2020,HUR,TOT,PC_LF,A,2018,7.283333,


In [21]:
# Give the generic 'Value' column a descriptive name. rename returns a new
# frame, so `unemployment_df` itself is left untouched.
unemployment_df_1 = unemployment_df.rename({'Value': 'Unemployment_%'}, axis='columns')
unemployment_df_1

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Unemployment_%,Flag Codes
0,AUS,HUR,TOT,PC_LF,A,1967,1.875000,
1,AUS,HUR,TOT,PC_LF,A,1968,1.850000,
2,AUS,HUR,TOT,PC_LF,A,1969,1.800000,
3,AUS,HUR,TOT,PC_LF,A,1970,1.625000,
4,AUS,HUR,TOT,PC_LF,A,1971,1.925000,
...,...,...,...,...,...,...,...,...
1330,EU27_2020,HUR,TOT,PC_LF,A,2015,10.066670,
1331,EU27_2020,HUR,TOT,PC_LF,A,2016,9.125000,
1332,EU27_2020,HUR,TOT,PC_LF,A,2017,8.150000,
1333,EU27_2020,HUR,TOT,PC_LF,A,2018,7.283333,


In [22]:
# Final unemployment data: add the country name and two-letter code to each
# row by joining on LOCATION. With an inner join, rows whose LOCATION has no
# match in country_df are dropped.
unemployment_df_final = country_df.merge(unemployment_df_1, how='inner', on='LOCATION')
unemployment_df_final

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Unemployment_%,Flag Codes
0,Australia,AUS,AU,HUR,TOT,PC_LF,A,1967,1.875000,
1,Australia,AUS,AU,HUR,TOT,PC_LF,A,1968,1.850000,
2,Australia,AUS,AU,HUR,TOT,PC_LF,A,1969,1.800000,
3,Australia,AUS,AU,HUR,TOT,PC_LF,A,1970,1.625000,
4,Australia,AUS,AU,HUR,TOT,PC_LF,A,1971,1.925000,
...,...,...,...,...,...,...,...,...,...,...
1213,United States,USA,US,HUR,TOT,PC_LF,A,2015,5.291667,
1214,United States,USA,US,HUR,TOT,PC_LF,A,2016,4.866667,
1215,United States,USA,US,HUR,TOT,PC_LF,A,2017,4.350000,
1216,United States,USA,US,HUR,TOT,PC_LF,A,2018,3.900000,


In [23]:
# Export the merged unemployment data. The 'cleaned_data' directory must
# already exist. The row index is written as the first (unnamed) CSV column
# because index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
unemployment_df_final.to_csv(path_or_buf='cleaned_data/unemploymentrateOECD.csv')

# Banking sector

In [24]:
# Load the raw banking-sector CSV (path is relative to the notebook's working
# directory) and display it.
banking = pd.read_csv(filepath_or_buffer='raw_data/worldbankingsector.csv')
banking

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1995,19.490000,
1,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1996,19.820640,
2,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1997,15.312700,
3,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1998,18.017870,
4,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1999,13.623330,
...,...,...,...,...,...,...,...,...
746,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2014,11.025050,
747,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2015,11.705780,
748,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2016,12.029350,
749,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2017,9.523528,


In [25]:
# Give the generic 'Value' column a descriptive name. rename returns a new
# frame, so `banking` itself is left untouched.
banking1 = banking.rename({'Value': 'net_value_added_%'}, axis='columns')
banking1

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,net_value_added_%,Flag Codes
0,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1995,19.490000,
1,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1996,19.820640,
2,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1997,15.312700,
3,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1998,18.017870,
4,FRA,BANKLEVERAGE,TOT,PC_NVA,A,1999,13.623330,
...,...,...,...,...,...,...,...,...
746,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2014,11.025050,
747,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2015,11.705780,
748,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2016,12.029350,
749,CHE,BANKLEVERAGE,TOT,PC_NVA,A,2017,9.523528,


In [26]:
# Add the country name and two-letter code to each banking row by joining on
# LOCATION. With an inner join, rows whose LOCATION has no match in country_df
# are dropped.
banking2 = country_df.merge(banking1, how='inner', on='LOCATION')
banking2

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,net_value_added_%,Flag Codes
0,Australia,AUS,AU,BANKLEVERAGE,TOT,PC_NVA,A,1995,6.079339,
1,Australia,AUS,AU,BANKLEVERAGE,TOT,PC_NVA,A,1996,5.526472,
2,Australia,AUS,AU,BANKLEVERAGE,TOT,PC_NVA,A,1997,4.680646,
3,Australia,AUS,AU,BANKLEVERAGE,TOT,PC_NVA,A,1998,4.526383,
4,Australia,AUS,AU,BANKLEVERAGE,TOT,PC_NVA,A,1999,4.786476,
...,...,...,...,...,...,...,...,...,...,...
746,United States,USA,US,BANKLEVERAGE,TOT,PC_NVA,A,2015,6.913682,
747,United States,USA,US,BANKLEVERAGE,TOT,PC_NVA,A,2016,6.343794,
748,United States,USA,US,BANKLEVERAGE,TOT,PC_NVA,A,2017,6.087101,
749,United States,USA,US,BANKLEVERAGE,TOT,PC_NVA,A,2018,6.488258,


In [27]:
# Export the merged banking data. The 'cleaned_data' directory must already
# exist. The row index is written as the first (unnamed) CSV column because
# index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
banking2.to_csv(path_or_buf='cleaned_data/bankingOECD.csv')

# Insurance Spending

In [28]:
# Load the raw insurance-spending CSV (path is relative to the notebook's
# working directory) and display it.
insurance = pd.read_csv(filepath_or_buffer='raw_data/worldinsurance.csv')
insurance

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,% Insurance of GDP,Flag Codes
0,AUS,INSURANCEEXP,TOT,PC_GDP,A,1983,1.963,
1,AUS,INSURANCEEXP,TOT,PC_GDP,A,1984,3.874,
2,AUS,INSURANCEEXP,TOT,PC_GDP,A,1985,3.767,
3,AUS,INSURANCEEXP,TOT,PC_GDP,A,1986,1.823,
4,AUS,INSURANCEEXP,TOT,PC_GDP,A,1987,5.256,
...,...,...,...,...,...,...,...,...
1219,HND,INSURANCEEXP,TOT,PC_GDP,A,2018,2.633,
1220,TUN,INSURANCEEXP,TOT,PC_GDP,A,2016,2.043,
1221,TUN,INSURANCEEXP,TOT,PC_GDP,A,2017,2.155,
1222,DOM,INSURANCEEXP,TOT,PC_GDP,A,2018,1.511,


In [29]:
# Add the country name and two-letter code to each insurance row by joining
# on LOCATION. With an inner join, rows whose LOCATION has no match in
# country_df are dropped.
insurance1 = country_df.merge(insurance, how='inner', on='LOCATION')
insurance1

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,% Insurance of GDP,Flag Codes
0,Argentina,ARG,AR,INSURANCEEXP,TOT,PC_GDP,A,2012,2.251,
1,Argentina,ARG,AR,INSURANCEEXP,TOT,PC_GDP,A,2013,2.408,
2,Argentina,ARG,AR,INSURANCEEXP,TOT,PC_GDP,A,2014,2.378,
3,Argentina,ARG,AR,INSURANCEEXP,TOT,PC_GDP,A,2015,2.575,
4,Argentina,ARG,AR,INSURANCEEXP,TOT,PC_GDP,A,2016,2.565,
...,...,...,...,...,...,...,...,...,...,...
1183,Uruguay,URY,UY,INSURANCEEXP,TOT,PC_GDP,A,2012,2.071,
1184,Uruguay,URY,UY,INSURANCEEXP,TOT,PC_GDP,A,2013,2.100,
1185,Uruguay,URY,UY,INSURANCEEXP,TOT,PC_GDP,A,2014,2.276,
1186,Uruguay,URY,UY,INSURANCEEXP,TOT,PC_GDP,A,2015,2.398,


In [30]:
# Shorten the '% Insurance of GDP' column name to '%_GDP'. The result is bound
# back to `insurance1`; running the cell again is a no-op because rename
# ignores labels that are no longer present.
insurance1 = insurance1.rename({'% Insurance of GDP': '%_GDP'}, axis='columns')

In [31]:
# Export the merged insurance data. The 'cleaned_data' directory must already
# exist. The row index is written as the first (unnamed) CSV column because
# index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
insurance1.to_csv(path_or_buf='cleaned_data/insuranceOECD.csv')

# Purchasing power parities

In [32]:
# Load the raw purchasing-power-parity CSV (path is relative to the notebook's
# working directory) and display it.
purchasingpp = pd.read_csv(filepath_or_buffer='raw_data/pppworld.csv')
purchasingpp

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,PPP,TOT,NATUSD,A,2000,1.311473,
1,AUS,PPP,TOT,NATUSD,A,2001,1.326806,
2,AUS,PPP,TOT,NATUSD,A,2002,1.336490,
3,AUS,PPP,TOT,NATUSD,A,2003,1.353924,
4,AUS,PPP,TOT,NATUSD,A,2004,1.366397,
...,...,...,...,...,...,...,...,...
1189,SGP,PPP,TOT,NATUSD,A,2014,0.864060,
1190,SGP,PPP,TOT,NATUSD,A,2015,0.879622,
1191,SGP,PPP,TOT,NATUSD,A,2016,0.878245,
1192,SGP,PPP,TOT,NATUSD,A,2017,0.885980,


In [33]:
# Give the generic 'Value' column a descriptive name. rename returns a new
# frame, so `purchasingpp` itself is left untouched.
purchasingpp1 = purchasingpp.rename({"Value": "value_to_usdollar"}, axis='columns')
purchasingpp1

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,value_to_usdollar,Flag Codes
0,AUS,PPP,TOT,NATUSD,A,2000,1.311473,
1,AUS,PPP,TOT,NATUSD,A,2001,1.326806,
2,AUS,PPP,TOT,NATUSD,A,2002,1.336490,
3,AUS,PPP,TOT,NATUSD,A,2003,1.353924,
4,AUS,PPP,TOT,NATUSD,A,2004,1.366397,
...,...,...,...,...,...,...,...,...
1189,SGP,PPP,TOT,NATUSD,A,2014,0.864060,
1190,SGP,PPP,TOT,NATUSD,A,2015,0.879622,
1191,SGP,PPP,TOT,NATUSD,A,2016,0.878245,
1192,SGP,PPP,TOT,NATUSD,A,2017,0.885980,


In [34]:
# Add the country name and two-letter code to each PPP row by joining on
# LOCATION. With an inner join, rows whose LOCATION has no match in country_df
# are dropped.
purchasingpp2 = country_df.merge(purchasingpp1, how='inner', on='LOCATION')
purchasingpp2

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,value_to_usdollar,Flag Codes
0,Argentina,ARG,AR,PPP,TOT,NATUSD,A,2000,0.663416,
1,Argentina,ARG,AR,PPP,TOT,NATUSD,A,2001,0.642064,
2,Argentina,ARG,AR,PPP,TOT,NATUSD,A,2002,0.825195,
3,Argentina,ARG,AR,PPP,TOT,NATUSD,A,2003,0.895181,
4,Argentina,ARG,AR,PPP,TOT,NATUSD,A,2004,1.031788,
...,...,...,...,...,...,...,...,...,...,...
1149,Zambia,ZMB,ZM,PPP,TOT,NATUSD,A,2014,3.064750,
1150,Zambia,ZMB,ZM,PPP,TOT,NATUSD,A,2015,3.365914,
1151,Zambia,ZMB,ZM,PPP,TOT,NATUSD,A,2016,3.877613,
1152,Zambia,ZMB,ZM,PPP,TOT,NATUSD,A,2017,4.192580,


In [35]:
# Export the merged PPP data. The 'cleaned_data' directory must already exist.
# The row index is written as the first (unnamed) CSV column because
# index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
purchasingpp2.to_csv(path_or_buf='cleaned_data/purchasingppOECD.csv')

# World Interest Rates


In [36]:
# Load the raw interest-rate CSV (path is relative to the notebook's working
# directory) and display it.
interest_rates = pd.read_csv(filepath_or_buffer='raw_data/worldinterestrates.csv')
interest_rates

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,PRT,LTINT,TOT,PC_PA,A,1994,10.478330,
1,PRT,LTINT,TOT,PC_PA,A,1995,11.465000,
2,PRT,LTINT,TOT,PC_PA,A,1996,8.559167,
3,PRT,LTINT,TOT,PC_PA,A,1997,6.358333,
4,PRT,LTINT,TOT,PC_PA,A,1998,4.877500,
...,...,...,...,...,...,...,...,...
1377,IND,LTINT,TOT,PC_PA,A,2015,7.776181,
1378,IND,LTINT,TOT,PC_PA,A,2016,7.205917,
1379,IND,LTINT,TOT,PC_PA,A,2017,6.924250,
1380,IND,LTINT,TOT,PC_PA,A,2018,7.704333,


In [37]:
# Give the generic 'Value' column a descriptive name. rename returns a new
# frame, so `interest_rates` itself is left untouched.
interest_rates1 = interest_rates.rename({'Value': 'interest_rate_pa_%'}, axis='columns')
interest_rates1

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,interest_rate_pa_%,Flag Codes
0,PRT,LTINT,TOT,PC_PA,A,1994,10.478330,
1,PRT,LTINT,TOT,PC_PA,A,1995,11.465000,
2,PRT,LTINT,TOT,PC_PA,A,1996,8.559167,
3,PRT,LTINT,TOT,PC_PA,A,1997,6.358333,
4,PRT,LTINT,TOT,PC_PA,A,1998,4.877500,
...,...,...,...,...,...,...,...,...
1377,IND,LTINT,TOT,PC_PA,A,2015,7.776181,
1378,IND,LTINT,TOT,PC_PA,A,2016,7.205917,
1379,IND,LTINT,TOT,PC_PA,A,2017,6.924250,
1380,IND,LTINT,TOT,PC_PA,A,2018,7.704333,


In [38]:
# Add the country name and two-letter code to each interest-rate row by
# joining on LOCATION. With an inner join, rows whose LOCATION has no match in
# country_df are dropped.
interest_rates2 = country_df.merge(interest_rates1, how='inner', on='LOCATION')
interest_rates2

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,interest_rate_pa_%,Flag Codes
0,Australia,AUS,AU,LTINT,TOT,PC_PA,A,1970,6.646667,
1,Australia,AUS,AU,LTINT,TOT,PC_PA,A,1971,6.713333,
2,Australia,AUS,AU,LTINT,TOT,PC_PA,A,1972,5.831666,
3,Australia,AUS,AU,LTINT,TOT,PC_PA,A,1973,6.933333,
4,Australia,AUS,AU,LTINT,TOT,PC_PA,A,1974,9.036667,
...,...,...,...,...,...,...,...,...,...,...
1327,United States,USA,US,LTINT,TOT,PC_PA,A,2015,2.135833,
1328,United States,USA,US,LTINT,TOT,PC_PA,A,2016,1.841667,
1329,United States,USA,US,LTINT,TOT,PC_PA,A,2017,2.330000,
1330,United States,USA,US,LTINT,TOT,PC_PA,A,2018,2.910000,


In [39]:
# Export the merged interest-rate data. The 'cleaned_data' directory must
# already exist. The row index is written as the first (unnamed) CSV column
# because index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
interest_rates2.to_csv(path_or_buf='cleaned_data/interestratesOECD.csv')

# World Tourism

In [43]:
# Load the raw inbound-tourism CSV (path is relative to the notebook's working
# directory) and display it.
tourism = pd.read_csv(filepath_or_buffer='raw_data/worldtourism.csv')
tourism

Unnamed: 0,LOCATION,VARIABLE,Variable,SOURCE,Source,YEAR,Year,visitors,Flag Codes,Flags
0,AUS,INB_ARRIVALS_TOTAL,Total international arrivals,DEMAND,Tourism demand surveys,2008,2008,5621800,,
1,AUS,INB_ARRIVALS_TOTAL,Total international arrivals,DEMAND,Tourism demand surveys,2009,2009,5515000,,
2,AUS,INB_ARRIVALS_TOTAL,Total international arrivals,DEMAND,Tourism demand surveys,2010,2010,5671000,,
3,AUS,INB_ARRIVALS_TOTAL,Total international arrivals,DEMAND,Tourism demand surveys,2011,2011,5899800,,
4,AUS,INB_ARRIVALS_TOTAL,Total international arrivals,DEMAND,Tourism demand surveys,2012,2012,5990800,,
...,...,...,...,...,...,...,...,...,...,...
6009,RUS,KAZ,Kazakhstan,DEMAND,Tourism demand surveys,2014,2014,3733319,,
6010,RUS,KAZ,Kazakhstan,DEMAND,Tourism demand surveys,2015,2015,4711082,,
6011,RUS,KAZ,Kazakhstan,DEMAND,Tourism demand surveys,2016,2016,3564152,,
6012,RUS,KAZ,Kazakhstan,DEMAND,Tourism demand surveys,2017,2017,3484687,,


In [44]:
# Add the country name and two-letter code to each inbound-tourism row by
# joining on LOCATION. With an inner join, rows whose LOCATION has no match in
# country_df are dropped.
tourism1 = country_df.merge(tourism, how='inner', on='LOCATION')
tourism1

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,VARIABLE,Variable,SOURCE,Source,YEAR,Year,visitors,Flag Codes,Flags
0,Argentina,ARG,AR,INB_ARRIVALS_OVERNIGHT,Overnight visitors (tourists),SUPPLY,Tourism supply surveys,2008,2008,4700000,,
1,Argentina,ARG,AR,INB_ARRIVALS_OVERNIGHT,Overnight visitors (tourists),DEMAND,Tourism demand surveys,2008,2008,4700000,,
2,Argentina,ARG,AR,INB_ARRIVALS_OVERNIGHT,Overnight visitors (tourists),DEMAND,Tourism demand surveys,2009,2009,4308000,,
3,Argentina,ARG,AR,INB_ARRIVALS_OVERNIGHT,Overnight visitors (tourists),DEMAND,Tourism demand surveys,2010,2010,5325000,,
4,Argentina,ARG,AR,INB_ARRIVALS_OVERNIGHT,Overnight visitors (tourists),DEMAND,Tourism demand surveys,2011,2011,5704648,,
...,...,...,...,...,...,...,...,...,...,...,...,...
5712,United States,USA,US,INB_ACCOMM_PRIVATE_NIGHTS,Private accommodation,DEMAND,Tourism demand surveys,2014,2014,164056964,,
5713,United States,USA,US,INB_ACCOMM_PRIVATE_NIGHTS,Private accommodation,DEMAND,Tourism demand surveys,2015,2015,172721248,,
5714,United States,USA,US,INB_ACCOMM_PRIVATE_NIGHTS,Private accommodation,DEMAND,Tourism demand surveys,2016,2016,165987359,,
5715,United States,USA,US,INB_ACCOMM_PRIVATE_NIGHTS,Private accommodation,DEMAND,Tourism demand surveys,2017,2017,179489131,,


In [45]:
# Export the merged inbound-tourism data. The 'cleaned_data' directory must
# already exist. The row index is written as the first (unnamed) CSV column
# because index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
tourism1.to_csv(path_or_buf='cleaned_data/tourisminboundOECD.csv')

In [48]:
# Outbound tourism by country: load the raw CSV (path is relative to the
# notebook's working directory) and display it.
outbound_tourism = pd.read_csv(filepath_or_buffer='raw_data/worldtourismoutbound.csv')
outbound_tourism

Unnamed: 0,LOCATION,VARIABLE,Variable,YEAR,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,travellers,Flag Codes,Flags
0,AUS,OBND_DEP_TOTAL,Total international departures,2008,2008,NBR,Number,0,Units,,,5716200.0,,
1,AUS,OBND_DEP_TOTAL,Total international departures,2009,2009,NBR,Number,0,Units,,,5858200.0,,
2,AUS,OBND_DEP_TOTAL,Total international departures,2010,2010,NBR,Number,0,Units,,,6680900.0,,
3,AUS,OBND_DEP_TOTAL,Total international departures,2011,2011,NBR,Number,0,Units,,,7424500.0,,
4,AUS,OBND_DEP_TOTAL,Total international departures,2012,2012,NBR,Number,0,Units,,,8015600.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2931,TUR,ENS,Europe not specified,2014,2014,NBR,Number,0,Units,,,648742.0,,
2932,TUR,ENS,Europe not specified,2015,2015,NBR,Number,0,Units,,,767269.0,,
2933,TUR,ENS,Europe not specified,2016,2016,NBR,Number,0,Units,,,921701.0,,
2934,TUR,ENS,Europe not specified,2017,2017,NBR,Number,0,Units,,,1104725.0,,


In [49]:
# Add the country name and two-letter code to each outbound-tourism row by
# joining on LOCATION. With an inner join, rows whose LOCATION has no match in
# country_df are dropped.
outbound_tourism1 = country_df.merge(outbound_tourism, how='inner', on='LOCATION')
outbound_tourism1

Unnamed: 0,Country,LOCATION,ISO CODE 2 letter,VARIABLE,Variable,YEAR,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,travellers,Flag Codes,Flags
0,Argentina,ARG,AR,OBND_DEP_OVERNIGHT,Overnight visitors (tourists),2008,2008,NBR,Number,0,Units,,,4614000.000,,
1,Argentina,ARG,AR,OBND_DEP_OVERNIGHT,Overnight visitors (tourists),2009,2009,NBR,Number,0,Units,,,4981000.000,,
2,Argentina,ARG,AR,OBND_DEP_OVERNIGHT,Overnight visitors (tourists),2010,2010,NBR,Number,0,Units,,,5307000.000,,
3,Argentina,ARG,AR,OBND_DEP_OVERNIGHT,Overnight visitors (tourists),2011,2011,NBR,Number,0,Units,,,6686193.000,,
4,Argentina,ARG,AR,OBND_DEP_OVERNIGHT,Overnight visitors (tourists),2012,2012,NBR,Number,0,Units,,,7247040.466,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2898,United States,USA,US,GBR,United Kingdom,2014,2014,NBR,Number,0,Units,,,2831760.000,,
2899,United States,USA,US,GBR,United Kingdom,2015,2015,NBR,Number,0,Units,,,2885432.000,,
2900,United States,USA,US,GBR,United Kingdom,2016,2016,NBR,Number,0,Units,,,3197467.000,,
2901,United States,USA,US,GBR,United Kingdom,2017,2017,NBR,Number,0,Units,,,3756046.000,,


In [50]:
# Export the merged outbound-tourism data to CSV. The 'cleaned_data' directory
# must already exist. The row index is written as the first (unnamed) CSV
# column because index=False is not passed.
# NOTE(review): confirm downstream readers expect that index column.
outbound_tourism1.to_csv(path_or_buf='cleaned_data/outboundtourismOECD.csv')