In [1]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt

# Set file paths
# All paths are relative, so they resolve against the notebook's working directory.
# One CSV per indicator: adult population, total population, and the
# female / male / total unemployment series loaded in the cells below.
adult_pop_path = "Resources/population_adult/adult_pop_api.csv"
total_pop_path = "Resources/population_total/total_pop_api.csv"
unemployment_female_path = "Resources/unemployment_female/unemployment_female_api.csv"
unemployment_male_path = "Resources/unemployment_male/unemployment_male_api.csv"
unemployment_total_path = "Resources/unemployment_total/unemployment_total_api.csv"

In [2]:
# Load the adult (15-64) population CSV, skipping its first four lines,
# and keep only the country name plus the 2015-2019 yearly columns
adult_pop_df = (
    pd.read_csv(adult_pop_path, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
adult_pop_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,69.159774,69.137615,68.946339,68.646606,68.321199
1,Afghanistan,52.644299,53.207964,53.763223,54.324898,54.911934
2,Angola,50.615167,50.685629,50.807787,50.974702,51.178633
3,Albania,68.704725,68.660092,68.642614,68.58239,68.397805
4,Andorra,,,,,


In [3]:
# Load the total population CSV, skipping its first four lines,
# and keep only the country name plus the 2015-2019 yearly columns
total_pop_df = (
    pd.read_csv(total_pop_path, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
total_pop_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,104341.0,104872.0,105366.0,105845.0,106314.0
1,Afghanistan,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0
2,Angola,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0
3,Albania,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0
4,Andorra,78011.0,77297.0,77001.0,77006.0,77142.0


In [4]:
# Load the female unemployment CSV, skipping its first four lines,
# and keep only the country name plus the 2015-2019 yearly columns
unemployment_female_df = (
    pd.read_csv(unemployment_female_path, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
unemployment_female_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,,
1,Afghanistan,14.427,14.314,14.09,13.906,14.004
2,Angola,7.681,7.563,7.467,7.327,6.942
3,Albania,17.098,14.573,12.563,11.229,11.604
4,Andorra,,,,,


In [5]:
# Load the male unemployment CSV, skipping its first four lines,
# and keep only the country name plus the 2015-2019 yearly columns
unemployment_male_df = (
    pd.read_csv(unemployment_male_path, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
unemployment_male_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,,
1,Afghanistan,10.684,10.571,10.416,10.287,10.331
2,Angola,6.883,6.882,6.769,6.708,6.83
3,Albania,17.068001,15.678,14.59,13.127,12.846
4,Andorra,,,,,


In [6]:
# Load the total unemployment CSV, skipping its first four lines,
# and keep only the country name plus the 2015-2019 yearly columns
unemployment_total_df = (
    pd.read_csv(unemployment_total_path, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
unemployment_total_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,,
1,Afghanistan,11.387,11.313,11.184,11.057,11.118
2,Angola,7.282,7.223,7.119,7.019,6.886
3,Albania,17.08,15.22,13.75,12.34,12.331
4,Andorra,,,,,


In [7]:
# Left-join the adult population columns onto the total population frame,
# matching rows by country name. Both frames share the year column names,
# so pandas suffixes them: _x for the total frame, _y for the adult frame.
population_df = pd.merge(
    total_pop_df,
    adult_pop_df,
    how='left',
    on='Country Name',
)
population_df.head()

Unnamed: 0,Country Name,2015_x,2016_x,2017_x,2018_x,2019_x,2015_y,2016_y,2017_y,2018_y,2019_y
0,Aruba,104341.0,104872.0,105366.0,105845.0,106314.0,69.159774,69.137615,68.946339,68.646606,68.321199
1,Afghanistan,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,52.644299,53.207964,53.763223,54.324898,54.911934
2,Angola,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0,50.615167,50.685629,50.807787,50.974702,51.178633
3,Albania,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,68.704725,68.660092,68.642614,68.58239,68.397805
4,Andorra,78011.0,77297.0,77001.0,77006.0,77142.0,,,,,


In [11]:
# Replace the merge suffixes with descriptive labels:
# _x columns hold total population, _y columns hold the adult share (%)
population_df = population_df.rename(
    {
        '2015_x': '2015 Total Pop',
        '2016_x': '2016 Total Pop',
        '2017_x': '2017 Total Pop',
        '2018_x': '2018 Total Pop',
        '2019_x': '2019 Total Pop',
        '2015_y': '2015 Adults (%)',
        '2016_y': '2016 Adults (%)',
        '2017_y': '2017 Adults (%)',
        '2018_y': '2018 Adults (%)',
        '2019_y': '2019 Adults (%)',
    },
    axis='columns',
)
population_df.head()

Unnamed: 0,Country Name,2015 Total Pop,2016 Total Pop,2017 Total Pop,2018 Total Pop,2019 Total Pop,2015 Adults (%),2016 Adults (%),2017 Adults (%),2018 Adults (%),2019 Adults (%)
0,Aruba,104341.0,104872.0,105366.0,105845.0,106314.0,69.159774,69.137615,68.946339,68.646606,68.321199
1,Afghanistan,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,52.644299,53.207964,53.763223,54.324898,54.911934
2,Angola,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0,50.615167,50.685629,50.807787,50.974702,51.178633
3,Albania,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,68.704725,68.660092,68.642614,68.58239,68.397805
4,Andorra,78011.0,77297.0,77001.0,77006.0,77142.0,,,,,


In [9]:
# Left-join the female unemployment columns onto the total unemployment frame,
# matching rows by country name. The shared year columns come back suffixed:
# _x for the total series, _y for the female series.
unemployment_df = pd.merge(
    unemployment_total_df,
    unemployment_female_df,
    how='left',
    on='Country Name',
)
unemployment_df.head()

Unnamed: 0,Country Name,2015_x,2016_x,2017_x,2018_x,2019_x,2015_y,2016_y,2017_y,2018_y,2019_y
0,Aruba,,,,,,,,,,
1,Afghanistan,11.387,11.313,11.184,11.057,11.118,14.427,14.314,14.09,13.906,14.004
2,Angola,7.282,7.223,7.119,7.019,6.886,7.681,7.563,7.467,7.327,6.942
3,Albania,17.08,15.22,13.75,12.34,12.331,17.098,14.573,12.563,11.229,11.604
4,Andorra,,,,,,,,,,


In [12]:
# Rename the merge-suffixed columns to descriptive labels:
# _x columns hold the total unemployment rate, _y columns the female rate.
# The labels are spelled "Unemployment" so they match the
# "<year> Male Unemployment" labels used for the male series, and so the
# exported CSV header carries no typo.
unemployment_df = unemployment_df.rename(columns={
    '2015_x': '2015 Total Unemployment',
    '2016_x': '2016 Total Unemployment',
    '2017_x': '2017 Total Unemployment',
    '2018_x': '2018 Total Unemployment',
    '2019_x': '2019 Total Unemployment',
    '2015_y': '2015 Female Unemployment',
    '2016_y': '2016 Female Unemployment',
    '2017_y': '2017 Female Unemployment',
    '2018_y': '2018 Female Unemployment',
    '2019_y': '2019 Female Unemployment',
})
unemployment_df.head()

Unnamed: 0,Country Name,2015 Total Unemploment,2016 Total Unemploment,2017 Total Unemploment,2018 Total Unemploment,2019 Total Unemploment,2015 Female Unemploment,2016 Female Unemploment,2017 Female Unemploment,2018 Female Unemploment,2019 Female Unemploment
0,Aruba,,,,,,,,,,
1,Afghanistan,11.387,11.313,11.184,11.057,11.118,14.427,14.314,14.09,13.906,14.004
2,Angola,7.282,7.223,7.119,7.019,6.886,7.681,7.563,7.467,7.327,6.942
3,Albania,17.08,15.22,13.75,12.34,12.331,17.098,14.573,12.563,11.229,11.604
4,Andorra,,,,,,,,,,


In [13]:
# Left-join the male unemployment columns onto the combined unemployment frame,
# matching rows by country name. The existing columns were already renamed,
# so the male year columns arrive under their plain year labels.
unemployment_df = pd.merge(
    unemployment_df,
    unemployment_male_df,
    how='left',
    on='Country Name',
)
unemployment_df.head()

Unnamed: 0,Country Name,2015 Total Unemploment,2016 Total Unemploment,2017 Total Unemploment,2018 Total Unemploment,2019 Total Unemploment,2015 Female Unemploment,2016 Female Unemploment,2017 Female Unemploment,2018 Female Unemploment,2019 Female Unemploment,2015,2016,2017,2018,2019
0,Aruba,,,,,,,,,,,,,,,
1,Afghanistan,11.387,11.313,11.184,11.057,11.118,14.427,14.314,14.09,13.906,14.004,10.684,10.571,10.416,10.287,10.331
2,Angola,7.282,7.223,7.119,7.019,6.886,7.681,7.563,7.467,7.327,6.942,6.883,6.882,6.769,6.708,6.83
3,Albania,17.08,15.22,13.75,12.34,12.331,17.098,14.573,12.563,11.229,11.604,17.068001,15.678,14.59,13.127,12.846
4,Andorra,,,,,,,,,,,,,,,


In [14]:
# Label the plain year columns brought in by the male merge as male unemployment
unemployment_df = unemployment_df.rename(
    {
        '2015': '2015 Male Unemployment',
        '2016': '2016 Male Unemployment',
        '2017': '2017 Male Unemployment',
        '2018': '2018 Male Unemployment',
        '2019': '2019 Male Unemployment',
    },
    axis='columns',
)
unemployment_df.head()

Unnamed: 0,Country Name,2015 Total Unemploment,2016 Total Unemploment,2017 Total Unemploment,2018 Total Unemploment,2019 Total Unemploment,2015 Female Unemploment,2016 Female Unemploment,2017 Female Unemploment,2018 Female Unemploment,2019 Female Unemploment,2015 Male Unemployment,2016 Male Unemployment,2017 Male Unemployment,2018 Male Unemployment,2019 Male Unemployment
0,Aruba,,,,,,,,,,,,,,,
1,Afghanistan,11.387,11.313,11.184,11.057,11.118,14.427,14.314,14.09,13.906,14.004,10.684,10.571,10.416,10.287,10.331
2,Angola,7.282,7.223,7.119,7.019,6.886,7.681,7.563,7.467,7.327,6.942,6.883,6.882,6.769,6.708,6.83
3,Albania,17.08,15.22,13.75,12.34,12.331,17.098,14.573,12.563,11.229,11.604,17.068001,15.678,14.59,13.127,12.846
4,Andorra,,,,,,,,,,,,,,,


In [15]:
# Left-join the unemployment columns onto the population frame by country name,
# producing one wide table with every population and unemployment column
pop_unemployment_df = pd.merge(
    population_df,
    unemployment_df,
    how='left',
    on='Country Name',
)
pop_unemployment_df.head()

Unnamed: 0,Country Name,2015 Total Pop,2016 Total Pop,2017 Total Pop,2018 Total Pop,2019 Total Pop,2015 Adults (%),2016 Adults (%),2017 Adults (%),2018 Adults (%),...,2015 Female Unemploment,2016 Female Unemploment,2017 Female Unemploment,2018 Female Unemploment,2019 Female Unemploment,2015 Male Unemployment,2016 Male Unemployment,2017 Male Unemployment,2018 Male Unemployment,2019 Male Unemployment
0,Aruba,104341.0,104872.0,105366.0,105845.0,106314.0,69.159774,69.137615,68.946339,68.646606,...,,,,,,,,,,
1,Afghanistan,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,52.644299,53.207964,53.763223,54.324898,...,14.427,14.314,14.09,13.906,14.004,10.684,10.571,10.416,10.287,10.331
2,Angola,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0,50.615167,50.685629,50.807787,50.974702,...,7.681,7.563,7.467,7.327,6.942,6.883,6.882,6.769,6.708,6.83
3,Albania,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,68.704725,68.660092,68.642614,68.58239,...,17.098,14.573,12.563,11.229,11.604,17.068001,15.678,14.59,13.127,12.846
4,Andorra,78011.0,77297.0,77001.0,77006.0,77142.0,,,,,...,,,,,,,,,,


In [22]:
# Write the combined population/unemployment table to a CSV file in the
# working directory (with pandas defaults, the index is written as the first column)
pop_unemployment_df.to_csv(path_or_buf='pop_unemployment.csv')