In [21]:
# Dependencies
import pandas as pd

In [22]:
# Store file path in variable
unemployment_file = "Resources/Unemployment_raw.csv"

In [23]:
# Read Data files with the pandas library
unemployment_df = pd.read_csv(unemployment_file, encoding="UTF-8")

In [24]:
# Print out Column Titles
unemployment_df.columns

Index(['WEO Country Code', 'ISO', 'WEO Subject Code', 'Country',
       'Subject Descriptor', 'Subject Notes', 'Units', 'Scale',
       'Country/Series-specific Notes', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
       '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', '2022', '2023', '2024', '2025', 'Estimates Start After'],
      dtype='object')

In [25]:
# Remove columns not being used for analysis
unemployment_df.drop(columns = ['WEO Country Code', 'ISO', 'WEO Subject Code',
                                'Scale', '1980', '1981', '1982', '1983', '1984',
                                '1985', '1986', '1987', '1988', '1989', '1990', 
                                '1991', '1992', '1993', '1994', '1995', '1996', 
                                '1997', '1998', '1999', '2000', '2001', '2002',
                                '2003', '2004', '2005', '2006', '2007', '2008',
                                '2009', '2010', '2011', '2012', '2013', '2014', 
                                '2020', '2021', '2022', '2023', '2024', '2025',
                                'Estimates Start After'], inplace = True)
unemployment_df.head()

Unnamed: 0,Country,Subject Descriptor,Subject Notes,Units,Country/Series-specific Notes,2015,2016,2017,2018,2019
0,Afghanistan,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Source: National Statistics Office Latest actu...,1197.01,1222.92,1255.29,1270.22,1319.9
1,Afghanistan,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,"See notes for: Gross domestic product, consta...",0.988,2.164,2.647,1.189,3.912
2,Afghanistan,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Source: National Statistics Office Latest actu...,1226.57,1222.92,1285.46,1327.69,1469.6
3,Afghanistan,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,"See notes for: Gross domestic product, curren...",20.22,17.994,18.91,18.401,18.876
4,Afghanistan,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,"See notes for: Gross domestic product, curren...",72.056,70.098,74.712,77.416,81.88


In [26]:
# Print out the categories included in dataset
unemployment_df['Subject Descriptor'].value_counts()

Gross domestic product per capita, current prices                                     585
Gross domestic product, current prices                                                585
General government total expenditure                                                  390
Current account balance                                                               390
General government revenue                                                            390
General government net debt                                                           390
General government structural balance                                                 390
General government gross debt                                                         390
Inflation, average consumer prices                                                    390
Gross domestic product, constant prices                                               390
General government primary net lending/borrowing                                      390
Gross dome

In [27]:
# Only keep rows for Unemployment Rate
unemployment_df = unemployment_df[unemployment_df['Subject Descriptor']== "Unemployment rate"]

In [28]:
# Replace 'NaN' with blanks for the Country/Series-specific Notes Column
unemployment_df['Country/Series-specific Notes'] = unemployment_df['Country/Series-specific Notes'].fillna('')

In [29]:
# Drop all rows with 'NaN'
unemployment_df = unemployment_df.dropna()
unemployment_df.reset_index(inplace = True, drop = True)

In [30]:
# Convert Unemployment Rate data types to float
unemployment_df[['2015', '2016', '2017', '2018', '2019']] = unemployment_df[['2015', '2016', '2017', '2018', 
                                                                             '2019']].astype(float)

In [31]:
# Create new column and calculate average unemployment rate
unemployment_df['Avg. Unemployment'] = (unemployment_df['2015'] + 
                                        unemployment_df['2016'] +
                                        unemployment_df['2017'] +
                                        unemployment_df['2018'] +
                                        unemployment_df['2019']) / (5)
# Save df to csv file
unemployment_df.to_csv("Resources/unemployment_complete_df.csv", index = False, header = True)
unemployment_df.head()

Unnamed: 0,Country,Subject Descriptor,Subject Notes,Units,Country/Series-specific Notes,2015,2016,2017,2018,2019,Avg. Unemployment
0,Albania,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,17.1,15.2,13.7,12.3,11.5,13.96
1,Algeria,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,11.214,10.498,11.709,11.731,11.383,11.307
2,Argentina,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,6.533,8.467,8.35,9.2,9.825,8.475
3,Armenia,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: ILO Latest actual data: 2015 Employmen...,18.5,18.0,20.9,20.5,18.9,19.36
4,Aruba,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,7.298,7.694,8.923,7.283,7.544,7.7484


In [32]:
# Create a new DataFrame with select columns
unemployment_filtered_df = unemployment_df[['Country', '2015',\
                                            '2016', '2017', '2018',\
                                            '2019', 'Avg. Unemployment']]
unemployment_filtered_df.rename(columns = {'2015': "Unemp. Rate_2015",\
                                          '2016': "Unemp. Rate_2016",\
                                          '2017': "Unemp. Rate_2017",\
                                          '2018': "Unemp. Rate_2018",\
                                          '2019': "Unemp. Rate_2019"},\
                                inplace = True)
unemployment_filtered_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Country,Unemp. Rate_2015,Unemp. Rate_2016,Unemp. Rate_2017,Unemp. Rate_2018,Unemp. Rate_2019,Avg. Unemployment
0,Albania,17.100,15.200,13.700,12.300,11.500,13.9600
1,Algeria,11.214,10.498,11.709,11.731,11.383,11.3070
2,Argentina,6.533,8.467,8.350,9.200,9.825,8.4750
3,Armenia,18.500,18.000,20.900,20.500,18.900,19.3600
4,Aruba,7.298,7.694,8.923,7.283,7.544,7.7484
...,...,...,...,...,...,...,...
101,United Kingdom,5.375,4.875,4.425,4.075,3.825,4.5150
102,United States,5.275,4.875,4.342,3.892,3.667,4.4102
103,Uruguay,7.533,7.867,7.925,8.367,8.925,8.1234
104,Vietnam,2.330,2.330,2.210,2.210,2.210,2.2580


In [37]:
# Pull in average happiness data
happiness_file = "../Happiness/Resources/avg_happiness.csv"
happiness_df = pd.read_csv(happiness_file, encoding = "UTF-8")
happiness_df.head()

Unnamed: 0,Country,Happiness Score_2015,Happiness Score_2016,Happiness Score_2017,Happiness Score_2018,Happiness Score_2019,Avg Happiness
0,Switzerland,7.587,7.509,7.494,7.487,7.48,7.5114
1,Iceland,7.561,7.501,7.504,7.495,7.494,7.511
2,Denmark,7.527,7.526,7.522,7.555,7.6,7.546
3,Norway,7.522,7.498,7.537,7.594,7.554,7.541
4,Canada,7.427,7.404,7.316,7.328,7.278,7.3506


In [34]:
# Merge Happiness and Unemployment Data
happiness_unemployment_df = happiness_df.merge(unemployment_filtered_df,\
                                               how = "left", on = "Country")
happiness_unemployment_df.head()

Unnamed: 0,Country,Happiness Score_2015,Happiness Score_2016,Happiness Score_2017,Happiness Score_2018,Happiness Score_2019,Avg Happiness,Unemp. Rate_2015,Unemp. Rate_2016,Unemp. Rate_2017,Unemp. Rate_2018,Unemp. Rate_2019,Avg. Unemployment
0,Switzerland,7.587,7.509,7.494,7.487,7.48,7.5114,3.178,3.323,3.088,2.547,2.306,2.8884
1,Iceland,7.561,7.501,7.504,7.495,7.494,7.511,3.992,3.008,2.825,2.742,3.55,3.2234
2,Denmark,7.527,7.526,7.522,7.555,7.6,7.546,6.283,6.0,5.808,5.117,5.042,5.65
3,Norway,7.522,7.498,7.537,7.594,7.554,7.541,4.531,4.74,4.216,3.854,3.728,4.2138
4,Canada,7.427,7.404,7.316,7.328,7.278,7.3506,6.9,6.992,6.342,5.833,5.667,6.3468


In [35]:
# Drop rows with 'NaN'
happiness_unemployment_df = happiness_unemployment_df.dropna()

# Save df to csv file
happiness_unemployment_df.to_csv("Resources/happiness_unemployment_yearly.csv",
                                index = False, header = True)
happiness_unemployment_df.head()

Unnamed: 0,Country,Happiness Score_2015,Happiness Score_2016,Happiness Score_2017,Happiness Score_2018,Happiness Score_2019,Avg Happiness,Unemp. Rate_2015,Unemp. Rate_2016,Unemp. Rate_2017,Unemp. Rate_2018,Unemp. Rate_2019,Avg. Unemployment
0,Switzerland,7.587,7.509,7.494,7.487,7.48,7.5114,3.178,3.323,3.088,2.547,2.306,2.8884
1,Iceland,7.561,7.501,7.504,7.495,7.494,7.511,3.992,3.008,2.825,2.742,3.55,3.2234
2,Denmark,7.527,7.526,7.522,7.555,7.6,7.546,6.283,6.0,5.808,5.117,5.042,5.65
3,Norway,7.522,7.498,7.537,7.594,7.554,7.541,4.531,4.74,4.216,3.854,3.728,4.2138
4,Canada,7.427,7.404,7.316,7.328,7.278,7.3506,6.9,6.992,6.342,5.833,5.667,6.3468


In [36]:
# Create new DataFrame with specified columns
happ_unemp_summary_df = happiness_unemployment_df[['Country', 
                                                   'Avg Happiness', 
                                                   'Avg. Unemployment']]

# Save df to csv file
unemployment_df.to_csv("Resources/happiness_unemployment_summary.csv",
                      index = False, header = True)
unemployment_df.head()

Unnamed: 0,Country,Subject Descriptor,Subject Notes,Units,Country/Series-specific Notes,2015,2016,2017,2018,2019,Avg. Unemployment
0,Albania,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,17.1,15.2,13.7,12.3,11.5,13.96
1,Algeria,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,11.214,10.498,11.709,11.731,11.383,11.307
2,Argentina,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,6.533,8.467,8.35,9.2,9.825,8.475
3,Armenia,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: ILO Latest actual data: 2015 Employmen...,18.5,18.0,20.9,20.5,18.9,19.36
4,Aruba,Unemployment rate,Unemployment rate can be defined by either the...,Percent of total labor force,Source: National Statistics Office Latest actu...,7.298,7.694,8.923,7.283,7.544,7.7484
