In [1]:
# import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# import files and upload them as dataframe
world_happiness_2016 = pd.read_csv("Input Data/world_happiness_2016.csv")
world_happiness_2015 = pd.read_csv("Input Data/world_happiness_2015.csv")
suicide_rates = pd.read_csv("Input Data/suicide_rates_overview1985-2016.csv")

In [3]:
suicide_rates.head()

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers


In [4]:
# see if there are any null values and data types
suicide_rates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27820 entries, 0 to 27819
Data columns (total 12 columns):
country               27820 non-null object
year                  27820 non-null int64
sex                   27820 non-null object
age                   27820 non-null object
suicides_no           27820 non-null int64
population            27820 non-null int64
suicides/100k pop     27820 non-null float64
country-year          27820 non-null object
HDI for year          8364 non-null float64
 gdp_for_year ($)     27820 non-null object
gdp_per_capita ($)    27820 non-null int64
generation            27820 non-null object
dtypes: float64(2), int64(4), object(6)
memory usage: 2.5+ MB


In [5]:
# checking info on numerical data types
suicide_rates.describe()

Unnamed: 0,year,suicides_no,population,suicides/100k pop,HDI for year,gdp_per_capita ($)
count,27820.0,27820.0,27820.0,27820.0,8364.0,27820.0
mean,2001.258375,242.574407,1844794.0,12.816097,0.776601,16866.464414
std,8.469055,902.047917,3911779.0,18.961511,0.093367,18887.576472
min,1985.0,0.0,278.0,0.0,0.483,251.0
25%,1995.0,3.0,97498.5,0.92,0.713,3447.0
50%,2002.0,25.0,430150.0,5.99,0.779,9372.0
75%,2008.0,131.0,1486143.0,16.62,0.855,24874.0
max,2016.0,22338.0,43805210.0,224.97,0.944,126352.0


In [6]:
# filter out suicide rates to only include years that overlap with world happines report
suicide_rates_2015 = suicide_rates.loc[suicide_rates["year"] == 2015]
suicide_rates_2016 = suicide_rates.loc[suicide_rates["year"] == 2016]

In [7]:
world_happiness_2015

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
5,Finland,Western Europe,6,7.406,0.0314,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955
6,Netherlands,Western Europe,7,7.378,0.02799,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761,2.4657
7,Sweden,Western Europe,8,7.364,0.03157,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262,2.37119
8,New Zealand,Australia and New Zealand,9,7.286,0.03371,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425
9,Australia,Australia and New Zealand,10,7.284,0.04083,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646


In [8]:
world_happiness_2015.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
Country                          158 non-null object
Region                           158 non-null object
Happiness Rank                   158 non-null int64
Happiness Score                  158 non-null float64
Standard Error                   158 non-null float64
Economy (GDP per Capita)         158 non-null float64
Family                           158 non-null float64
Health (Life Expectancy)         158 non-null float64
Freedom                          158 non-null float64
Trust (Government Corruption)    158 non-null float64
Generosity                       158 non-null float64
Dystopia Residual                158 non-null float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB


In [9]:
# add year column to dataframe, so we can merge the two data sets 
world_happiness_2015["year"] = 2015
world_happiness_2016["year"] = 2016

In [10]:
suicide_rates_2015.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 744 entries, 576 to 27555
Data columns (total 12 columns):
country               744 non-null object
year                  744 non-null int64
sex                   744 non-null object
age                   744 non-null object
suicides_no           744 non-null int64
population            744 non-null int64
suicides/100k pop     744 non-null float64
country-year          744 non-null object
HDI for year          0 non-null float64
 gdp_for_year ($)     744 non-null object
gdp_per_capita ($)    744 non-null int64
generation            744 non-null object
dtypes: float64(2), int64(4), object(6)
memory usage: 75.6+ KB


In [11]:
suicide_rates_2015.rename(columns={'country':'Country'}, inplace=True)
suicide_rates_2016.rename(columns={'country':'Country'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [12]:
len(world_happiness_2015['Country'].unique())

158

In [13]:
data_2015 = pd.merge(world_happiness_2015, suicide_rates_2015, on="Country",how="right")
data_2016 = pd.merge(world_happiness_2016, suicide_rates_2016, on="Country",how="right")

In [14]:
data_2015.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 744 entries, 0 to 743
Data columns (total 24 columns):
Country                          744 non-null object
Region                           636 non-null object
Happiness Rank                   636 non-null float64
Happiness Score                  636 non-null float64
Standard Error                   636 non-null float64
Economy (GDP per Capita)         636 non-null float64
Family                           636 non-null float64
Health (Life Expectancy)         636 non-null float64
Freedom                          636 non-null float64
Trust (Government Corruption)    636 non-null float64
Generosity                       636 non-null float64
Dystopia Residual                636 non-null float64
year_x                           636 non-null float64
year_y                           744 non-null int64
sex                              744 non-null object
age                              744 non-null object
suicides_no                      744 

In [15]:
data_2016.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 160 entries, 0 to 159
Data columns (total 25 columns):
Country                          160 non-null object
Region                           150 non-null object
Happiness Rank                   150 non-null float64
Happiness Score                  150 non-null float64
Lower Confidence Interval        150 non-null float64
Upper Confidence Interval        150 non-null float64
Economy (GDP per Capita)         150 non-null float64
Family                           150 non-null float64
Health (Life Expectancy)         150 non-null float64
Freedom                          150 non-null float64
Trust (Government Corruption)    150 non-null float64
Generosity                       150 non-null float64
Dystopia Residual                150 non-null float64
year_x                           150 non-null float64
year_y                           160 non-null int64
sex                              160 non-null object
age                              160

In [16]:
# drop hdi column
data_2015 = data_2015.drop(columns=["HDI for year"])
data_2015 = data_2015.drop(columns=["year_x"])
data_2016 = data_2016.drop(columns=["HDI for year"])
data_2016 = data_2016.drop(columns=["year_x"])

In [17]:
# drop null value
data_2015.dropna(inplace=True)
data_2016.dropna(inplace=True)

In [18]:
data_2016

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year_y,sex,age,suicides_no,population,suicides/100k pop,country-year,gdp_for_year ($),gdp_per_capita ($),generation
0,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,male,25-34 years,8,25029,31.96,Iceland2016,20304098101,64708,Millenials
1,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,male,55-74 years,10,33247,30.08,Iceland2016,20304098101,64708,Boomers
2,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,male,35-54 years,11,44024,24.99,Iceland2016,20304098101,64708,Generation X
3,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,male,15-24 years,6,24099,24.9,Iceland2016,20304098101,64708,Millenials
4,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,male,75+ years,1,8723,11.46,Iceland2016,20304098101,64708,Silent
5,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,female,25-34 years,2,23233,8.61,Iceland2016,20304098101,64708,Millenials
6,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,female,55-74 years,2,32963,6.07,Iceland2016,20304098101,64708,Boomers
7,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,female,15-24 years,0,22971,0.0,Iceland2016,20304098101,64708,Millenials
8,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,female,35-54 years,0,43216,0.0,Iceland2016,20304098101,64708,Generation X
9,Iceland,Western Europe,3.0,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016,female,75+ years,0,11268,0.0,Iceland2016,20304098101,64708,Silent


In [23]:
df = pd.concat([data_2015, data_2016],sort=False)

In [26]:
df

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year_y,sex,age,suicides_no,population,suicides/100k pop,country-year,gdp_for_year ($),gdp_per_capita ($),generation,Lower Confidence Interval,Upper Confidence Interval
0,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,male,75+ years,143,275569,51.89,Switzerland2015,679289166858,86068,Silent,,
1,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,male,55-74 years,264,891482,29.61,Switzerland2015,679289166858,86068,Boomers,,
2,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,male,35-54 years,242,1248988,19.38,Switzerland2015,679289166858,86068,Generation X,,
3,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,male,25-34 years,80,586880,13.63,Switzerland2015,679289166858,86068,Millenials,,
4,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,male,15-24 years,62,482708,12.84,Switzerland2015,679289166858,86068,Millenials,,
5,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,female,75+ years,41,414205,9.9,Switzerland2015,679289166858,86068,Silent,,
6,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,female,35-54 years,111,1227126,9.05,Switzerland2015,679289166858,86068,Generation X,,
7,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,female,55-74 years,82,922222,8.89,Switzerland2015,679289166858,86068,Boomers,,
8,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,female,15-24 years,20,462559,4.32,Switzerland2015,679289166858,86068,Millenials,,
9,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,female,25-34 years,24,577973,4.15,Switzerland2015,679289166858,86068,Millenials,,
