In [1]:
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from plotly.offline import init_notebook_mode, iplot, plot
from plotly import tools

# Let's explore the suicide rates of countries

The Institute for Health Metrics and Evaluation (IHME) (http://ghdx.healthdata.org/gbd-results-tool) provides a data repository which contains suicide rates (deaths per 100,000 people) for countries from 1990 to 2016. In the following visualizations, we explore the relationship between the suicide rate and several variables. The variables of interest are:
* Per capita income in dollars (extracted from World Bank data - http://databank.worldbank.org/data/reports.aspx?source=world-development-indicators&Type=TABLE&preview=on)
* Percentage enrolled in secondary education (extracted from World Bank data - http://databank.worldbank.org/data/reports.aspx?source=world-development-indicators&Type=TABLE&preview=on). This metric is the ratio of total enrollment in secondary education, regardless of age, to the population of the age group that officially corresponds to the secondary level, expressed as a percentage. Therefore, in some instances this value can exceed 100%.
* Yearly average temperature (extracted from http://sdwebx.worldbank.org/climateportal/index.cfm?page=country_historical_climat)

In [2]:
# Initialise Plotly's offline notebook mode; this must run before any iplot() call below.
init_notebook_mode(connected=True)

In [3]:
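# Large Plotly figures can exceed Jupyter's default IOPub data rate limit.
# If that happens, restart the notebook server from a terminal with:
#   jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10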

In [4]:
# Load the per-country, per-year suicide dataset (path is relative to the notebook's working directory).
suicide = pd.read_csv('suicide.csv')

In [5]:
# Preview the first rows to check which columns were loaded.
suicide.head()

Unnamed: 0,country,year,deaths,mf_ratio,country_code,population,per_capita,temperature,enrollment
0,Paraguay,1990,3.090264,2.053975,PRY,4213742.0,1351.578137,,31.00964
1,Paraguay,1991,3.236037,2.051104,PRY,4323410.0,1615.476618,23.64615,31.109859
2,Paraguay,1992,3.441868,2.045573,PRY,4432736.0,1614.674104,22.790292,34.308071
3,Paraguay,1994,3.480025,2.180785,PRY,4651225.0,1692.238533,24.120992,38.555759
4,Paraguay,1995,3.568877,2.199288,PRY,4760850.0,1903.469193,23.614717,41.35878


In [6]:
# Drop Greenland: its values skew the plots.
suicide = suicide.loc[suicide['country'] != 'Greenland']

# Gapminder-style plots

In [7]:
# Years present in the data; each year becomes one slider position (one trace per year).
years_per_capita = sorted(suicide.year.unique())
years_temp = years_per_capita[1:-1] # 1990 and 2016 not available

# Colour scale for the male-to-female ratio (mapped over cmin=0 .. cmax=10 below).
colorscale=[[0, 'rgb(33,102,172)'], [0.05, 'rgb(69,117,180)'], [0.1, 'rgb(255,255,191)'],
            [1, 'rgb(215,48,39)']]


def _yearly_bubble_traces(frame, x_column, years, colorscale):
    """Build one bubble-scatter trace per year for a year slider.

    Each trace plots ``frame[x_column]`` against ``frame.deaths`` for a single
    year. Marker area is the population in millions and marker colour is the
    male-to-female ratio. All traces start hidden except the first, which is
    the slider's default position.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns ``year``, ``deaths``, ``country``,
        ``mf_ratio``, ``population`` and ``x_column``.
    x_column : str
        Name of the column plotted on the x axis.
    years : sequence
        Years to build traces for, in slider order.
    colorscale : list
        Plotly colour scale applied to the marker colour.

    Returns
    -------
    list of dict
        One trace dict per entry in ``years`` (empty if ``years`` is empty).
    """
    traces = []
    for year in years:
        # Filter once per year instead of once per trace attribute.
        rows = frame[frame.year == year]
        hover_text = (rows.country
                      + '<br>Male to Female ratio: ' + rows.mf_ratio.astype(str)
                      + '<br>Population: ' + rows.population.astype(str))
        traces.append(dict(go.Scatter(
            x = rows[x_column],
            y = rows.deaths,
            name = '',
            mode = 'markers',
            text = hover_text,
            marker = dict(
                size = rows.population/1000000,
                color = rows.mf_ratio,
                colorscale = colorscale,
                cmax = 10,
                cmin = 0,
                colorbar = dict(title = 'Male Female<Br>Ratio', thickness = 8, len=1),
                showscale = True,
                sizemode = 'area',
            ),
            visible = False,
        )))
    if traces:
        traces[0]['visible'] = True # default value
    return traces


data_per_capita = _yearly_bubble_traces(suicide, 'per_capita', years_per_capita, colorscale)
data_temperatue = _yearly_bubble_traces(suicide, 'temperature', years_temp, colorscale)
data_enrollment = _yearly_bubble_traces(suicide, 'enrollment', years_per_capita, colorscale) # same years as per_capita

data = [data_per_capita, data_temperatue, data_enrollment]

In [8]:
# add the slider
def _year_steps(years):
    """Return one slider step per year.

    Step ``i`` uses the ``restyle`` method to set the ``visible`` attribute of
    the figure's traces so that only the i'th trace is shown, and is labelled
    with the i'th year.
    """
    steps = []
    for position, year in enumerate(years):
        visibility = [False] * len(years)
        visibility[position] = True # Toggle i'th trace to "visible"
        steps.append(dict(
            method = 'restyle',
            args = ['visible', visibility],
            label = year,
        ))
    return steps


def _year_slider(steps):
    """Wrap ``steps`` in a one-element ``sliders`` list that starts on the first step."""
    return [dict(
        active = 0,
        currentvalue = {"prefix": "Year: "},
        pad = {"t": 50},
        steps = steps,
    )]


steps_per_capita = _year_steps(years_per_capita)
steps_temp = _year_steps(years_temp)

sliders_per_capita = _year_slider(steps_per_capita)
sliders_temp = _year_slider(steps_temp)

# steps_per_capita and sliders_per_capita can be reused for enrollment

In [9]:
# Suicide rate vs GDP per capita. The axis ranges are fixed here so the view
# does not rescale as the year slider moves.
layout = go.Layout(
    height=450,
    width=500,
    title='Suicide rate vs GDP per Capita',
    xaxis=dict(title='GDP per capita $', range=[-10000, 120000]),
    yaxis=dict(title='Suicide rate (for 100,000 lives)', range=[0, 50]),
    sliders=sliders_per_capita,
)
fig = go.Figure(data=data_per_capita, layout=layout)
iplot(fig)

We can see that over the years most countries were able to improve their GDP per capita. In general, a country's suicide rate has fallen as its GDP per capita has increased. A good example of this is China. In 1990 China had a suicide rate of 21.9 and a GDP per capita of \$317. In 2016, the suicide rate had fallen to 9.6 and GDP per capita had risen to \$8123. However, this is not the case for Japan. In 1990 Japan had a suicide rate of 20.0 and a GDP per capita of \$25417. In 2016, the suicide rate had increased to 23.1 despite an increased GDP per capita of \$38972. This makes us doubt whether money can buy happiness.

In [10]:
# Export the current figure as a standalone HTML file.
# (plot() also accepts auto_open=False to skip opening the file in a browser.)
plot(fig, filename = 'suicide_percapita.html')

'file:///Users/nis89mad/Dropbox/MSAN/Spring2/Data_viz/nishancm.github.io/suicide_percapita.html'

In [11]:
# Suicide rate vs average yearly temperature. The axis ranges are fixed here so
# the view does not rescale as the year slider moves.
layout = go.Layout(
    height=450,
    width=500,
    title='Suicide Rate vs Average Yearly Temperature',
    # Axis title spelling corrected (was 'Average temprature').
    xaxis=dict(title='Average temperature', range=[-10, 30]),
    yaxis=dict(title='Suicide rate (for 100,000 lives)', range=[0, 50]),
    sliders=sliders_temp,
)
fig = go.Figure(data=data_temperatue, layout=layout)
iplot(fig)

When we move through the years, we can notice that the average temperature of countries drifts slightly towards the right, which possibly suggests the impact of global warming. In general, we see a negative correlation between temperature and suicide rate. Maybe this means people in colder countries are more likely to commit suicide. We will leave it there, but there are studies on this if you are interested. It is also worthwhile to highlight how the male-to-female suicide rate ratio (MF ratio) changes over time. We can see that only a handful of countries have an MF ratio less than 1 in 1990; that is, in most countries male suicide rates are higher than female suicide rates. Over the years, we can see that the MF ratio trends to greater than 1 even for these countries.

In [26]:
# Export the current figure as a standalone HTML file.
# (plot() also accepts auto_open=False to skip opening the file in a browser.)
plot(fig, filename = 'suicide_temperature.html')

'file:///Users/nis89mad/Dropbox/MSAN/Spring2/Data_viz/nishancm.github.io/suicide_temperature.html'

In [12]:
# Suicide rate vs secondary-education enrollment. Reuses the per-capita slider
# and fixes the axis ranges so the view does not rescale as the slider moves.
layout = go.Layout(
    height=450,
    width=500,
    title='Suicide rate vs <br> Percentage Enrollment in Secondary Education',
    xaxis=dict(title='Percentage enrolled in secondary education (%)', range=[0, 165]),
    yaxis=dict(title='Suicide rate (for 100,000 lives)', range=[0, 50]),
    sliders=sliders_per_capita,
)
fig = go.Figure(data=data_enrollment, layout=layout)
iplot(fig)

In [13]:
# Export the current figure as a standalone HTML file.
# (plot() also accepts auto_open=False to skip opening the file in a browser.)
plot(fig, filename = 'suicide_enroll.html')

'file:///Users/nis89mad/Dropbox/MSAN/Spring2/Data_viz/nishancm.github.io/suicide_enroll.html'

As we move through the years, we can see that the percentage enrolled in secondary education improves for most countries, which is a very healthy sign. China is a major success story. In line with that, we can see a general downward shift in suicide rates.

# Line chart

In [29]:
countries = sorted(suicide.country.unique())


def _country_trace(frame, country):
    """Return a hidden line trace of ``deaths`` over ``year`` for one country.

    The country's rows are selected and sorted by year once, so the x and y
    arrays are guaranteed to be in the same chronological order.
    """
    rows = frame[frame.country == country].sort_values('year')
    return go.Scatter(
        x=np.array(rows.year),
        y=np.array(rows.deaths),
        visible=False,
        name="",
    )


# One trace per country; only the first is shown until a dropdown entry is chosen.
data = [_country_trace(suicide, c) for c in countries]
if data:
    data[0]['visible'] = True #default value

# One dropdown button per country: show only that country's trace and retitle the figure.
buttons = []
for i, c in enumerate(countries):
    visibility = [False] * len(countries)
    visibility[i] = True
    buttons.append(dict(label = c,
                        method = 'update',
                        args = [{'visible': visibility},
                                {'title': 'Suicide rate '+c}]))

updatemenus = [
    dict(active=0,
         buttons=buttons,
         x = 0.5,
         y = 1.12
    )
]

layout = go.Layout(height = 450, width = 500,
    title='Suicide rates',
    yaxis = dict(title='Suicide rate (for 100,000 lives)', rangemode='tozero'),
    xaxis = dict(title='Year', range = [1990,2017]),
    showlegend = False,
    updatemenus=updatemenus
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

Using this plot we can see how the suicide rate changes for each country over the years. We can see that China has been able to continuously reduce its suicide rate. On the other hand, we can see that Japan has actually been able to reduce the number of deaths from suicide after its peak in the 2005-2007 period.

In [30]:
# Export the line chart as a standalone HTML file.
plot(fig, filename = 'line_chart.html')

'file:///Users/nis89mad/Dropbox/MSAN/Spring2/Data_viz/nishancm.github.io/line_chart.html'