## Worldwide Suicide Analysis between 1985 and 2015

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import cm
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
# Apply seaborn's defaults, then switch matplotlib back to its stock style.
sns.set()
plt.style.use('default')

data = pd.read_csv("../input/suicide-rates-overview-1985-to-2016/master.csv")

# Drop 2016 because few countries have reports for that year.
data = data[data.year != 2016]

# Zero-pad the youngest age group so the age labels sort in age order.
# The result is assigned back to the column: an in-place replace on a column
# selection is a chained update that does not reach `data` under copy-on-write.
data['age'] = data['age'].replace({"5-14 years": "05-14 years"})

# Keep only the countries with the maximum number of rows, which is used here
# as the criterion for "has data for every year".
rows_per_country = data.country.value_counts()
co = rows_per_country == rows_per_country.max()

# Filter with an explicit row-aligned mask instead of relying on pandas to
# reindex the per-country boolean Series against a non-unique index.
data = data[data.country.isin(co[co].index)].set_index('country')
data

### Only looking at the countries with full reports between 1985 and 2015:

In [None]:
# Show the distinct values of the `data` index, i.e. the countries kept in the analysis.
data.index.unique()

### GDP per capita and number of suicides in 2015:

In [None]:
# Total suicides per (GDP per capita, year, country) group, restricted to 2015.
plotdata = (
    data.groupby(['gdp_per_capita ($)', 'year', 'country'])
    .suicides_no.sum()
    .reset_index()
)
plotdata = plotdata[plotdata.year == 2015]

plt.figure(figsize=(10, 5))
# Pass x/y by keyword: positional data vectors were deprecated in seaborn 0.11
# and are rejected by later releases.
sns.scatterplot(x=plotdata['gdp_per_capita ($)'], y=plotdata.suicides_no)
plt.title("Suicide numbers and GDP Per Capita in 2015")

# Hand-placed labels for the outliers discussed in the text below the plot.
plt.text(plotdata['gdp_per_capita ($)'].max() - 10000, 2000, "Luxembourg")
plt.text(77000, 2500, "Singapore")
plt.text(63000, plotdata.suicides_no.max() - 600, "United States");

In 2015, GDP per capita seems to have only a small effect on suicide numbers, apart from a few outliers: Singapore and Luxembourg have a high GDP per capita and lower numbers of suicides, while the US has a lower GDP per capita but a significantly higher number of suicides.

### Suicide rates by GDP per capita in 2015:

In [None]:
# Sum of the 'suicides/100k pop' column per (GDP per capita, year, country)
# group, restricted to 2015.
# NOTE(review): this adds up the rate of every row in a group rather than
# computing a single population-weighted rate; the hard-coded annotation
# coordinates below presumably correspond to these sums.
plotdata2 = (
    data.groupby(['gdp_per_capita ($)', 'year', 'country'])['suicides/100k pop']
    .sum()
    .reset_index()
)
plotdata2 = plotdata2[plotdata2.year == 2015]

plt.figure(figsize=(10, 5))
# Pass x/y by keyword: positional data vectors were deprecated in seaborn 0.11
# and are rejected by later releases.
sns.scatterplot(x=plotdata2['gdp_per_capita ($)'], y=plotdata2['suicides/100k pop'])
plt.title("Suicide rate and GDP Per Capita in 2015")

# (label, point being annotated, position of the label text)
callouts = [
    ('Luxembourg', (107456, 134.4), (90000, 150)),
    ('Singapore', (81766, 109.29), (70000, 150)),
    ('United States', (60387, 175.41), (60000, 200)),
    ('Japan', (36510, 206.04), (30000, 250)),
    ('Belgium', (42830, 196.66), (40000, 230)),
    ('Austria', (46484, 194.62), (50000, 220)),
    ('Republic of Korea', (28410, 389.14), (33000, 380)),
]
for label, point, text_position in callouts:
    plt.annotate(label, xy=point, xytext=text_position,
                 arrowprops=dict(arrowstyle='->'))

Compared with total suicide numbers, the suicide rate appears to be even less affected by GDP per capita.  The Republic of Korea stands out on this plot, having a markedly high suicide rate and one of the lower GDPs per capita.

### Percent of suicides by age between 1985 and 2015 by country:

In [None]:
# One pie chart per country showing how its total suicides split across age groups.
with plt.style.context('tableau-colorblind10'):
    fig = plt.figure(figsize = (35,25))
    countries = data.index.unique()

    # Size the grid from the number of countries instead of assuming exactly 23;
    # at least 4 rows are kept so the layout is unchanged for 24 or fewer countries.
    ncols = 6
    nrows = max(4, (len(countries) + ncols - 1) // ncols)

    agedata = None
    for position, country in enumerate(countries, start=1):
        ax = fig.add_subplot(nrows, ncols, position)
        ax.set_title(country, fontsize = 35)
        # A list indexer always yields a DataFrame, even for a single-row country.
        agedata = data.loc[[country]].groupby('age').suicides_no.sum()
        ax.pie(agedata, textprops={'size': 10})

    # The shared legend takes its labels from the last country's age groups.
    if agedata is not None:
        fig.legend(agedata.index, loc = 'lower center', ncol = 6, fontsize = 30, bbox_to_anchor = (0.5,0))

Overall, the smallest number of suicides is in the age group 5-14, while the largest number of suicides is seen in ages 35-54, followed closely by 55-74.  Central and South American countries appear to have the highest share of teen and young adult suicides.

### Total number of suicides by gender between 1985 and 2015:

In [None]:
# Yearly suicide totals per sex, reshaped to one row per year with one column per sex.
sexdata = data.groupby(['year', 'sex']).suicides_no.sum()
sexplot = sexdata.reset_index()
femdata = sexplot[sexplot.sex == 'female'].drop('sex', axis=1)
maledata = sexplot[sexplot.sex == 'male'].drop('sex', axis=1)
sexplot = pd.merge(femdata, maledata, on='year', suffixes=('_female', '_male'))
sexplot = sexplot.set_index('year')

plt.figure(figsize=(20, 5))
colors = ['#E45E9D', '#1589FF']
# Labels are listed in the same order as the merged columns (female, male)
# rather than being looked up from the row order of `data.sex`.
labels = ['female', 'male']
# Both series are drawn on the same x positions with transparency, so the bars overlap.
for column, color, label in zip(sexplot.columns, colors, labels):
    plt.bar(sexplot.index, sexplot[column], color=color, alpha=0.5, label=label)
plt.title("Number of Suicides by Gender by Year", fontsize=15)
plt.xlabel("Year", fontsize=12)
plt.ylabel("Number of Suicides", fontsize=12)
plt.legend(fontsize=12);

The ratio of suicides averages around 25% women and 75% men.  There has only been a small increasing trend in women's suicides, from about 20,000 to 30,000 per year, while men's suicides have risen from about 60,000 to almost 100,000 per year since 1985.

### Mean percent of suicidal population by country between 1985 and 2015:

In [None]:
# Mean of the 'suicides/100k pop' column per country, expressed as a percentage
# of population: dividing by 100,000 gives a fraction and multiplying by 100
# turns it into the percent that the axis label promises.
# The column keeps its original name so the barplot call below is unchanged.
countrydata2 = (data.groupby('country')['suicides/100k pop'].mean() / 100000 * 100).reset_index()
plt.figure(figsize = (50,20))
countrybar2 = sns.barplot(x = 'country', y = 'suicides/100k pop', data = countrydata2)
plt.title("Suicides by Percentage of Population", fontsize = 70)
plt.ylabel("Percent", fontsize = 50)
plt.xlabel("Country", fontsize = 50)
plt.xticks(rotation=80)
plt.tick_params(labelsize = 40)

The countries with the highest suicide rates are Austria, Belgium, Japan, Luxembourg, Republic of Korea, Singapore, and the U.S.

### Total number of suicides per country between 1985 and 2015:

In [None]:
# Total number of suicides per country over the whole period covered by `data`.
countrydata = data.groupby('country').suicides_no.sum()
plt.figure(figsize = (50,20))
# Plot the totals in millions so the tick values match the y-axis label
# regardless of how matplotlib chooses to format large numbers.
countrybar = plt.bar(countrydata.index, countrydata / 1e6)
plt.title("Suicides by Country", fontsize = 70)
plt.ylabel("Suicides (millions)", fontsize = 50)
plt.xlabel("Country", fontsize = 50)
plt.xticks(rotation=80)
plt.tick_params(labelsize = 40)

Japan and the United States have a large lead in the total number of suicides between 1985 and 2015.