In [None]:
!pip install pycountry-convert
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim 
import folium
from folium.plugins import MarkerCluster
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
plt.rcParams.update({'figure.max_open_warning': 0})
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

In [None]:
# Load the dataset once; every cell below reads this module-level `df`.
df = pd.read_csv('../input/suicide-rates-overview-1985-to-2016/master.csv')

### Data Analysis

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# Exact column labels (the plotting helpers below refer to them by name).
df.columns

In [None]:
# Distinct country names in the dataset, and how many of them there are.
countries = df.loc[:, 'country'].unique()
print("Total country : ", countries.size)

In [None]:
# Distinct labels of the `age` column.
df['age'].unique()

In [None]:
# Distinct labels of the `generation` column.
df['generation'].unique()

In [None]:
def country_suicide():
    """Draw a bar chart of the summed ``suicides_no`` for every country in ``df``."""
    totals = (
        df[['country', 'suicides_no']]
        .groupby('country')
        .sum()
        .reset_index()
    )
    grid = sns.catplot(
        data=totals,
        x='country',
        y='suicides_no',
        kind="bar",
        height=5,
        aspect=3.0,
    )
    # Turn the country names vertical so neighbouring labels do not overlap.
    grid.set_xticklabels(rotation=90)

In [None]:
# Render the per-country totals chart defined above.
country_suicide()

In [None]:
def plot_country_yearly_suicide(country):
    """Plot the total number of suicides per year for one country.

    The dataset holds several rows per ``country-year`` value (it is also
    broken down by columns such as ``sex`` and ``age``), so the rows are summed
    before plotting. Passing the raw rows to a seaborn bar plot would draw the
    *mean* of those rows with an error bar instead of a yearly total.

    Parameters
    ----------
    country : str
        Value of the ``country`` column to select.
    """
    rows = df[df['country'] == country]
    # One row per country-year; groupby also sorts the labels chronologically
    # because they end in the four-digit year.
    yearly = rows.groupby('country-year', as_index=False)['suicides_no'].sum()
    chart = sns.catplot(x='country-year', y='suicides_no', kind="bar", data=yearly, height=3, aspect=9/4)
    plt.title(country)
    chart.set_xticklabels(rotation=90)

In [None]:
# Yearly chart for the two example countries.
for _country in ('Albania', 'Japan'):
    plot_country_yearly_suicide(_country)

In [None]:
def plot_country_sex_suicide(country):
    """Plot the total number of suicides per sex for one country.

    Parameters
    ----------
    country : str
        Value of the ``country`` column to select.
    """
    rows = df[df['country'] == country]
    # Keep the aggregated frame (the result used to be thrown away) and plot
    # the suicide count rather than the population column.
    by_sex = rows.groupby('sex', as_index=False)['suicides_no'].sum()
    sns.catplot(x='sex', y='suicides_no', kind='bar', data=by_sex, height=3, aspect=9/4)
    plt.title(country)

In [None]:
# Per-sex chart for the two example countries.
for _country in ('Albania', 'Japan'):
    plot_country_sex_suicide(_country)

In [None]:
def plot_country_gdp_suicide(country):
    """Plot yearly suicides against per-capita GDP for one country.

    The per-capita GDP is divided by 100 so that both lines fit on one y-axis.

    Suicides are summed over the rows of each ``country-year``. Per-capita GDP
    is averaged instead: summing it would multiply the value by the number of
    rows in that year.

    Parameters
    ----------
    country : str
        Value of the ``country`` column to select.
    """
    # Filter first so only one country's rows are aggregated.
    rows = df[df['country'] == country]
    yearly = (
        rows.groupby('country-year')
        .agg({'suicides_no': 'sum', 'gdp_per_capita ($)': 'mean'})
        .reset_index()
    )
    # The label is "<country><yyyy>"; keep only the year for the x-axis.
    years = yearly['country-year'].str[-4:]
    gdp_scaled = yearly['gdp_per_capita ($)'] / 100

    plt.figure(figsize=(20,5))
    plt.xlabel('year')
    plt.ylabel('gdp_per_capita($)/100')
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    plt.grid(color='white',linestyle='dotted',linewidth=0.5)
    plt.plot(years,yearly['suicides_no'],marker='o',label='suicides')
    plt.plot(years,gdp_scaled,marker='^',label='gdp_per_capita')
    plt.title(country)
    plt.legend()

In [None]:
# GDP-vs-suicides chart for the two example countries.
for _country in ('Albania', 'Japan'):
    plot_country_gdp_suicide(_country)

In [None]:
def plot_country_suicide_sex_yearly(country):
    """Draw a violin plot of ``suicides_no`` per year for one country, split by sex."""
    # Widen the default figure size; note that sns.set changes seaborn's
    # global settings, not just this plot.
    sns.set(rc={'figure.figsize': (20, 4)})
    selected = df.loc[df['country'] == country]
    sns.violinplot(data=selected, x="year", y="suicides_no", hue="sex")
    plt.title(country)

In [None]:
# Yearly distribution by sex for Albania.
plot_country_suicide_sex_yearly('Albania')

In [None]:
# Yearly distribution by sex for Japan.
plot_country_suicide_sex_yearly('Japan')

In [None]:
def plot_country_suicide_sex_generation(country):
    """Bar chart of ``suicides_no`` by sex for one country, one bar colour per generation."""
    selected = df.loc[df['country'] == country]
    sns.catplot(data=selected, x='sex', y='suicides_no', hue='generation', kind='bar')
    plt.title(country)

In [None]:
# Sex/generation chart for the two example countries.
for _country in ('Albania', 'Japan'):
    plot_country_suicide_sex_generation(_country)

In [None]:
def findGeocode(country, max_retries=5):
    """Geocode a place name with Nominatim, retrying when the request times out.

    Parameters
    ----------
    country : str
        Free-text place name handed to the geocoder.
    max_retries : int, optional
        Number of extra attempts made after a ``GeocoderTimedOut``. The
        previous implementation recursed without limit, which ends in a
        ``RecursionError`` when the service stays unreachable.

    Returns
    -------
    Whatever ``Nominatim.geocode`` returns. Callers in this notebook read
    element ``[0]`` as a name and element ``[1]`` as a (latitude, longitude)
    pair. geopy returns ``None`` when nothing matches, so callers must check.

    Raises
    ------
    GeocoderTimedOut
        If the last permitted attempt also timed out.
    """
    geolocator = Nominatim(user_agent="default_name")
    for attempt in range(max_retries + 1):
        try:
            return geolocator.geocode(country)
        except GeocoderTimedOut:
            if attempt == max_retries:
                raise

In [None]:
def plot_world_map():
    """Build a folium world map with one clustered marker per country.

    Each marker's popup shows the country name and its summed ``suicides_no``.
    Countries that the geocoder cannot resolve are skipped instead of crashing
    the whole map on a ``None`` result.

    Returns
    -------
    folium.Map
        The map; leave it as the last expression of a cell to display it.
    """
    # Only the suicide count is shown, so aggregate just that column.
    totals = df.groupby('country')['suicides_no'].sum()

    world_map = folium.Map(tiles="cartodbpositron", max_bounds=True, zoom_control=False)
    marker_cluster = MarkerCluster().add_to(world_map)

    for name, suicides in totals.items():
        location = findGeocode(name)
        if location is None:
            # No geocoding match for this name; nothing to place on the map.
            continue
        latitude, longitude = location[1]
        popup_text = """Country : {}<br> Suicides : {}<br>""".format(name, suicides)
        folium.CircleMarker(
            location=[latitude, longitude],
            radius=5,
            popup=popup_text,
            fill=True,
        ).add_to(marker_cluster)

    return world_map

In [None]:
# The returned folium map is the cell's last expression, so it is displayed.
plot_world_map()

In [None]:
def plot_country_age_suicide(country):
    """Plot total suicides per age group and sex for one country.

    The country filter used to be computed and thrown away, so every call drew
    the same chart over all countries. The filter is now applied before the
    rows are aggregated.

    Parameters
    ----------
    country : str
        Value of the ``country`` column to select. If no row matches, a
        warning is issued and nothing is drawn.
    """
    rows = df[df['country'] == country]
    if rows.empty:
        warnings.warn(f"No rows for country {country!r}; nothing to plot.")
        return

    by_age_sex = rows.groupby(['age', 'sex'], as_index=False)['suicides_no'].sum()
    # Order the bars by the first number in the age label so that a label like
    # "5-14" is not placed after "35-54" by plain string sorting.
    # Assumes the labels start with their lower bound - TODO confirm;
    # labels without a number simply go last.
    lower_bound = by_age_sex['age'].str.extract(r'(\d+)', expand=False).astype(float)
    by_age_sex = by_age_sex.loc[lower_bound.sort_values(kind='stable').index]

    sns.catplot(x='age', y='suicides_no', hue='sex', kind='bar', data=by_age_sex, height=3, aspect=9/4)
    plt.title(country)

In [None]:
# Age/sex chart for the two example countries ('Japna' was a typo for 'Japan').
for _country in ('Albania', 'Japan'):
    plot_country_age_suicide(_country)

In [None]:
def plot_country_population_suicides(country):
    """Plot yearly population and suicides for one country.

    Population is divided by 10000 so that both lines fit on one y-axis.

    Parameters
    ----------
    country : str
        Value of the ``country`` column to select; also used as the title.
    """
    rows = df[df['country'] == country]
    # Aggregate only the two plotted columns instead of summing every column.
    yearly = rows.groupby('year')[['population', 'suicides_no']].sum().reset_index()

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    # Use the argument, not the literal string 'country', as the title.
    plt.title(country)
    plt.xlabel('year')
    plt.ylabel('count')
    plt.grid(color='white',linestyle='dashed')
    plt.plot(yearly['year'],yearly['population']/10000,marker='o',label='population/10k',color='orange')
    plt.plot(yearly['year'],yearly['suicides_no'],marker='o',label='suicides',color='red')
    plt.legend()

In [None]:
# Population-vs-suicides chart for the two example countries.
for _country in ('Albania', 'Japan'):
    plot_country_population_suicides(_country)

In [None]:
def plot_global_change_yearly_population_suicide():
    """Line chart of population and suicides per year, summed over the whole dataset.

    Population is divided by 10000 before plotting. A dashed horizontal line
    marks the mean of each series.
    """
    yearly = (
        df[['year', 'population', 'suicides_no']]
        .groupby('year')
        .sum()
        .reset_index()
    )
    yearly['population'] = yearly['population'] / 10000

    years = yearly['year']
    n_points = len(yearly)

    _, ax = plt.subplots(figsize=(20, 5))
    ax.set_facecolor('#2E4053')
    ax.grid(color='white', linestyle='dotted', linewidth=0.5)
    ax.set_title('Yearly global change')
    ax.set_xlabel('year')
    ax.set_ylabel('count')

    ax.plot(years, yearly['population'], label='population', marker='o', color='orange')
    ax.plot(years, yearly['suicides_no'], label='suicide', marker='x', color='red')
    # Constant series drawn over the same x values give the mean reference lines.
    ax.plot(years, [yearly['suicides_no'].mean()] * n_points,
            label='mean suicide', linestyle='dashed', color='red')
    ax.plot(years, [yearly['population'].mean()] * n_points,
            label='mean population', linestyle='dashed', color='orange')
    ax.legend()

In [None]:
# Dataset-wide yearly population and suicide totals.
plot_global_change_yearly_population_suicide()

In [None]:
def plot_global_change_generation_yearly():
    """Line chart of population and suicides per generation, summed over the dataset.

    Population is divided by 10000 before plotting. Each dashed mean line uses
    the colour of its own series (orange for population, red for suicides), as
    in ``plot_global_change_yearly_population_suicide``; the two colours used
    to be swapped. Generations are drawn oldest to youngest rather than in
    alphabetical order, so the line reads as a progression.
    """
    by_generation = df.groupby('generation')[['population', 'suicides_no']].sum()

    # Known labels first, in birth order; any other label keeps its place at the end.
    chronological = ['G.I. Generation', 'Silent', 'Boomers',
                     'Generation X', 'Millenials', 'Generation Z']
    ordered = [g for g in chronological if g in by_generation.index]
    ordered += [g for g in by_generation.index if g not in chronological]
    by_generation = by_generation.loc[ordered].reset_index()
    by_generation['population'] = by_generation['population'] / 10000

    generations = by_generation['generation']
    n_points = len(by_generation)

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    plt.title('Global change in suicide varying on generation')
    plt.xlabel('generation')
    plt.ylabel('count')
    plt.grid(color='white', linestyle='dotted', linewidth=0.5)
    plt.plot(generations,by_generation['population'],label='population',marker='o',color='orange')
    plt.plot(generations,by_generation['suicides_no'],label='suicide',marker='x',color='red')
    plt.plot(generations,[by_generation['suicides_no'].mean()]*n_points,label='mean suicide',linestyle='dashed',color='red')
    plt.plot(generations,[by_generation['population'].mean()]*n_points,label='mean population',linestyle='dashed',color='orange')
    plt.legend()

In [None]:
# Dataset-wide population and suicide totals per generation.
plot_global_change_generation_yearly()

In [None]:
def plot_global_change_age_yearly():
    """Line chart of population and suicides per age group, summed over the dataset.

    Population is divided by 10000 before plotting. Each dashed mean line uses
    the colour of its own series; the two colours used to be swapped. Age
    groups are ordered by the first number in their label instead of by plain
    string order.
    """
    by_age = df.groupby('age')[['population', 'suicides_no']].sum().reset_index()
    # Plain string sorting puts a label like "5-14" after "35-54".
    # Assumes the labels start with their lower bound - TODO confirm;
    # labels without a number simply go last.
    lower_bound = by_age['age'].str.extract(r'(\d+)', expand=False).astype(float)
    by_age = by_age.loc[lower_bound.sort_values(kind='stable').index].reset_index(drop=True)
    by_age['population'] = by_age['population'] / 10000

    ages = by_age['age']
    n_points = len(by_age)

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    plt.grid(color='teal',linestyle='dotted')
    plt.title('global change in suicides varying on age')
    plt.xlabel('age')
    plt.ylabel('count')
    plt.plot(ages,by_age['population'],label='population/10k',marker='o',color='#F7DC6F')
    plt.plot(ages,by_age['suicides_no'],label='suicide',marker='x',color='#F1948A')
    plt.plot(ages,[by_age['suicides_no'].mean()]*n_points,label='mean suicide',linestyle='dashed',color='#F1948A')
    plt.plot(ages,[by_age['population'].mean()]*n_points,label='mean population/10k',linestyle='dashed',color='#F7DC6F')
    plt.legend()

In [None]:
# Dataset-wide population and suicide totals per age group.
plot_global_change_age_yearly()

In [None]:
def get_continent(con):
    """Return the continent code for a country name, or the input if unknown.

    The name is first converted to an alpha-2 country code and that code is
    then converted to a continent code. If the name conversion fails, the input
    itself is tried as an alpha-2 code. If the continent lookup fails too, the
    input is returned unchanged so the caller can detect and patch it.

    Parameters
    ----------
    con : str
        Country name (or alpha-2 code).

    Returns
    -------
    str
        Continent code, or ``con`` itself when no continent could be resolved.
    """
    # `except Exception` rather than a bare `except`, so KeyboardInterrupt and
    # SystemExit are no longer swallowed by a failed lookup.
    try:
        cn_a2_code = country_name_to_country_alpha2(con)
    except Exception:
        cn_a2_code = con
    try:
        return country_alpha2_to_continent_code(cn_a2_code)
    except Exception:
        return con

In [None]:
def create_country_continent():
    """Map every country name in ``df`` to a continent code.

    ``get_continent`` returns the name unchanged when it cannot resolve it. For
    those names a small manual table is consulted. The manual table is only
    used when the automatic lookup failed, so nothing raises if a name resolves
    on its own (the previous ``list.index`` patching raised ``ValueError`` in
    that case).

    Returns
    -------
    dict
        ``{country name: continent code}``. Names that neither the lookup nor
        the manual table resolves map to themselves.
    """
    manual_codes = {
        'Republic of Korea': 'AS',
        # Saint Vincent and the Grenadines is a Caribbean state, i.e. North
        # America; it was previously assigned to 'EU'.
        'Saint Vincent and Grenadines': 'NA',
    }

    country_continent_dict = {}
    # unique() is evaluated once instead of once per loop iteration.
    for name in df['country'].unique():
        code = get_continent(name)
        if code == name:
            code = manual_codes.get(name, name)
        country_continent_dict[name] = code
    return country_continent_dict

In [None]:
def plot_continent_suicide_gdp_population():
    """Draw continent-level totals as a line chart and return a folium map of them.

    The line chart shows summed suicides, population / 10000 and
    ``gdp_per_capita ($)`` / 100 per continent. The map carries one marker per
    continent whose popup shows the continent name and its suicide total.

    The continent column is added to a copy, so the shared ``df`` is no longer
    modified as a side effect. Coordinates are looked up by the continent name
    itself instead of by the name the geocoder returns, and continents without
    a geocoding match are skipped instead of raising.

    Returns
    -------
    folium.Map
        The map; leave it as the last expression of a cell to display it.
    """
    continent_names = {'AF':'Africa','AS':'Asia','EU':'Europa','NA':'North America','OC':'Oceania','SA':'South America'}
    # assign() returns a new frame; countries with no continent become NaN and
    # are dropped by the groupby below.
    data = df.assign(continent=df['country'].map(create_country_continent()).map(continent_names))
    totals = (
        data.groupby('continent')[['suicides_no', 'population', 'gdp_per_capita ($)']]
        .sum()
        .reset_index()
    )

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    plt.grid(color='white',linestyle='dotted')
    # Population and GDP are divided by fixed factors so the three lines share one axis.
    # NOTE(review): the GDP line is the *sum* of the per-capita column over all
    # rows of a continent, so it grows with the row count - confirm intended.
    plt.plot(totals['continent'],totals['suicides_no'],marker='o',label='suicides',color='crimson')
    plt.plot(totals['continent'],totals['population']/10000,marker='x',label='population',color='darkorange')
    plt.plot(totals['continent'],totals['gdp_per_capita ($)']/100,marker='x',label='gdp_per_capita ($)',color='limegreen')
    plt.legend()

    world_map = folium.Map(tiles="cartodbpositron",max_bounds=True,zoom_control=False)
    marker_cluster = MarkerCluster().add_to(world_map)

    for continent, suicides in zip(totals['continent'], totals['suicides_no']):
        location = findGeocode(continent)
        if location is None:
            # No geocoding match for this continent name; leave it off the map.
            continue
        latitude, longitude = location[1]
        popup_text = """Continent : {}<br> Suicides : {}<br>""".format(continent, suicides)
        folium.CircleMarker(location=[latitude, longitude], radius=5, popup=popup_text, fill=True).add_to(marker_cluster)

    return world_map

In [None]:
# Draws the line chart; the returned folium map is displayed as the cell output.
plot_continent_suicide_gdp_population()

In [None]:
def plot_pie_continent_suicide():
    """Draw a donut chart of each continent's share of the total suicides.

    The continent column is added to a copy, so the shared ``df`` is not
    modified, and only ``suicides_no`` is aggregated.
    """
    continent_names = {'AF':'Africa','AS':'Asia','EU':'Europa','NA':'North America','OC':'Oceania','SA':'South America'}
    data = df.assign(continent=df['country'].map(create_country_continent()).map(continent_names))
    totals = data.groupby('continent')['suicides_no'].sum()

    labels = totals.index
    sizes = totals.values
    colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99','#F9E79F','#A3E4D7']
    plt.figure(figsize=(35,15))
    # pad is a length in points; pass it as a number instead of a string.
    plt.title('Suicides in different continents',fontsize=30,pad=30.0,backgroundcolor='#F9EBEA')
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90, pctdistance=0.85, textprops={'fontsize': 20})

    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0,0),0.70,fc='white')
    plt.gcf().gca().add_artist(centre_circle)
    plt.legend(prop={'size': 20})
    plt.axis('equal')
    plt.show()

In [None]:
# Share of suicides per continent.
plot_pie_continent_suicide()

In [None]:
def plot_continent_suicide_sex():
    """Line chart of total suicides per continent, one line per sex.

    The continent column is added to a copy, so the shared ``df`` is not
    modified, and only ``suicides_no`` is aggregated.
    """
    continent_names = {'AF':'Africa','AS':'Asia','EU':'Europa','NA':'North America','OC':'Oceania','SA':'South America'}
    data = df.assign(continent=df['country'].map(create_country_continent()).map(continent_names))
    totals = data.groupby(['continent', 'sex'])['suicides_no'].sum().reset_index()

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#2E4053')
    plt.grid(color='white',linestyle='dotted')
    plt.xlabel('continent')
    plt.ylabel('count')
    # (value of the `sex` column, marker, colour) for each line.
    for sex, marker, colour in (('male', 'o', 'crimson'), ('female', 'x', 'darkorange')):
        subset = totals[totals['sex'] == sex]
        plt.plot(subset['continent'], subset['suicides_no'], marker=marker, label=sex,
                 color=colour, linestyle='dotted', linewidth=3)
    plt.legend()

In [None]:
# Suicides per continent, split by sex.
plot_continent_suicide_sex()

In [None]:
def plot_continent_suicide_generation():
    """Line chart of total suicides per continent, one line per generation.

    The continent column is added to a copy, so the shared ``df`` is not
    modified, and only ``suicides_no`` is aggregated. The six lines are drawn
    from one table of styles instead of six copy-pasted filters.
    """
    continent_names = {'AF':'Africa','AS':'Asia','EU':'Europa','NA':'North America','OC':'Oceania','SA':'South America'}
    data = df.assign(continent=df['country'].map(create_country_continent()).map(continent_names))
    totals = data.groupby(['continent', 'generation'])['suicides_no'].sum().reset_index()

    # (value of the `generation` column, legend label, marker, colour)
    line_styles = [
        ('Generation X', 'generation x', 'o', '#ff9999'),
        ('Silent', 'silent', '^', '#66b3ff'),
        ('G.I. Generation', 'generation GI', 'x', '#99ff99'),
        ('Boomers', 'boomer', '.', '#ffcc99'),
        ('Millenials', 'millenial', '*', '#F9E79F'),
        ('Generation Z', 'generation Z', 's', '#A3E4D7'),
    ]

    plt.figure(figsize=(20,5))
    ax = plt.gca()
    ax.set_facecolor('#212F3C')
    plt.xlabel('continent')
    plt.ylabel('count')
    plt.grid(linestyle='dotted',linewidth=0.5)
    for generation, label, marker, colour in line_styles:
        subset = totals[totals['generation'] == generation]
        plt.plot(subset['continent'], subset['suicides_no'], marker=marker, label=label, color=colour)

    plt.legend()

In [None]:
# Suicides per continent, split by generation.
plot_continent_suicide_generation()

In [None]:
def plot_happiness_suicide_2016():
    """Plot 2016 suicide rates next to World Happiness indicators, per country.

    Reads ``../input/world-happiness/2016.csv`` and inner-joins it on country
    with the mean ``suicides/100k pop`` of the 2016 rows of ``df``.

    Only the rate column is averaged: calling ``mean()`` on the whole frame
    also hits the text columns, which raises ``TypeError`` on pandas >= 2.0.
    The x tick labels are rotated, as in the 2015 chart, so the country names
    stay readable.
    """
    happiness = pd.read_csv('../input/world-happiness/2016.csv')
    # rename() returns a new frame, so no in-place edit of a column slice.
    happiness = happiness[['Country','Happiness Score','Health (Life Expectancy)','Freedom','Dystopia Residual']].rename(columns={"Country":"country"})

    rates = (
        df[df['year'] == 2016]
        .groupby('country', as_index=False)['suicides/100k pop']
        .mean()
    )
    merged = happiness.merge(rates, how='inner', on='country')

    plt.figure(figsize=(20,5))
    plt.grid()
    ax = plt.gca()
    ax.set_facecolor('floralwhite')
    # tick_params also applies to ticks created later by the plot calls.
    ax.tick_params(axis='x', labelrotation=90)
    plt.title('Does suicides depend on happiness factors ?')
    plt.xlabel('country')
    countries_x = merged['country']
    plt.plot(countries_x,merged['suicides/100k pop'],label='Suicides per 100k people',marker='x',color='red',linestyle='solid',linewidth=1)
    plt.plot(countries_x,merged['Happiness Score'],label='Happiness Score',marker='o',linestyle='dashed',color='orange',linewidth=1)
    # The factors below (15, 10, 30) are display scaling only; presumably
    # chosen so the lines share the y-axis with the suicide rate - TODO confirm.
    plt.plot(countries_x,merged['Health (Life Expectancy)']*15,label='Health (Life Expectancy)',marker='^',color='seagreen',linestyle='dotted',linewidth=1)
    plt.plot(countries_x,merged['Dystopia Residual']*10,label='Dystopia Residual',marker='D',color='teal',linestyle='dotted',linewidth=1)
    plt.plot(countries_x,merged['Freedom']*30,label='Freedom',marker='s',color='black',linestyle='dotted',linewidth=1)
    plt.legend()
    plt.show()

In [None]:
# Suicide rate versus happiness indicators, 2016.
plot_happiness_suicide_2016()

In [None]:
def plot_happiness_suicide_2015():
    """Plot 2015 suicide rates next to World Happiness indicators, per country.

    Reads ``../input/world-happiness/2015.csv`` and inner-joins it on country
    with the mean ``suicides/100k pop`` of the 2015 rows of ``df``.

    Only the rate column is averaged: calling ``mean()`` on the whole frame
    also hits the text columns, which raises ``TypeError`` on pandas >= 2.0.
    """
    happiness = pd.read_csv('../input/world-happiness/2015.csv')
    # rename() returns a new frame, so no in-place edit of a column slice.
    happiness = happiness[['Country','Happiness Score','Health (Life Expectancy)','Freedom','Dystopia Residual']].rename(columns={"Country":"country"})

    rates = (
        df[df['year'] == 2015]
        .groupby('country', as_index=False)['suicides/100k pop']
        .mean()
    )
    merged = happiness.merge(rates, how='inner', on='country')

    plt.figure(figsize=(20,5))
    plt.grid()
    ax = plt.gca()
    ax.set_facecolor('floralwhite')
    # Set the rotation through tick_params so it also applies to the ticks
    # that the plot calls below create for each country.
    ax.tick_params(axis='x', labelrotation=90)
    plt.title('Does suicides depend on happiness factors ?')
    plt.xlabel('country')
    countries_x = merged['country']
    plt.plot(countries_x,merged['suicides/100k pop'],label='Suicides per 100k people',marker='x',color='red',linestyle='solid',linewidth=1)
    plt.plot(countries_x,merged['Happiness Score'],label='Happiness Score',marker='o',linestyle='dashed',color='orange',linewidth=1)
    # The factors below (15, 10, 30) are display scaling only; presumably
    # chosen so the lines share the y-axis with the suicide rate - TODO confirm.
    plt.plot(countries_x,merged['Health (Life Expectancy)']*15,label='Health (Life Expectancy)',marker='^',color='seagreen',linestyle='dotted',linewidth=1)
    plt.plot(countries_x,merged['Dystopia Residual']*10,label='Dystopia Residual',marker='D',color='teal',linestyle='dotted',linewidth=1)
    plt.plot(countries_x,merged['Freedom']*30,label='Freedom',marker='s',color='black',linestyle='dotted',linewidth=1)
    plt.legend()
    plt.show()

In [None]:
# Suicide rate versus happiness indicators, 2015.
plot_happiness_suicide_2015()