In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the '/kaggle/input' tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.api.types import CategoricalDtype
import warnings
# Ignore every warning from here on: no category or module restriction is given,
# so the filter applies to all warnings.
# NOTE(review): this would also hide any pandas/seaborn warnings raised by later cells.
warnings.filterwarnings('ignore')

In [None]:
# Read the WHO suicide statistics CSV into a DataFrame and preview its first rows.
who_csv_path = r"../input/who-suicide-statistics/who_suicide_statistics.csv"
df = pd.read_csv(who_csv_path)
df.head()

In [None]:
# Quick overview: frame dimensions, missing values per column, distinct age labels.
for overview in (df.shape, df.isna().sum(), df['age'].unique()):
    print(overview)

In [None]:
# Keep only the rows without any missing value and derive the suicide rate per
# one million inhabitants in the same expression.
# .assign() returns a brand-new frame, so the new column is never written into
# the object returned by dropna(); writing into that object directly can raise
# SettingWithCopyWarning.
# NOTE(review): rows with population == 0 would yield inf here; not checked.
clean_df = df.dropna().assign(
    suicide_rate_per_million=lambda rows: rows['suicides_no'] / (rows['population'] / 1000000)
)

In [None]:
# Remove the ' years' suffix from every age label.
age_labels = clean_df['age'].str.replace(' years','')
clean_df.loc[:, 'age'] = age_labels

In [None]:
# Age bands in the order the categorical should sort them.
age_categories = ['5-14', '15-24', '25-34', '35-54', '55-74', '75+']
ordered_age_dtype = CategoricalDtype(categories=age_categories, ordered=True)
# Labels that are not listed in age_categories become NaN after this cast.
clean_df['age'] = clean_df['age'].astype(ordered_age_dtype)

In [None]:
sns.set()

# Average suicides_no over all rows of each year (mean() skips missing values).
# NOTE(review): the plotted value is a per-row mean, although the title says "Count".
mean_suicides_years = df.groupby('year')[['suicides_no']].mean()
mean_suicides_years.plot.line(grid=True, legend=False, title="Suicide Count by Years");

In [None]:
# Distribution of sexes: number of records per sex.
# size() counts each row exactly once. count().sum(axis=1) would instead add up
# the non-null counts of every remaining column, so each row would be counted
# once per column and the printed totals would be inflated.
sex_dist = clean_df.groupby('sex').size()
print(sex_dist)
sex_dist.plot(kind='pie', title="Sex Distribution", shadow=True, autopct='%1.1f%%');

In [None]:
# Total suicides per country, ignoring rows where suicides_no is missing.
country_totals = df.dropna(subset=['suicides_no']).groupby('country')['suicides_no'].sum()

# Ten largest and ten smallest country totals.
highest_values_by_countries = country_totals.nlargest(10)
lowest_values_by_countries = country_totals.nsmallest(10)

# One horizontal bar chart per ranking, each shown as its own figure.
for ranking, chart_title in (
    (highest_values_by_countries, "10 Countries with Highest Suicide Numbers"),
    (lowest_values_by_countries, "10 Countries with Lowest Suicide Numbers"),
):
    ranking.plot(kind='barh', title=chart_title, figsize=(6,4), color='g')
    plt.show()

In [None]:
sns.set()

# Suicides summed per sex and scaled to millions; rows with a missing count are dropped first.
known_counts = df['suicides_no'].dropna()
suicide_numbers_by_sex = known_counts.groupby(df['sex']).sum().div(1000000)
suicide_numbers_by_sex.plot.pie(title="Suicide Count by Sex", shadow=True, autopct='%1.1f%%');

In [None]:
# Yearly suicide totals split by sex, in long form (one row per sex/year pair).
yearly_totals_by_sex = clean_df.groupby(["sex", "year"])['suicides_no'].sum()
xdf = yearly_totals_by_sex.reset_index()

sns.lineplot(data=xdf, x='year', y='suicides_no', hue='sex');

In [None]:
sns.set()

# 2015 suicide rates for Turkey, the United States of America and Japan,
# drawn as one bar panel per country with bars grouped by age and coloured by sex.
# NOTE(review): the variable name mentions Canada, but the filter selects the United States.
compared_countries = ['Turkey', 'United States of America', 'Japan']
in_compared_countries = clean_df['country'].isin(compared_countries)
in_2015 = clean_df['year'] == 2015
rate_columns = ['country', 'sex', 'age', 'suicide_rate_per_million']

tr_canada_japan_2015 = clean_df.loc[in_compared_countries & in_2015, rate_columns].sort_values(['sex', 'age'])
sns.catplot(x='age', hue='sex', col='country', y='suicide_rate_per_million', data=tr_canada_japan_2015, kind='bar', col_wrap=3)

In [None]:
# Visualizing Japan Data

# Filter once so that x, y, hue and style are all taken from the same rows.
# Passing the unfiltered clean_df.age / clean_df.sex next to filtered x / y
# relies on seaborn aligning the inputs by index.
japan_rows = clean_df[clean_df.country == "Japan"]
japan_suicide_count = japan_rows.suicides_no

plt.figure(figsize=(8,5))
(sns.scatterplot(data=japan_rows, x='year', y='suicides_no', hue='age', style='sex')
 .set_title("Sex-Age Distribution of Suicide by Years in Japan"));
plt.xlabel("Year")
plt.ylabel("Suicide Count")
# Anchor the legend outside the axes, to the right of the plot area.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);

In [None]:
# Visualizing Turkey Data
sns.set()

# Filter once so that x, y, hue and style are all taken from the same rows.
# Passing the unfiltered clean_df.age / clean_df.sex next to filtered x / y
# relies on seaborn aligning the inputs by index.
turkey_rows = clean_df[clean_df.country == "Turkey"]
turkey_suicide_count = turkey_rows.suicides_no
t = (sns.scatterplot(data=turkey_rows, x='year', y='suicides_no', hue='age', style='sex')
     .set_title("Sex-Age Distribution of Suicide by Years in Turkey"));
plt.xlabel("Year")
plt.ylabel("Suicide Count")
# Anchor the legend outside the axes, to the right of the plot area.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);

In [None]:
# Visualizing United States of America Data
sns.set()

# Filter once so that x, y, hue and style are all taken from the same rows.
# Passing the unfiltered clean_df.age / clean_df.sex next to filtered x / y
# relies on seaborn aligning the inputs by index.
us_rows = clean_df[clean_df.country == "United States of America"]
us_suicide_count = us_rows.suicides_no
t = (sns.scatterplot(data=us_rows, x='year', y='suicides_no', hue='age', style='sex')
     .set_title("Sex-Age Distribution of Suicide by Years in United States of America"));
plt.xlabel("Year")
plt.ylabel("Suicide Count")
# Anchor the legend outside the axes, to the right of the plot area.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.);