In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl

In [None]:
# Load the suicide-rates table and preview its first rows.
df_suicide = pd.read_csv(
    filepath_or_buffer='../input/suicide-rates-overview-1985-to-2016/master.csv'
)
df_suicide.head()

In [None]:
# Distinct country names as they appear in the suicide table.
dfc = df_suicide['country'].unique()
dfc

Adding each country's region and continent to our data set allows a more detailed analysis. We therefore use an auxiliary Kaggle data set that maps countries to their continent and region.

In [None]:
# Load the country-to-continent lookup table and preview its first rows.
df_continent = pd.read_csv(
    filepath_or_buffer='../input/country-to-continent/countryContinent.csv'
)
df_continent.head()

In [None]:
# Display the continent lookup table itself (the previous cell showed only its head).
df_continent

# Find mismatches
Before merging the two datasets, we need to detect and correct any mismatches in the country names. Let's write a function for this.

In [None]:
def mismatch(series_1, series_2, only_out=True):
    """Print which distinct values of ``series_1`` are missing from ``series_2``.

    Parameters
    ----------
    series_1 : pandas.Series
        Values to look up; each distinct value is checked once.
    series_2 : pandas.Series
        Reference values to look them up in.
    only_out : bool, default True
        When False, matched values are printed as well, prefixed with ``in :``.

    Returns
    -------
    None
        Results are printed: one ``out :`` line per missing value, followed by
        an upper-cased summary line with the number of mismatches.
    """
    # Build the lookup once: the reference values do not change inside the
    # loop, and a set gives constant-time membership tests.
    known = set(series_2.unique())
    out_num = 0
    for value in series_1.unique():
        if value in known:
            if not only_out:
                print("in :", value)
        else:
            print("out :", value)
            out_num += 1
    print(f"{out_num} mismatch(es) between those dataframes".upper())
    
# List the countries in the suicide table that have no match in the continent table.
mismatch(df_suicide.country, df_continent.country, only_out=True)

In [None]:
# Align country names in the continent lookup with the spelling used in
# df_suicide. Each key is matched as a literal substring (regex=False), so the
# parentheses in "Korea (Republic of)" need no escaping, and the pattern must
# not contain any extra quote characters or it will never match.
country_renames = {
    "Maca": "Macau",
    "Korea (Republic of)": "Republic of Korea",
    "Saint Vincent and the Grenadines": "Saint Vincent and Grenadines",
    "United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
    "United States of America": "United States",
}
for pattern, new_name in country_renames.items():
    # na=False keeps the mask strictly boolean even if a country name is missing.
    matches = df_continent.country.str.contains(pattern, regex=False, na=False)
    df_continent.loc[matches, 'country'] = new_name
mismatch(df_suicide.country, df_continent.country, True)

In [None]:
# Literal substring match (regex=False): the parentheses are ordinary
# characters, so no backslash escapes are needed ("\(" is not a valid escape
# sequence in a normal string literal). na=False keeps the mask boolean.
is_south_korea = df_continent.country.str.contains(
    "Korea (Republic of)", regex=False, na=False
)
df_continent.loc[is_south_korea, 'country'] = 'Republic of Korea'
mismatch(df_suicide.country, df_continent.country, True)

# Let's merge the two datasets on the country name column.

In [None]:
# Attach continent, sub_region and code_3 to every suicide record. The default
# inner join keeps only countries that appear in both tables.
continent_cols = ['country', 'continent', 'sub_region', 'code_3']
df = df_suicide.merge(df_continent[continent_cols], on='country')
df

In [None]:
# Column dtypes of the merged frame.
print(f"Types:  {df.dtypes}")

In [None]:
# (rows, columns) of the merged frame.
print(f"Shape:  {df.shape}")

In [None]:
# Non-null value count per column.
print("Counts: ")
print(df.count())

In [None]:
# Missing-value count per column.
print("Nulls: ")
print(df.isnull().sum())

In [None]:
# Column-wise minimum, then maximum, each under its own heading.
for heading, reducer in (("Min: ", df.min), ("Max: ", df.max)):
    print(heading)
    print(reducer())

In [None]:
# List the column labels of the merged frame.
df.columns

In [None]:
# Strip surrounding whitespace from the headers first, then apply the renames.
# DataFrame.rename silently skips keys it cannot find, so a padded header would
# otherwise be left untouched.
# NOTE(review): the raw CSV header for total GDP presumably carries padding
# (" gdp_for_year ($) ") — confirm against df.columns.
df = (
    df.rename(columns=str.strip)
      .rename(columns={'country': 'land',
                       'year': 'jaar',
                       'gdp_for_year ($)': 'gdp_for_year',
                       'gdp_per_capita ($)': 'gdp_per_capita'})
)
df.columns

In [None]:
# Display the frame after the column rename.
df

In [None]:
# Drop the HDI column. Rebinding `df` (rather than inplace=True) together with
# errors='ignore' keeps this cell safe to re-run once the column is gone.
df = df.drop(columns='HDI for year', errors='ignore')
# Number of columns that still contain at least one missing value.
df.isnull().any().sum()

In [None]:
# Preview the first rows of the current frame.
df.head()

# Dropping Unnecessary Variables
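To select several columns at once, pass a list of column names, e.g. df[['land', 'jaar', 'country-year']] (the country and year columns were renamed to land and jaar above).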

In [None]:
# Double brackets select 'country-year' as a one-column DataFrame rather than a Series.
df[['country-year']]

# Feature Correlations for the USA

In [None]:
# No earlier cell defines df_USA, so on a fresh kernel this cell would fail
# with a NameError; build the subset here.
# NOTE(review): assumed to be the rows for the United States — confirm the intended subset.
df_USA = df[df['land'] == 'United States']

fig, ax = plt.subplots(figsize=(8, 6))
# Correlate numeric columns only: recent pandas raises on text columns
# instead of silently dropping them.
sns.heatmap(df_USA.select_dtypes(include='number').corr(), annot=True, ax=ax);

In [None]:
# Pairwise scatter plots for the USA subset; corner=True draws only the lower triangle.
sns.pairplot(data=df_USA, corner=True);

In [None]:
# Display the merged frame again.
df

# Global Suicides(per 100K)-trend over time 1985-2016

In [None]:
# Mean of the per-row suicide rate for each year, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(15, 6))
rate_by_year = df.groupby('jaar')["suicides/100k pop"].mean()
rate_by_year.plot.line(rot=0, ax=ax)
ax.set_title("Suicides per 100K by year");

# Global Suicides(per 100K) by Continent

In [None]:
# Mean of the per-row suicide rate for each continent, highest first.
fig, ax = plt.subplots(figsize=(15, 6))
rate_by_continent = (
    df.groupby('continent')["suicides/100k pop"]
      .mean()
      .sort_values(ascending=False)
)
rate_by_continent.plot.bar(rot=0, ax=ax)
ax.set_title("Suicides per 100K by continent");

# Global Suicides(per 100k) by Gender and trend over time 1985-2016

In [None]:
# Mean rate for every (sex, year) pair, flattened back to ordinary columns.
dfgender = (
    df.pivot_table(values=["suicides/100k pop"],
                   index=['sex', 'jaar'],
                   aggfunc='mean')
      .reset_index()
)

fig, ax = plt.subplots(figsize=(20, 5))
# One line per sex, both drawn on the same axes.
for sex in ('male', 'female'):
    rate_per_year = (
        dfgender.loc[dfgender.sex == sex]
                .groupby('jaar')["suicides/100k pop"]
                .mean()
    )
    ax = rate_per_year.plot(kind='line', label=sex, ax=ax)
ax.legend(loc='upper right', borderaxespad=0.5)
ax.set_ylabel('Suicides per 100k')
ax.set_title('Suicides per 100K by gender in years')
plt.show()

# Population-gdp_per_capita Plot

In [None]:
# Per-year sum of the gdp_per_capita column over every row of that year.
# NOTE(review): this adds up per-capita values across rows and is later plotted
# under the axis label 'GDP per capita' — confirm a sum is what is intended.
a = df['gdp_per_capita'].groupby(df['jaar']).sum()
a

In [None]:
# Per-year sum of the population column.
b = df['population'].groupby(df['jaar']).sum()
b

In [None]:
# Element-wise ratio of the two per-year sums (both are indexed by 'jaar').
# NOTE(review): `c` is not used by any later cell visible here.
c = a/b

In [None]:
# Year-indexed version of the merged frame (set_index already returns a new frame).
df2 = df.set_index('jaar')

fig, ax = plt.subplots(figsize=(18, 8))

# Left axis: yearly population sum.
b.plot(ax=ax, color='g', xlabel="Years", ylabel="Population")
ax.legend(bbox_to_anchor=(0.0, 0.90), loc=2, borderaxespad=0.)

# Right axis: yearly gdp_per_capita sum, sharing the same x axis.
ax2 = ax.twinx()
a.plot(ax=ax2, color='b', ylabel='GDP per capita')
ax2.legend(bbox_to_anchor=(0.0, 0.80), loc=2, borderaxespad=0.)

ax2.set_title('Population-gdp_per_capita Plot', fontsize=20)
plt.show()

# Suicide Counts by Gender

In [None]:
def func(pct, allvals):
    """Format a pie-slice label: the percentage plus the absolute count in millions.

    ``pct`` is the slice's share in percent and ``allvals`` holds the values
    of all slices. The absolute count is ``pct`` percent of their sum, rounded
    to a whole number and then expressed in millions with two decimals.
    """
    total = np.sum(allvals)
    absolute = int(round(pct / 100. * total))
    millions = round(absolute / 1000000, 2)
    template = "{:.1f}%\n( {:.2f}M )"
    return template.format(pct, millions)

# Total number of suicides per sex.
suicide_counts_by_sex = df.groupby('sex')['suicides_no'].sum()

# Reverse once so the slices, their labels and the label formatter share one order.
reversed_counts = suicide_counts_by_sex.values[::-1]
reversed_labels = suicide_counts_by_sex.index[::-1]

fig, ax = plt.subplots(figsize=(7, 7))
ax.pie(
    x=reversed_counts,
    labels=reversed_labels,
    explode=(0, 0.1),
    autopct=lambda pct: func(pct, reversed_counts),
    startangle=90,
)
ax.set_title("Suicide Counts by Gender");
plt.show()

In [None]:
# Mean suicide rate per age group, in the order the groups first appear in df.
AgeMean = [
    df.loc[df['age'] == age_group, 'suicides/100k pop'].mean()
    for age_group in df.age.unique()
]
AgeMean

In [None]:
# Pie chart of the per-age-group mean rates, labelled with the age groups.
x = list(AgeMean)
plt.figure(figsize=(10, 5), dpi=80)
plt.pie(x, labels=list(df.age.unique()), autopct='%1.1f%%')
plt.title('Which age of people suicide a most')
plt.show()

# Which age group has the most suicides

In [None]:
# Bar per age group; seaborn's barplot aggregates suicides_no with its default estimator.
sns.barplot(data=df, x="age", y="suicides_no");
plt.xticks(rotation=60);

# Which generation has the most suicides

In [None]:
# Bar per generation; seaborn's barplot aggregates suicides_no with its default estimator.
sns.barplot(data=df, x="generation", y="suicides_no");
plt.xticks(rotation=60);