In [1]:
import pandas as pd
import numpy as np

# Load the raw dataset, using the country name as the row index
suicide_df = pd.read_csv('Resources/master.csv', index_col='country')
suicide_df.head()

suicide_df.columns

# The GDP-for-year header carries surrounding spaces; rename it to the trimmed form
suicide_df = suicide_df.rename(columns={' gdp_for_year ($) ': 'gdp_for_year ($)'})
suicide_df.columns

# Discard the columns that the rest of the analysis never reads
suicide_df = suicide_df.drop(['country-year', 'HDI for year'], axis=1)

suicide_df.head()

# Normalize the 'gdp_for_year ($)' column: strip the thousands separators
# from the text values and convert the column to an integer dtype.
# int64 is requested explicitly because plain `int` resolves to a 32-bit
# integer on some platforms (e.g. Windows with NumPy < 2), which overflows for
# values above 2**31 - 1 (yearly national GDP totals presumably exceed that).
suicide_df['gdp_for_year ($)'] = (
    suicide_df['gdp_for_year ($)']
    .str.replace(',', '')
    .astype('int64')
)

suicide_df.dtypes

# Check for null values in the dataset
def num_missing(x):
    """Return how many entries of ``x`` are missing (NaN / None).

    Parameters
    ----------
    x : pandas.Series
        The values to inspect, e.g. one column handed over by
        ``DataFrame.apply(num_missing, axis=0)``.

    Returns
    -------
    int
        Number of null entries in ``x``.
    """
    # Vectorized count: summing the boolean mask inside pandas avoids
    # iterating over every element with the Python-level builtin sum().
    return x.isnull().sum()
# Report the number of missing entries found in each column
print("Missing values per column:")
missing_per_column = suicide_df.apply(num_missing)
print(missing_per_column)

# Suicide_df is pretty clean

# High-level analysis: suicides relative to population, per country

# Total suicide count per country (summed over every row of that country)
suicide_no_dataframe = suicide_df.pivot_table(index='country', values='suicides_no', aggfunc='sum')

# Total of the population column per country (summed over the same rows)
suicide_pop_dataframe = suicide_df.pivot_table(index='country', values='population', aggfunc='sum')

# Put both totals side by side
suicide_num_pop = suicide_no_dataframe.join(suicide_pop_dataframe)

# Ratio of summed suicides to summed population
suicide_num_pop['percentage'] = suicide_num_pop['suicides_no'].div(suicide_num_pop['population'])

# Scale the ratio to a percentage
suicide_series_percent = suicide_num_pop['percentage'] * 100

# One-column frame holding the percentage of suicides per country
suicide_frame_percent = suicide_series_percent.to_frame(name='Percentage of Suicide')

# Lead with the percentage column, followed by the totals it was derived from
suicide_num_pop = suicide_frame_percent.join(suicide_num_pop)

# The unscaled 'percentage' ratio is no longer needed
suicide_percentage_analysis = suicide_num_pop.drop('percentage', axis=1)

# Summary statistics of the percentage column
suicide_percentage_analysis['Percentage of Suicide'].describe()

# Highest percentage first
suicide_percentage_analysis = suicide_percentage_analysis.sort_values(
    by=['Percentage of Suicide'],
    ascending=False,
)

suicide_percentage_analysis.to_csv('Suicide_Percentage_Breakdown.csv', index=True)

# Suicide counts over time, summed across all countries

# Keep only the two columns this step needs
suicide_overtime_df = suicide_df[['year', 'suicides_no']].copy()

# Turn the integer year into a datetime so it can serve as a time index
suicide_overtime_df['year'] = pd.to_datetime(suicide_overtime_df['year'], format='%Y')

# Sum the suicide counts for each year
suicide_overtime_df = suicide_overtime_df.pivot_table(index='year', values='suicides_no', aggfunc='sum')

# Drop any year whose total equals 15603.
# NOTE(review): presumably this targets one incomplete year; a filter on the
# year itself would be less fragile than matching the aggregate - confirm.
suicide_overtime_df = suicide_overtime_df.loc[suicide_overtime_df['suicides_no'] != 15603]

import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters

# The x-axis carries datetime values. Register pandas' datetime converters
# explicitly so matplotlib does not depend on the implicit registration,
# which emits a "To register the converters" warning when plotting.
register_matplotlib_converters()

# Line chart of the yearly suicide totals
fig, ax = plt.subplots()
ax.plot(suicide_overtime_df.index.values, suicide_overtime_df['suicides_no'])
ax.set_xlabel('Year')
ax.set_ylabel('Total Number of Suicides')
ax.set_title('Global Suicide Trend')
plt.show()

# Export the 50 countries with the highest suicide percentage
ss = suicide_percentage_analysis['Percentage of Suicide'].nlargest(50).to_frame()
ss.to_csv('Suicide_Percentage_Breakdown_Top_50.csv', index=True)

# Look up the row for a single country
suicide_percentage_analysis.loc['United Kingdom']

# Suicide counts per country (rows) and year (columns)
suicide_year_df = suicide_df.pivot_table(index='country', columns='year', values='suicides_no', aggfunc='sum')

# 2015 suicide counts, restricted to countries that have a 2015 value
suicide_2015_series = suicide_year_df[2015]
suicide_2015 = suicide_2015_series.dropna().to_frame(name='2015 Suicide Num')

# Population totals per country (rows) and year (columns)
suicide_pop_df = suicide_df.pivot_table(index='country', columns='year', values='population', aggfunc='sum')

# 2015 population, restricted to countries that have a 2015 value
suicide_pop_series = suicide_pop_df[2015]
suicide_pop_2015 = suicide_pop_series.dropna().to_frame()

# Attach the 2015 suicide counts to the 2015 population, matched by country
suicide_2015_df = suicide_pop_2015.join(suicide_2015, on='country')
suicide_2015_df

# Ratio of 2015 suicides to 2015 population for each country
suicide_2015_df['percentage'] = suicide_2015_df['2015 Suicide Num'].div(suicide_2015_df[2015])

# Scale the ratio to a percentage
suicide_series_2015 = suicide_2015_df['percentage'] * 100

# One-column frame holding the 2015 percentage of suicides per country
suicide_2015_percent = suicide_series_2015.to_frame(name='Percentage of Suicide')

suicide_2015_percent.to_csv('Suicide_Percentage_Breakdown_2015.csv', index=True)

# Preview the first rows of the yearly suicide totals; as the last expression
# in the cell, this is the value the notebook displays.
# NOTE(review): the previous comment here announced a per-gender pivot table,
# but no such code exists in this cell - TODO add it or drop the plan.
suicide_overtime_df.head()

Missing values per column:
year                  0
sex                   0
age                   0
suicides_no           0
population            0
suicides/100k pop     0
gdp_for_year ($)      0
gdp_per_capita ($)    0
generation            0
dtype: int64



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


<Figure size 640x480 with 1 Axes>

Unnamed: 0_level_0,suicides_no
year,Unnamed: 1_level_1
1985-01-01,116063
1986-01-01,120670
1987-01-01,126842
1988-01-01,121026
1989-01-01,160244


In [2]:
# Display the 2015 per-country suicide percentages computed in the previous cell
suicide_series_2015

country
Antigua and Barbuda             0.001088
Argentina                       0.007741
Armenia                         0.002647
Australia                       0.013610
Austria                         0.015220
Belgium                         0.017573
Belize                          0.008129
Brazil                          0.005842
Chile                           0.011095
Colombia                        0.005242
Croatia                         0.018467
Cuba                            0.013949
Cyprus                          0.005025
Czech Republic                  0.013885
Denmark                         0.010477
Ecuador                         0.007383
Estonia                         0.015682
Finland                         0.014107
Georgia                         0.005552
Germany                         0.012911
Greece                          0.005120
Grenada                         0.000000
Guatemala                       0.003465
Hungary                         0.019895
Iceland 