In [None]:
import pandas as pd
import pycountry_convert as pc
from pycountry import countries

## Model
suicide ~ population \
suicide ~ population + gdp_per_capita \
grouped by country (or by year, or by age): suicide ~ population + gdp_per_capita

In [None]:
# Load the raw data, give the columns shorter names, and discard the columns
# that are not used in the rest of the notebook.
suicide_df = (
    pd.read_csv('data/suicide.csv')
    .rename(columns={
        'suicides_no': 'suicide',
        ' gdp_for_year ($) ': 'total_gdp',
        'gdp_per_capita ($)': 'gdp_per_capita',
        'HDI for year': 'hdi',
    })
    .drop(columns=['suicides/100k pop', 'hdi', 'country-year', 'total_gdp', 'generation'])
)
suicide_df

## Keep only the rows that belong to European countries

In [None]:
# Names of every country known to pycountry. Rows whose country name is not
# in this list are filtered out below, before the continent lookup.
countries_name_list = [entry.name for entry in countries]

# Function to extract continent code
def get_continent(country: str) -> str:
    """Return the continent code for a country name.

    The name is converted to its alpha-2 country code, and that code is then
    mapped to a continent code; both lookups go through ``pycountry_convert``.

    Args:
        country: Country name to look up.

    Returns:
        The continent code reported by ``pycountry_convert``.

    Nothing is caught here: any exception raised by either lookup (for
    example for a name or code the library does not know) propagates to the
    caller.
    """
    return pc.country_alpha2_to_continent_code(
        pc.country_name_to_country_alpha2(country)
    )

# Keep only the rows whose country name appears in pycountry's name list.
# The explicit .copy() makes the filtered result an independent frame, so the
# column assignment below is a plain write and does not trigger pandas'
# SettingWithCopyWarning (assigning to a slice of another DataFrame).
suicide_df = suicide_df[suicide_df['country'].isin(countries_name_list)].copy()

# Tag every remaining row with its continent code.
suicide_df['continent'] = suicide_df['country'].apply(get_continent)

# Keep the European rows. Note that 'EU' here is the continent code for
# Europe as a whole, not the European Union.
suicide_df = suicide_df[suicide_df['continent'] == 'EU']
suicide_df.head(5)

## Group data by year and country

In [None]:
# Collapse the rows into one record per (year, country): suicide counts and
# population are summed, GDP per capita is averaged.
suicide_df_grouped = (
    suicide_df
    .groupby(['year', 'country'])
    .aggregate({'suicide': 'sum', 'population': 'sum', 'gdp_per_capita': 'mean'})
)

# Pivot the countries into columns so that each row is a single year.
df = suicide_df_grouped.unstack().reset_index()

# Restrict to 1991-2015 (both ends inclusive), drop every column that has a
# missing value anywhere in that window, then stack the countries back into
# the index so the result is indexed by (year, country).
year = df['year']
in_window = (year >= 1991) & (year <= 2015)
df = df[in_window].dropna(axis=1).set_index('year').stack()
df


## Generate new columns

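-   The dataframe has been unstacked and stacked again (previous step) so that it has a cleaner form: one row per year and country
-   The column `suicides_per_100k` is generated from `suicide` and `population`; `gdp_per_capita` is carried over from the grouping step
-   The dataframe is exported to a CSV file for later processing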

In [None]:
# Build the export frame: a copy of `df` with the suicide rate per 100,000
# inhabitants appended as a new column, then write it to disk.
output_df = df.assign(
    suicides_per_100k=(df['suicide'] / df['population']) * 100000
)
output_df.to_csv("data/suicide_cleaned.csv")