# Number of suicides per city


In [1]:
import pandas as pd
import glob
# Base path prefix: the cells below prepend it to every CSV location they read or write.
root = "../"

Read the yearly suicide data files (one CSV per year) and combine them into a single table

In [2]:
# Folder with one ';'-separated export per year. The year is taken from the
# file name, which is expected to look like "<prefix>_<year>.csv"
# (NOTE(review): naming inferred from the parsing below - confirm).
path = root + "/CSV/TabNet/Suicides/"

# sorted() makes the processing order (and therefore the row order of the
# merged table) reproducible; glob returns files in arbitrary order.
all_files = sorted(glob.glob(path + "*.csv"))
if not all_files:
    raise FileNotFoundError("No suicide CSV files found in " + path)

suicide_df = None
for file in all_files:
    file_name = file.split("/")[-1]
    # Parse the year from the file name only, so that an underscore in any
    # directory of the path cannot shift the token index.
    year = file_name.split("_")[1].split(".csv")[0]

    year_df = pd.read_csv(file, sep=";")
    # Drop the summary row, then keep the leading token of "Município" as the city code.
    year_df = year_df[year_df["Município"] != "Total"].copy()
    year_df["MUNCOD"] = year_df["Município"].str.split(" ").str[0]
    year_df = year_df[["Total", "MUNCOD"]].rename(columns={"Total": year})

    # An explicit None sentinel (rather than DataFrame.empty) keeps a year
    # whose file has no rows from being silently discarded.
    if suicide_df is None:
        suicide_df = year_df
    else:
        # Outer join: a city missing from some years is kept and filled below.
        suicide_df = pd.merge(suicide_df, year_df, on="MUNCOD", how="outer")

# Clean up once, after all merges: "-" placeholders and the gaps left by the
# outer joins both become 0.
suicide_df = suicide_df.replace("-", 0).fillna(0)

cols_order = ['MUNCOD', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']
suicide_df = suicide_df[cols_order]
suicide_df.head()

Unnamed: 0,MUNCOD,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,110001,5.0,2.0,2.0,1.0,3.0,2.0,1.0,3.0,2.0,3.0,1.0
1,110037,0.0,3.0,2.0,0.0,3.0,1.0,1.0,0.0,2.0,1.0,2.0
2,110040,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0
3,110002,8.0,2.0,4.0,4.0,9.0,2.0,5.0,5.0,6.0,10.0,2.0
4,110004,4.0,6.0,2.0,1.0,4.0,1.0,5.0,6.0,9.0,5.0,8.0


Save data to a .csv

In [3]:
# Persist the per-city yearly counts; the row index is written as the first column.
suicide_df.to_csv(path_or_buf=root + 'CSV/Suicide/suicide_count_08_18.csv')

# Rate of suicides per city

## Part I: get population data
Source: ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/

In [4]:
import urllib.request
from zipfile import ZipFile
import os
import dbf

def _is_whole_number(value):
    """Return True when a raw population cell can be converted to an int without loss."""
    if isinstance(value, str):
        return value.isnumeric()
    if isinstance(value, float):
        # NaN and fractional values are rejected; 123.0 is accepted.
        return value.is_integer()
    return isinstance(value, int)


def get_pop_data(year):
    """Download and tidy the population table for one year.

    The archive ``POPTBR<year>.zip`` is fetched from the DATASUS FTP server
    into the local ``temp`` directory and extracted there. For years after
    "13" the first archive member is opened as a DBF table and exported to
    ``temp/POPTBR<year>.csv``; for earlier years the archive is expected to
    already contain that CSV.

    Parameters
    ----------
    year : str
        Two-digit year, e.g. ``"08"``; it is concatenated into file names,
        the URL and the output column name.

    Returns
    -------
    pandas.DataFrame
        Two int64 columns: ``POP_<year>`` (population) and ``MUNCOD`` (the
        city code truncated to its first six digits). Rows whose population
        cell is not a whole number are dropped.
    """
    os.makedirs('temp', exist_ok=True)
    pop_zip = 'POPTBR' + year + '.zip'
    pop_file = 'POPTBR' + year + '.csv'
    url = 'ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/' + pop_zip
    urllib.request.urlretrieve(url, 'temp/' + pop_zip)
    with ZipFile('temp/' + pop_zip, 'r') as zipObj:
        zipObj.extractall(path="temp/")

        if int(year) > 13:
            # Convert the extracted DBF table to the CSV read below.
            fname = 'temp/' + zipObj.namelist()[0]
            with dbf.Table(fname) as table:
                dbf.export(table, 'temp/' + pop_file)

    df = pd.read_csv('temp/' + pop_file)
    # Plain attribute assignment works on every pandas version;
    # set_axis(..., inplace=False) raises TypeError since pandas 2.0.
    df.columns = ['MUNCOD', 'ANO', 'POPULACAO']
    df = df[["POPULACAO", "MUNCOD"]]

    # Drop junk rows (non-numeric or missing population). astype(bool) keeps
    # the mask boolean even when the table is empty; copy() detaches the
    # result so the column assignments below do not hit a slice.
    keep = df["POPULACAO"].apply(_is_whole_number).astype(bool)
    df = df[keep].copy()

    df["POPULACAO"] = df["POPULACAO"].astype('int64')
    # Keep only the first six digits of the city code.
    df["MUNCOD"] = df["MUNCOD"].astype('int64').astype(str).str[:6].astype('int64')

    return df.rename(columns={"POPULACAO": "POP_" + year})

In [5]:
years = ["08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18"]

# Start from an empty frame; the first year's table replaces it, and every
# later year is inner-joined on the city code with the new year placed on the left.
all_years = pd.DataFrame()
for year in years:
    year_pop = get_pop_data(year)
    if "MUNCOD" not in all_years:
        all_years = year_pop
        continue
    all_years = pd.merge(year_pop, all_years, on="MUNCOD")
all_years

Unnamed: 0,POP_18,MUNCOD,POP_17,POP_16,POP_15,POP_14,POP_13,POP_12,POP_11,POP_10,POP_09,POP_08
0,23167,110001,25437,25506,25578,25652,25728,24069,24228,24422,24354,24577
1,106168,110002,107345,105896,104401,102860,101269,92747,91570,90354,85541,84581
2,5438,110003,6224,6289,6355,6424,6495,6132,6221,6309,6695,6777
3,84813,110004,88507,87877,87226,86556,85863,79330,78959,78601,78675,78263
4,16444,110005,17934,17959,17986,18013,18041,16852,16939,17030,16622,16784
...,...,...,...,...,...,...,...,...,...,...,...,...
5560,13746,522200,13675,13567,13456,13343,13227,12737,12644,12549,12831,12699
5561,8611,522205,8397,8286,8171,8053,7933,7576,7476,7371,6093,6091
5562,6026,522220,5731,5615,5495,5371,5246,4954,4847,4742,4578,4461
5563,5758,522230,5690,5635,5578,5520,5460,5244,5196,5145,5359,5282


Save the combined population table to a .csv file

In [6]:
# Persist the merged population table; the row index is written as the first column.
all_years.to_csv(path_or_buf=root + 'CSV/Population/population_08_18.csv')

Deleting all temporary files

In [7]:
# Remove every entry directly inside the temp/ download directory.
for temp_path in glob.glob('temp/*'):
    os.remove(temp_path)

## Part II: calculate rates

In [8]:
# Reload the two tables saved earlier in this notebook; the first CSV column is the stored index.
population = pd.read_csv(root + 'CSV/Population/population_08_18.csv', index_col=[0])
suicides = pd.read_csv(root + 'CSV/Suicide/suicide_count_08_18.csv', index_col=[0])

# Inner join on the city code.
merged = pd.merge(population, suicides, on="MUNCOD")

years = [str(n).zfill(2) for n in range(8, 19)]

# Rate per 100,000 inhabitants for each year: count / population * 100000.
rates = {
    'RATE_' + yy: merged['20' + yy] / merged['POP_' + yy] * 100000
    for yy in years
}
# Once the rates are attached, the raw count and population columns are dropped.
consumed = [col for yy in years for col in ('20' + yy, 'POP_' + yy)]
df = merged.assign(**rates).drop(consumed, axis=1)
df.head()

Unnamed: 0,MUNCOD,RATE_08,RATE_09,RATE_10,RATE_11,RATE_12,RATE_13,RATE_14,RATE_15,RATE_16,RATE_17,RATE_18
0,110001,20.344224,8.212203,8.189337,4.127456,12.464166,7.773632,3.898332,11.728829,7.841292,11.793844,4.316485
1,110002,9.458389,2.33806,4.427031,4.368243,9.703818,1.974938,4.860976,4.789226,5.665936,9.315758,1.883807
2,110003,0.0,14.93652,0.0,0.0,0.0,15.396459,0.0,31.471282,15.900779,0.0,18.389114
3,110004,5.110972,7.626311,2.544497,1.26648,5.042229,1.164646,5.776607,6.878683,10.241588,5.649271,9.432516
4,110005,0.0,0.0,11.743981,0.0,11.868028,0.0,0.0,11.11976,11.136478,5.576001,6.081245


Export to csv

In [9]:
# Persist the per-city yearly rates; the row index is written as the first column.
df.to_csv(path_or_buf=root + 'CSV/Suicide/suicide_rates_08_18.csv')