In [2]:
import pandas as pd

# GRAPH 1 - MAP

In [2]:

# Build the per-neighbourhood complaint counts used by the map chart.

# Neighbourhoods with fewer complaints than this are summed into `others_count`.
MIN_COMPLAINTS_FOR_OWN_SLICE = 78

# Load the semicolon-separated complaints export and keep only the two columns
# the charts need: neighbourhood and intake date.
data = pd.read_csv('../data/147_desratizacion.csv', delimiter=';')
df_base = data[['domicilio_barrio', 'fecha_ingreso']]

# Drop complaints filed in month 11. The date is still text at this point, so
# the month is matched on the literal '/11/'.
# na=True flags rows with a missing date for removal as well; the previous
# `.str.contains(...) == False` test dropped them only as a side effect of
# NaN comparing unequal to False.
is_november_or_undated = df_base['fecha_ingreso'].str.contains('/11/', regex=False, na=True)
df_base = df_base[~is_november_or_undated]

# One row per neighbourhood with its number of complaints.
df = df_base.groupby('domicilio_barrio').size().reset_index(name='counts')

# Combined complaints of every neighbourhood below the threshold.
others_count = df.loc[df['counts'] < MIN_COMPLAINTS_FOR_OWN_SLICE, 'counts'].sum()

# Upper-case the neighbourhood names (vectorised string method instead of a
# per-row lambda).
df['domicilio_barrio'] = df['domicilio_barrio'].str.upper()

# Save the counts to map_ratas.csv.
df.to_csv('../data/map_ratas.csv', index=False)

# GRAPH 2 - RANKING

In [3]:
# Ranking of the neighbourhoods with the most complaints: one row per
# domicilio_barrio, one column per month, each cell holding the number of
# complaints filed that month. Only the top neighbourhoods by total are saved.

TOP_N_BARRIOS = 5

# Every calendar month is mapped so that no column is left with a bare month
# number as its header if the data contains months beyond May.
MONTH_NAMES = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December',
}

# Work on a copy so df_base is not modified by the date parsing below.
df = df_base.copy()

# Parse the dd/mm/yyyy text dates and extract the month number.
df['fecha_ingreso'] = pd.to_datetime(df['fecha_ingreso'], format='%d/%m/%Y')
df['month'] = df['fecha_ingreso'].dt.month

# Count complaints per (neighbourhood, month). Month 11 is excluded here as a
# safeguard, independent of any filtering already applied to df_base.
complaints_by_barrio_month = (
    df[df['month'] != 11]
    .groupby(['domicilio_barrio', 'month'])
    .size()
    .reset_index(name='complaints')
)

# Wide table: neighbourhoods as rows, months as columns. A (neighbourhood,
# month) pair with no complaints comes out of the pivot as NaN, so it is
# replaced by 0 before casting everything to int.
complaints_by_barrio_month_pivot = (
    complaints_by_barrio_month
    .pivot(index='domicilio_barrio', columns='month', values='complaints')
    .fillna(0)
    .astype(int)
)

# Sort by the row total (a temporary helper column), keep the top
# neighbourhoods, and give the month columns readable names.
ranking_barrios = (
    complaints_by_barrio_month_pivot
    .assign(total_complaints=complaints_by_barrio_month_pivot.sum(axis=1))
    .sort_values(by='total_complaints', ascending=False)
    .drop(columns=['total_complaints'])
    .rename(columns=MONTH_NAMES)
    .head(TOP_N_BARRIOS)
)

# Save the ranking; the index (domicilio_barrio) is written as the first column.
ranking_barrios.to_csv('../data/ranking_barrios.csv')


# GRAPH 3 - DONUT


In [5]:
# Build the donut-chart table: neighbourhoods with enough complaints keep
# their own row, and all the remaining ones are merged into a single 'Otros' row.

# Neighbourhoods with fewer complaints than this are merged into 'Otros'.
MIN_COMPLAINTS_FOR_OWN_SLICE = 78

# Read the per-neighbourhood counts from map_ratas.csv.
df = pd.read_csv('../data/map_ratas.csv')

# Sum the small neighbourhoods from the file just loaded, so this cell does
# not depend on a variable left behind by a previously executed cell.
others_count = df.loc[df['counts'] < MIN_COMPLAINTS_FOR_OWN_SLICE, 'counts'].sum()

# Keep only the neighbourhoods that reach the threshold. This happens BEFORE
# 'Otros' is appended: filtering afterwards would silently discard the
# 'Otros' row whenever its own total is below the threshold.
df = df[df['counts'] >= MIN_COMPLAINTS_FOR_OWN_SLICE]

# Append the aggregated row last; skip it when there is nothing to aggregate.
if others_count > 0:
    otros_row = pd.DataFrame({'domicilio_barrio': ['Otros'], 'counts': [others_count]})
    df = pd.concat([df, otros_row], ignore_index=True)

# Save the donut table to rata_clean.csv.
df.to_csv('../data/rata_clean.csv', index=False)



# GRAPH 4 - MAP NORMALIZED BY POPULATION


In [4]:
# Complaints per 1000 inhabitants for each neighbourhood.

# Per-neighbourhood complaint counts from map_ratas.csv.
map_counts = pd.read_csv('../data/map_ratas.csv')

# Population table; POBLACION is looked up by the BARRIO column.
df_pob = pd.read_csv('../data/caba_pob_barrios_2010.csv')
population_by_barrio = df_pob.set_index('BARRIO')['POBLACION']

# Attach the population of each neighbourhood, then derive the rate.
# A domicilio_barrio value with no matching BARRIO entry gets NaN in both
# new columns.
df = (
    map_counts
    .assign(pob=lambda t: t['domicilio_barrio'].map(population_by_barrio))
    .assign(ratio=lambda t: t['counts'] / t['pob'] * 1000)
)

# Write the result to map_ratas_normalized.csv.
df.to_csv('../data/map_ratas_normalized.csv', index=False)




