### Mapping Bogotá - Data Analysis

*24 de Noviembre de 2021*

*Nicolás Tibatá*

In [1]:
import json
import pandas as pd
import plotly.express as px # pip install plotly
import numpy as np

In [2]:
# I dont want to make my jupyter notebook, so...
import plotly.io as pio
pio.renderers.default = 'chrome'

In [3]:
localidades_b = json.load(open('poligonos-localidad-2.geojson', 'r'))
# Get it here: https://bogota-laburbano.opendatasoft.com/explore/dataset/poligonos-localidades/export/?flg=es

In [4]:
df = pd.read_csv('df_maps.csv')

In [5]:
df['Promedio'] = df['Precio'].groupby(df['Localidad']).transform('mean')
# Whit this line we can append the mean of the price grouping by locality

In [6]:
df['Promedio']

0       1.242554e+09
1       7.383502e+08
2       5.532981e+08
3       7.383502e+08
4       5.532981e+08
            ...     
3392    5.178020e+08
3393    7.695618e+08
3394    4.576452e+08
3395    7.695618e+08
3396    3.589825e+08
Name: Promedio, Length: 3397, dtype: float64

We have to change the locality names. To match the geojson file

In [7]:
df['Localidad'] = df['Localidad'].str.upper()
df['Localidad'] = [s.replace('Á', 'A') for s in df['Localidad']]
df['Localidad'] = [s.replace('É', 'E') for s in df['Localidad']]
df['Localidad'] = [s.replace('Í', 'I') for s in df['Localidad']]
df['Localidad'] = [s.replace('Ó', 'O') for s in df['Localidad']]

In [8]:
df['Localidad'] = df['Localidad'].str.strip() # To eliminate the first blank space of a string
df['Localidad']

0            CHAPINERO
1              USAQUEN
2                 SUBA
3              USAQUEN
4                 SUBA
             ...      
3392          FONTIBON
3393       TEUSAQUILLO
3394    ANTONIO NARIÑO
3395       TEUSAQUILLO
3396           KENNEDY
Name: Localidad, Length: 3397, dtype: object

In [9]:
df.columns

Index(['Unnamed: 0', 'Precio', 'Metros Cuadrados', 'Habitaciones', 'Baños',
       'Parqueadero', 'Estrato', 'Localidad', 'Barrio', 'Contacto',
       'Robo_Personas', 'Homicidios', 'Promedio'],
      dtype='object')

In [14]:
maps_final = df.drop(['Unnamed: 0', 'Precio', 'Metros Cuadrados', 'Habitaciones', 'Baños',
       'Parqueadero', 'Estrato', 'Barrio', 'Contacto'], axis = 1)

In [15]:
maps_final

Unnamed: 0,Localidad,Robo_Personas,Homicidios,Promedio
0,CHAPINERO,5578,6,1.242554e+09
1,USAQUEN,5376,29,7.383502e+08
2,SUBA,7567,43,5.532981e+08
3,USAQUEN,5376,29,7.383502e+08
4,SUBA,7567,43,5.532981e+08
...,...,...,...,...
3392,FONTIBON,4739,21,5.178020e+08
3393,TEUSAQUILLO,3506,9,7.695618e+08
3394,ANTONIO NARIÑO,1788,11,4.576452e+08
3395,TEUSAQUILLO,3506,9,7.695618e+08


In [16]:
maps_final1 = maps_final.drop_duplicates(['Localidad'])

In [17]:
maps_final1

Unnamed: 0,Localidad,Robo_Personas,Homicidios,Promedio
0,CHAPINERO,5578,6,1242554000.0
1,USAQUEN,5376,29,738350200.0
2,SUBA,7567,43,553298100.0
86,BARRIOS UNIDOS,2975,10,1026022000.0
277,ENGATIVA,8151,32,458086700.0
803,TEUSAQUILLO,3506,9,769561800.0
827,SANTA FE,3255,36,489554300.0
853,PUENTE ARANDA,3778,16,491002000.0
1047,LOS MARTIRES,2849,54,559323800.0
1303,FONTIBON,4739,21,517802000.0


-----

#### Featuring the Geojson File

In [18]:
localidades_b['features'][0]['properties'] # We can see the locality. So lets append it
localidades_b['features'][1]['properties'] 
# The important information here is 'Identificador unico de la localidad'

{'identificador_unico_de_la_localidad': '10',
 'area_de_la_localidad': '35880967,349154',
 'acto_administrativo_de_la_localidad': 'Acuerdo 8 de 1977',
 'geo_point_2d': [4.701028978459081, -74.11320818503745],
 'nombre_de_la_localidad': 'ENGATIVA'}

There's a problem. We need the 'id' on a int format, not a str. So let´s change it.

In [19]:
for i in range(20): # There are 20 locality's in Bogotá
    localidades_b['features'][i]['properties']['identificador_unico_de_la_localidad'] = int(localidades_b['features'][i]['properties']['identificador_unico_de_la_localidad'])

In [20]:
localidades_b['features'][1]['properties'] # id as int

{'identificador_unico_de_la_localidad': 10,
 'area_de_la_localidad': '35880967,349154',
 'acto_administrativo_de_la_localidad': 'Acuerdo 8 de 1977',
 'geo_point_2d': [4.701028978459081, -74.11320818503745],
 'nombre_de_la_localidad': 'ENGATIVA'}

In [21]:
localidad_id_map = {}
for feature in localidades_b['features']:
    feature['id'] = feature['properties']['identificador_unico_de_la_localidad']
    localidad_id_map[feature['properties']['nombre_de_la_localidad']] = feature['id']

In [22]:
localidad_id_map # We have our dictionary with the locality's. 

{'CANDELARIA': 17,
 'ENGATIVA': 10,
 'PUENTE ARANDA': 16,
 'CHAPINERO': 2,
 'ANTONIO NARIÑO': 15,
 'BARRIOS UNIDOS': 12,
 'LOS MARTIRES': 14,
 'CIUDAD BOLIVAR': 19,
 'SAN CRISTOBAL': 4,
 'USAQUEN': 1,
 'RAFAEL URIBE URIBE': 18,
 'USME': 5,
 'SANTA FE': 3,
 'BOSA': 7,
 'TUNJUELITO': 6,
 'SUBA': 11,
 'KENNEDY': 8,
 'TEUSAQUILLO': 13,
 'SUMAPAZ': 20,
 'FONTIBON': 9}

In [23]:
localidad_id_map.pop('SUMAPAZ')

20

In [24]:
localidad_id_map

{'CANDELARIA': 17,
 'ENGATIVA': 10,
 'PUENTE ARANDA': 16,
 'CHAPINERO': 2,
 'ANTONIO NARIÑO': 15,
 'BARRIOS UNIDOS': 12,
 'LOS MARTIRES': 14,
 'CIUDAD BOLIVAR': 19,
 'SAN CRISTOBAL': 4,
 'USAQUEN': 1,
 'RAFAEL URIBE URIBE': 18,
 'USME': 5,
 'SANTA FE': 3,
 'BOSA': 7,
 'TUNJUELITO': 6,
 'SUBA': 11,
 'KENNEDY': 8,
 'TEUSAQUILLO': 13,
 'FONTIBON': 9}

-----

In [25]:
maps_final1['id'] = maps_final1['Localidad'].apply(lambda x: localidad_id_map[x])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [51]:
maps_final1['PromedioScale'] = np.log10(maps_final1['Promedio'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [26]:
maps_final1

Unnamed: 0,Localidad,Robo_Personas,Homicidios,Promedio,id
0,CHAPINERO,5578,6,1242554000.0,2
1,USAQUEN,5376,29,738350200.0,1
2,SUBA,7567,43,553298100.0,11
86,BARRIOS UNIDOS,2975,10,1026022000.0,12
277,ENGATIVA,8151,32,458086700.0,10
803,TEUSAQUILLO,3506,9,769561800.0,13
827,SANTA FE,3255,36,489554300.0,3
853,PUENTE ARANDA,3778,16,491002000.0,16
1047,LOS MARTIRES,2849,54,559323800.0,14
1303,FONTIBON,4739,21,517802000.0,9


In [27]:
maps_final1.to_csv('df_maps_final.csv')