To take advantage of the model, we want to predict noise in locations without noise sensors.

In [1]:
import pandas as pd
import json
from geopy import distance

In [2]:
interim_path = '../data/interim/'

## Read Locations of New Points

In [6]:
df_locations = pd.read_csv("../data/otros/Nuevas_Localizaciones.csv",sep=",")

In [7]:
df_locations.head()

Unnamed: 0,X,Y,objectid,numero_estacion,nombre_estacion,coordenada_x_estacion,coordenada_y_estacion,ubicacion_estacion,troncal_estacion,numero_vagones_estacion,numero_accesos_estacion,biciestacion_estacion,capacidad_biciestacion_estacion,tipo_estacion,biciparqueadero_estacion,latitud_estacion,longitud_estacion,globalid
0,-74.056178,4.692982,1,2205,Calle 106,1002370.0,1010682.0,Calle 106,Autonorte,4,1,0,0,4,0,4.692802,-74.056147,{D804EEAA-1DFA-41B9-8EA7-4C5C728E7AC1}
1,-74.046035,4.754621,2,2000,Portal del Norte,1003499.0,1017502.0,Calle 173,Autonorte,0,0,0,0,1,0,4.754472,-74.045965,{182C5CE1-0696-47CA-8EB8-40E45A851D42}
2,-74.079866,4.630663,3,7108,AV. El Dorado,999749.1,1003767.0,Kr 30 Av 28,NQS,1,1,0,0,4,0,4.630262,-74.079769,{D51603ED-7B42-48F9-A729-98A88E48C3CA}
3,-74.055298,4.698125,4,2204,Pepe Sierra,1002463.0,1011242.0,Calle 116,Autonorte,2,1,0,0,4,0,4.697859,-74.055313,{43A41E08-B32C-458F-B86F-76D19888C696}
4,-74.049218,4.734585,5,2103,Mazurén,1003166.0,1015401.0,Calle 152,Autonorte,5,1,0,0,4,0,4.735471,-74.04897,{893AC347-B878-48AD-A615-F94C152AFF8C}


In [8]:
cais_SDA = df_locations[["nombre_estacion",'latitud_estacion','longitud_estacion']].copy()

In [14]:
cais_SDA.columns = ['Estación','latitud','longitud']

In [15]:
cais_SDA

Unnamed: 0,Estación,latitud,longitud
0,Calle 106,4.692802,-74.056147
1,Portal del Norte,4.754472,-74.045965
2,AV. El Dorado,4.630262,-74.079769
3,Pepe Sierra,4.697859,-74.055313
4,Mazurén,4.735471,-74.048970
...,...,...,...
144,Puente Aranda,4.625567,-74.104614
145,San Victorino,4.601774,-74.076854
146,Ciudad Jardín,4.581217,-74.090428
147,Policarpa,4.586540,-74.086550


In [16]:
cais_SDA['lat_lon'] = list(zip(cais_SDA.latitud, cais_SDA.longitud))

In [17]:
cais_SDA.head()

Unnamed: 0,Estación,latitud,longitud,lat_lon
0,Calle 106,4.692802,-74.056147,"(4.69280155, -74.05614686)"
1,Portal del Norte,4.754472,-74.045965,"(4.75447203, -74.04596522)"
2,AV. El Dorado,4.630262,-74.079769,"(4.63026213, -74.07976893)"
3,Pepe Sierra,4.697859,-74.055313,"(4.69785859, -74.05531281)"
4,Mazurén,4.735471,-74.04897,"(4.73547088, -74.04897041)"


## Read Bares y Restaurantes

In [18]:
bares_data = json.load(open("../data/otros/bares/bares.geojson"))

In [19]:
bares_df = pd.json_normalize(bares_data['features'])

In [20]:
bares_df.drop(["type","geometry.type","geometry.coordinates"],axis=1,inplace=True)

In [21]:
bares_df.columns = bares_df.columns.str.replace("properties.", "")

In [22]:
bares_df.head()

Unnamed: 0,SUBCATEGOR,NOMBRE_EST,DIRECCION,LOC,SECTOR_CAT,LATITUD,LONGITUD
0,L,CHIBCHOMBIA,CL 27 # 4 - 49 P 1,3,LA MACARENA,4.613799,-74.066259
1,L,RESTAURANTE ROMULO Y REMO MACARENA,KR 4A # 26D - 90,3,LA MACARENA,4.613799,-74.066259
2,L,BOGOTA BEER COMPANY S A,KR 4A # 27 - 3,3,LA MACARENA,4.614092,-74.06589
3,L,PRESTO BTA CALLE 27,KR 7 # 27 - 38 LC 1 ED COLISSEUM,3,SAN DIEGO,4.614158,-74.069222
4,L,LA HAMBURGUESERIA DE LA MACARENA,KR 4A # 27 - 27,3,LA MACARENA,4.614193,-74.06644


In [23]:
bares = bares_df[["NOMBRE_EST","LATITUD","LONGITUD"]].copy()

In [24]:
bares["lat_lon"] = list(zip(bares.LATITUD, bares.LONGITUD))

### Calcular Bares y Restaurantes cercanos en 500 metros

In [25]:
def conteo_en_radio(cais_SDA,establecimientos_df,radio,nombre_var):
    
    for cai in cais_SDA.Estación :
        print(cai)
        cai_lat_lon = cais_SDA[cais_SDA.Estación==cai]['lat_lon']
        temp_df = establecimientos_df.copy()
        temp_df["distancia"] = temp_df.lat_lon.apply(lambda x: distance.distance(cai_lat_lon, x).km)
        cais_SDA.loc[cais_SDA.Estación==cai,nombre_var]=len(temp_df.sort_values("distancia").query("distancia<"+str(radio)).lat_lon)

    return cais_SDA

In [26]:
cais_SDA = conteo_en_radio(cais_SDA,bares,0.5,"conteo_bares_500m")
cais_SDA = conteo_en_radio(cais_SDA,bares,1,"conteo_bares_1km")
cais_SDA = conteo_en_radio(cais_SDA,bares,2,"conteo_bares_2km")

Calle 106
Portal del Norte
AV. El Dorado
Pepe Sierra
Mazurén
Carrera 47
Santa Isabel
Calle 127
General Santander
AV. Cali
Calle 85
Calle 142
Venecia
U. Nacional
CAD
Virrey
Carrera 53
Carrera 90
NQS - Calle 75
Simón Bolívar
Portal de la 80
Marly
Calle 63
SENA
Portal del Sur
Ricaurte - NQS
Alcalá
Calle 72
Calle 146
AV. 68
Prado
Banderas
Minuto de Dios
La Campiña
Calle 187
Terminal
Calle 45
Paloquemao
Salitre - El Greco
Olaya
Portal del Tunal
Héroes
Las Aguas
Museo del Oro
Polo
Calle 76
Calle 34
Alquería
Nariño
Puentelargo
Calle 100
Biblioteca Tintal
Suba - TV. 91
Calle 40 S
Madelena
Flores
Calle 57
Modelia
Calle 26
Perdomo
AV. Chile
Gobernación
Restrepo
El Tiempo - Maloka
Hortúa
CDS - Carrera 32
AV. Jiménez - CL 13
Patio Bonito
Suba - Calle 116
Normandía
Granja - Carrera 77
Portal de las Américas
Recinto Ferial
Quinta Paredes
NQS - Calle 30 S
Suba - AV. Boyacá
Escuela Militar
Zona Industrial
Santa Lucía
La Despensa
Toberín
San Martín
La Castellana
Comuneros
Calle 161
Campín
CAN
Portal de

In [27]:
cais_SDA.sort_values("conteo_bares_500m",ascending=False).head(10)

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km
137,Museo Nacional,4.6141,-74.069622,"(4.61410007, -74.06962209999999)",25.0,42.0,84.0
86,CAN,4.646719,-74.098932,"(4.64671936, -74.09893248)",22.0,22.0,36.0
38,Salitre - El Greco,4.651303,-74.101859,"(4.65130342, -74.10185856)",21.0,34.0,36.0
34,Calle 187,4.761784,-74.044723,"(4.76178408, -74.04472315)",20.0,21.0,21.0
43,Museo del Oro,4.601184,-74.072958,"(4.60118385, -74.07295785)",20.0,36.0,63.0
130,NQS - Calle 38A S,4.593988,-74.122942,"(4.59398803, -74.12294152)",17.0,17.0,19.0
58,Calle 26,4.616171,-74.072413,"(4.61617136, -74.07241326)",14.0,41.0,83.0
42,Las Aguas,4.60267,-74.068178,"(4.60266984, -74.06817769)",13.0,35.0,74.0
46,Calle 34,4.622213,-74.069588,"(4.62221301, -74.06958764)",13.0,35.0,61.0
37,Paloquemao,4.617995,-74.088589,"(4.61799507, -74.08858928)",13.0,13.0,14.0


## Read Colegios

In [28]:
colegios_data = pd.read_excel("../data/otros/colegios/colegios.xlsx")

In [29]:
colegios = colegios_data[['DANE12_SEDE_EDUCATIVA','NOMBRE_SEDE_EDUCATIVA','X','Y']].copy()

In [30]:
colegios["lat_lon"] = list(zip(colegios.Y, colegios.X))

### Calcular Colegios cercanos en 500 metros

In [31]:
cais_SDA = conteo_en_radio(cais_SDA,colegios,0.5,"conteo_colegios_500m")
cais_SDA = conteo_en_radio(cais_SDA,colegios,1,"conteo_colegios_1km")
cais_SDA = conteo_en_radio(cais_SDA,colegios,2,"conteo_colegios_2km")

Calle 106
Portal del Norte
AV. El Dorado
Pepe Sierra
Mazurén
Carrera 47
Santa Isabel
Calle 127
General Santander
AV. Cali
Calle 85
Calle 142
Venecia
U. Nacional
CAD
Virrey
Carrera 53
Carrera 90
NQS - Calle 75
Simón Bolívar
Portal de la 80
Marly
Calle 63
SENA
Portal del Sur
Ricaurte - NQS
Alcalá
Calle 72
Calle 146
AV. 68
Prado
Banderas
Minuto de Dios
La Campiña
Calle 187
Terminal
Calle 45
Paloquemao
Salitre - El Greco
Olaya
Portal del Tunal
Héroes
Las Aguas
Museo del Oro
Polo
Calle 76
Calle 34
Alquería
Nariño
Puentelargo
Calle 100
Biblioteca Tintal
Suba - TV. 91
Calle 40 S
Madelena
Flores
Calle 57
Modelia
Calle 26
Perdomo
AV. Chile
Gobernación
Restrepo
El Tiempo - Maloka
Hortúa
CDS - Carrera 32
AV. Jiménez - CL 13
Patio Bonito
Suba - Calle 116
Normandía
Granja - Carrera 77
Portal de las Américas
Recinto Ferial
Quinta Paredes
NQS - Calle 30 S
Suba - AV. Boyacá
Escuela Militar
Zona Industrial
Santa Lucía
La Despensa
Toberín
San Martín
La Castellana
Comuneros
Calle 161
Campín
CAN
Portal de

In [32]:
cais_SDA.head()

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km,conteo_colegios_500m,conteo_colegios_1km,conteo_colegios_2km
0,Calle 106,4.692802,-74.056147,"(4.69280155, -74.05614686)",0.0,6.0,83.0,1.0,8.0,41.0
1,Portal del Norte,4.754472,-74.045965,"(4.75447203, -74.04596522)",1.0,21.0,21.0,6.0,18.0,85.0
2,AV. El Dorado,4.630262,-74.079769,"(4.63026213, -74.07976893)",0.0,2.0,48.0,9.0,24.0,79.0
3,Pepe Sierra,4.697859,-74.055313,"(4.69785859, -74.05531281)",0.0,18.0,61.0,1.0,7.0,40.0
4,Mazurén,4.735471,-74.04897,"(4.73547088, -74.04897041)",0.0,2.0,3.0,4.0,18.0,68.0


## Read Hoteles

In [33]:
hoteles_data = json.load(open("../data/otros/hoteles/hoteles.geojson",encoding='utf-8'))

In [34]:
hoteles_df = pd.json_normalize(hoteles_data['features'])
hoteles_df.drop(["type","geometry.type","geometry.coordinates"],axis=1,inplace=True)
hoteles_df.columns = hoteles_df.columns.str.replace("properties.", "")

In [35]:
hoteles_df.head()

Unnamed: 0,OBJECTID,NOMBRE_EST,DIRECCION,SECTOR_CAT,TELEFONO_F,LATITUD,LONGITUD,SUBCATEGOR,LOCALIDAD,RNT,TELEFONO_M
0,1,HOSPEDAJE A MARTE,Calle 88 B Sur # 0- 78 ESTE,CHAPINERITO,3209969491.0,4.503119,-74.104165,F,5,69652,3228535930
1,2,HOSPEDAJE LA ESQUINA LA 83,CL 88 B SUR NO. 0 96 ESTE,CHAPINERITO,3155535619.0,4.503166,-74.104005,F,5,70119,7632427
2,3,HOSTAL PARAISO R.P.,CL 71 I BIS NO. 27 B - 22 SUR,EL MIRADOR,,4.550338,-74.160098,G,19,75655,3115997250
3,4,HOTEL JERUSALEN THAILY,CR 45 73 B 40 SUR,LAS BRISAS,3107629622.0,4.569871,-74.164428,H,19,61764,7318648
4,5,HOTEL MIRADOR SUITE,TV 14 NO. 45 F 01 SUR,SANTA LUCIA,3123960180.0,4.570783,-74.124986,H,18,57062,3123960180


In [36]:
hoteles =hoteles_df[['NOMBRE_EST','LATITUD','LONGITUD']].copy()
hoteles["lat_lon"] = list(zip(hoteles.LATITUD, hoteles.LONGITUD))

### Calcular hoteles cercanos

In [37]:
cais_SDA = conteo_en_radio(cais_SDA,hoteles,0.5,"conteo_hoteles_500m")
cais_SDA = conteo_en_radio(cais_SDA,hoteles,1,"conteo_hoteles_1km")
cais_SDA = conteo_en_radio(cais_SDA,hoteles,2,"conteo_hoteles_2km")

Calle 106
Portal del Norte
AV. El Dorado
Pepe Sierra
Mazurén
Carrera 47
Santa Isabel
Calle 127
General Santander
AV. Cali
Calle 85
Calle 142
Venecia
U. Nacional
CAD
Virrey
Carrera 53
Carrera 90
NQS - Calle 75
Simón Bolívar
Portal de la 80
Marly
Calle 63
SENA
Portal del Sur
Ricaurte - NQS
Alcalá
Calle 72
Calle 146
AV. 68
Prado
Banderas
Minuto de Dios
La Campiña
Calle 187
Terminal
Calle 45
Paloquemao
Salitre - El Greco
Olaya
Portal del Tunal
Héroes
Las Aguas
Museo del Oro
Polo
Calle 76
Calle 34
Alquería
Nariño
Puentelargo
Calle 100
Biblioteca Tintal
Suba - TV. 91
Calle 40 S
Madelena
Flores
Calle 57
Modelia
Calle 26
Perdomo
AV. Chile
Gobernación
Restrepo
El Tiempo - Maloka
Hortúa
CDS - Carrera 32
AV. Jiménez - CL 13
Patio Bonito
Suba - Calle 116
Normandía
Granja - Carrera 77
Portal de las Américas
Recinto Ferial
Quinta Paredes
NQS - Calle 30 S
Suba - AV. Boyacá
Escuela Militar
Zona Industrial
Santa Lucía
La Despensa
Toberín
San Martín
La Castellana
Comuneros
Calle 161
Campín
CAN
Portal de

In [38]:
cais_SDA.head()

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km,conteo_colegios_500m,conteo_colegios_1km,conteo_colegios_2km,conteo_hoteles_500m,conteo_hoteles_1km,conteo_hoteles_2km
0,Calle 106,4.692802,-74.056147,"(4.69280155, -74.05614686)",0.0,6.0,83.0,1.0,8.0,41.0,2.0,24.0,89.0
1,Portal del Norte,4.754472,-74.045965,"(4.75447203, -74.04596522)",1.0,21.0,21.0,6.0,18.0,85.0,0.0,0.0,2.0
2,AV. El Dorado,4.630262,-74.079769,"(4.63026213, -74.07976893)",0.0,2.0,48.0,9.0,24.0,79.0,6.0,46.0,217.0
3,Pepe Sierra,4.697859,-74.055313,"(4.69785859, -74.05531281)",0.0,18.0,61.0,1.0,7.0,40.0,1.0,16.0,72.0
4,Mazurén,4.735471,-74.04897,"(4.73547088, -74.04897041)",0.0,2.0,3.0,4.0,18.0,68.0,0.0,1.0,1.0


## Read IPS

In [39]:
IPS_df = pd.read_excel("../data/otros/ips/ipsbogota_Geocode.xlsx")

In [40]:
IPS = IPS_df[['USER_SEDE','Y','X']].copy()
IPS["lat_lon"] = list(zip(IPS.Y, IPS.X))

In [41]:
IPS.head()

Unnamed: 0,USER_SEDE,Y,X,lat_lon
0,Optica GMO Colombia SAS,4.763237,-74.045181,"(4.763237003525945, -74.04518135908292)"
1,Opticas GMO Colombia S.A.S,4.645716,-74.064123,"(4.645715767818375, -74.06412271506343)"
2,Opticas GMO Colombia S.A.S.,4.652687,-74.109152,"(4.652686954779876, -74.10915172384549)"
3,Opticas GMO Colombia S.A.S.,4.652687,-74.109152,"(4.652686954779876, -74.10915172384549)"
4,Opticas GMO Colombia S.A.S.,4.656178,-74.057486,"(4.656177849814369, -74.0574863432243)"


### Calcular IPS cercanas en 500 m

In [42]:
cais_SDA = conteo_en_radio(cais_SDA,IPS,0.5,"conteo_ips_500m")
cais_SDA = conteo_en_radio(cais_SDA,IPS,1,"conteo_ips_1km")
cais_SDA = conteo_en_radio(cais_SDA,IPS,2,"conteo_ips_2km")

Calle 106
Portal del Norte
AV. El Dorado
Pepe Sierra
Mazurén
Carrera 47
Santa Isabel
Calle 127
General Santander
AV. Cali
Calle 85
Calle 142
Venecia
U. Nacional
CAD
Virrey
Carrera 53
Carrera 90
NQS - Calle 75
Simón Bolívar
Portal de la 80
Marly
Calle 63
SENA
Portal del Sur
Ricaurte - NQS
Alcalá
Calle 72
Calle 146
AV. 68
Prado
Banderas
Minuto de Dios
La Campiña
Calle 187
Terminal
Calle 45
Paloquemao
Salitre - El Greco
Olaya
Portal del Tunal
Héroes
Las Aguas
Museo del Oro
Polo
Calle 76
Calle 34
Alquería
Nariño
Puentelargo
Calle 100
Biblioteca Tintal
Suba - TV. 91
Calle 40 S
Madelena
Flores
Calle 57
Modelia
Calle 26
Perdomo
AV. Chile
Gobernación
Restrepo
El Tiempo - Maloka
Hortúa
CDS - Carrera 32
AV. Jiménez - CL 13
Patio Bonito
Suba - Calle 116
Normandía
Granja - Carrera 77
Portal de las Américas
Recinto Ferial
Quinta Paredes
NQS - Calle 30 S
Suba - AV. Boyacá
Escuela Militar
Zona Industrial
Santa Lucía
La Despensa
Toberín
San Martín
La Castellana
Comuneros
Calle 161
Campín
CAN
Portal de

In [43]:
cais_SDA.head()

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km,conteo_colegios_500m,conteo_colegios_1km,conteo_colegios_2km,conteo_hoteles_500m,conteo_hoteles_1km,conteo_hoteles_2km,conteo_ips_500m,conteo_ips_1km,conteo_ips_2km
0,Calle 106,4.692802,-74.056147,"(4.69280155, -74.05614686)",0.0,6.0,83.0,1.0,8.0,41.0,2.0,24.0,89.0,64.0,196.0,786.0
1,Portal del Norte,4.754472,-74.045965,"(4.75447203, -74.04596522)",1.0,21.0,21.0,6.0,18.0,85.0,0.0,0.0,2.0,2.0,28.0,70.0
2,AV. El Dorado,4.630262,-74.079769,"(4.63026213, -74.07976893)",0.0,2.0,48.0,9.0,24.0,79.0,6.0,46.0,217.0,29.0,136.0,559.0
3,Pepe Sierra,4.697859,-74.055313,"(4.69785859, -74.05531281)",0.0,18.0,61.0,1.0,7.0,40.0,1.0,16.0,72.0,29.0,167.0,613.0
4,Mazurén,4.735471,-74.04897,"(4.73547088, -74.04897041)",0.0,2.0,3.0,4.0,18.0,68.0,0.0,1.0,1.0,11.0,44.0,131.0


## Read Manzanas

In [44]:
manzanas_df = pd.read_excel("../data/otros/manzanas/MGN_ANM_MANZANA.xlsx")

In [45]:
manzanas_df.head()

Unnamed: 0,OID,OBJECTID_1,COD_DANE_A,DPTO_CCDGO,MPIO_CCDGO,MPIO_CDPMP,CLAS_CCDGO,SETR_CCDGO,SETR_CCNCT,SECR_CCDGO,...,TP51POSTGR,TP51_13_ED,TP51_99_ED,CD_LC_CM,NMB_LC_CM,TP_LC_CM,Shape_Leng,DDLat,DDLon,ORIG_OID
0,1,1,1100110000000011010101,11,1,11001,1,0,110011000,0,...,0,0,7,4,SAN CRISTOBAL,Localidad,0.004176,4.585633,-74.081623,1
1,2,2,1100110000000011010102,11,1,11001,1,0,110011000,0,...,0,2,3,4,SAN CRISTOBAL,Localidad,0.005631,4.584788,-74.08081,2
2,3,3,1100110000000011010103,11,1,11001,1,0,110011000,0,...,44,5,20,4,SAN CRISTOBAL,Localidad,0.011082,4.58248,-74.081045,3
3,4,4,1100110000000011010104,11,1,11001,1,0,110011000,0,...,125,24,37,4,SAN CRISTOBAL,Localidad,0.008378,4.582606,-74.083257,4
4,5,5,1100110000000011010105,11,1,11001,1,0,110011000,0,...,0,9,2,4,SAN CRISTOBAL,Localidad,0.002777,4.582665,-74.07953,5


In [46]:
not_variable = [
    'DPTO_CCDGO', 'MPIO_CCDGO', 'MPIO_CDPMP', 'CLAS_CCDGO', 'SETR_CCDGO',
    'SETR_CCNCT', 'SECR_CCDGO', 'SECR_CCNCT', 'ZU_CCDGO', 'ZU_CDIVI',
    'SETU_CCDGO', 'SETU_CCNCT', 'SECU_CCDGO', 'SECU_CCNCT', 'MANZ_CCDGO',
    'AG_CCDGO', 'DATO_ANM', 'VERSION', 'Shape_Leng', 'DDLat', 'DDLon',
    'ORIG_OID', 'NMB_LC_CM', 'TP_LC_CM '
]

In [47]:
manzanas_var = [x for x in list(manzanas_df.columns) if x not in not_variable]

In [48]:
manzanas = manzanas_df[manzanas_var].copy()

In [49]:
manzanas["lat_lon"] = list(zip(manzanas.LATITUD, manzanas.LONGITUD))

In [50]:
manzanas_resultados = pd.DataFrame()
radio = 0.5
for cai in cais_SDA.Estación:
    cai_lat_lon = cais_SDA[cais_SDA.Estación==cai]['lat_lon']
    temp_df = manzanas.copy()
    temp_df["distancia"] = temp_df.lat_lon.apply(lambda x: distance.distance(cai_lat_lon, x).km)
    suma_manzanas = temp_df.sort_values("distancia").query("distancia<"+str(radio)).sum()
    suma_manzanas['Estación']=cai
    manzanas_resultados = manzanas_resultados.append(suma_manzanas,ignore_index=True)

In [51]:
manzanas_resultados.head()

Unnamed: 0,AREA,CD_LC_CM,COD_DANE_A,CTNENCUEST,DENSIDAD,Estación,LATITUD,LONGITUD,OBJECTID_1,OID,...,TP9_3_7_NO,TP9_3_8_NO,TP9_3_99_N,TP9_3_9_NO,TP9_3_USO,TP9_4_USO,TP_LC_CM,TVIVIENDA,distancia,lat_lon
0,491145.930687,1101111101011101111111011111011111111101010111...,1100110000000053030204110011000000008412010311...,5101.0,1.139643,Calle 106,337.866917,-5332.069414,2248837.0,2248837.0,...,10.0,0.0,0.0,0.0,286.0,0.0,LocalidadLocalidadLocalidadLocalidadLocalidadL...,4815.0,24.629162,"(4.69265806484, -74.0569639216, 4.69198902178,..."
1,473824.607477,1111111101111101111101111111010101111111110111...,1100110000000091200307110011000000009120032411...,4923.0,1.928228,Portal del Norte,266.271292,-4146.626884,2215219.0,2215219.0,...,3.0,0.0,1.0,0.0,451.0,0.0,LocalidadLocalidadLocalidadLocalidadLocalidadL...,4472.0,19.382685,"(4.75467655089, -74.0470408703, 4.75422637881,..."
2,398589.477953,1313131313131313131313131313131313131313131313...,1100110000000071070110110011000000005107030311...,5256.0,1.840561,AV. El Dorado,370.411933,-5926.374721,2513232.0,2513232.0,...,1.0,0.0,1.0,0.0,549.0,0.0,LocalidadLocalidadLocalidadLocalidadLocalidadL...,4707.0,24.935588,"(4.62978240397, -74.0794615155, 4.63030222435,..."
3,459339.258896,0111111111110111011101010101110101111111010101...,1100110000000084180201110011000000009111051111...,5238.0,1.083165,Pepe Sierra,225.479639,-3554.650018,1750300.0,1750300.0,...,3.0,0.0,1.0,0.0,263.0,0.0,LocalidadLocalidadLocalidadLocalidadLocalidadL...,4975.0,17.094689,"(4.69842223182, -74.0544252941, 4.69883154154,..."
4,483077.760558,1101011111110101010101110101011111110101010111...,1100110000000091100101110011000000008530011311...,4179.0,0.787955,Mazurén,175.17944,-2739.782557,1428505.0,1428505.0,...,8.0,0.0,0.0,0.0,261.0,0.0,LocalidadLocalidadLocalidadLocalidadLocalidadL...,3918.0,13.813223,"(4.73591939628, -74.0497988363, 4.73406802014,..."


In [52]:
manzanas_resultados.drop(['CD_LC_CM','COD_DANE_A','TP_LC_CM','distancia','lat_lon','LATITUD','LONGITUD'],axis=1,inplace=True)

## Final Merge

In [53]:
final_df = cais_SDA.merge(manzanas_resultados,how='left',on='Estación')

In [54]:
final_df.head()

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km,conteo_colegios_500m,conteo_colegios_1km,conteo_colegios_2km,...,TP9_3_4_NO,TP9_3_5_NO,TP9_3_6_NO,TP9_3_7_NO,TP9_3_8_NO,TP9_3_99_N,TP9_3_9_NO,TP9_3_USO,TP9_4_USO,TVIVIENDA
0,Calle 106,4.692802,-74.056147,"(4.69280155, -74.05614686)",0.0,6.0,83.0,1.0,8.0,41.0,...,0.0,8.0,0.0,10.0,0.0,0.0,0.0,286.0,0.0,4815.0
1,Portal del Norte,4.754472,-74.045965,"(4.75447203, -74.04596522)",1.0,21.0,21.0,6.0,18.0,85.0,...,1.0,18.0,3.0,3.0,0.0,1.0,0.0,451.0,0.0,4472.0
2,AV. El Dorado,4.630262,-74.079769,"(4.63026213, -74.07976893)",0.0,2.0,48.0,9.0,24.0,79.0,...,0.0,46.0,3.0,1.0,0.0,1.0,0.0,549.0,0.0,4707.0
3,Pepe Sierra,4.697859,-74.055313,"(4.69785859, -74.05531281)",0.0,18.0,61.0,1.0,7.0,40.0,...,0.0,3.0,1.0,3.0,0.0,1.0,0.0,263.0,0.0,4975.0
4,Mazurén,4.735471,-74.04897,"(4.73547088, -74.04897041)",0.0,2.0,3.0,4.0,18.0,68.0,...,0.0,15.0,2.0,8.0,0.0,0.0,0.0,261.0,0.0,3918.0


In [55]:
final_df.tail()

Unnamed: 0,Estación,latitud,longitud,lat_lon,conteo_bares_500m,conteo_bares_1km,conteo_bares_2km,conteo_colegios_500m,conteo_colegios_1km,conteo_colegios_2km,...,TP9_3_4_NO,TP9_3_5_NO,TP9_3_6_NO,TP9_3_7_NO,TP9_3_8_NO,TP9_3_99_N,TP9_3_9_NO,TP9_3_USO,TP9_4_USO,TVIVIENDA
144,Puente Aranda,4.625567,-74.104614,"(4.62556721, -74.10461377)",0.0,0.0,2.0,0.0,6.0,49.0,...,0.0,1.0,2.0,5.0,0.0,0.0,0.0,309.0,0.0,453.0
145,San Victorino,4.601774,-74.076854,"(4.60177387, -74.07685397)",10.0,30.0,61.0,0.0,20.0,66.0,...,0.0,40.0,5.0,5.0,0.0,6.0,0.0,1968.0,0.0,1737.0
146,Ciudad Jardín,4.581217,-74.090428,"(4.58121677, -74.09042752)",0.0,0.0,1.0,10.0,51.0,143.0,...,0.0,21.0,14.0,12.0,0.0,3.0,0.0,280.0,0.0,5043.0
147,Policarpa,4.58654,-74.08655,"(4.58653979, -74.08655036)",0.0,1.0,13.0,2.0,30.0,136.0,...,2.0,32.0,19.0,6.0,0.0,3.0,0.0,469.0,0.0,5819.0
148,Bosa,4.596937,-74.179184,"(4.59693732, -74.17918435)",1.0,1.0,1.0,2.0,20.0,94.0,...,0.0,10.0,14.0,2.0,0.0,1.0,6.0,416.0,0.0,4691.0


In [56]:
final_df.to_csv('../data/interim/nuevas_variables_nuevas_estaciones.csv',index=False)