### dev_notebook_

This is your development environment. You may start by:
- Exploring your data.
- Testing the ad-hoc methods.
- Designing your pipeline.

# Leemos el set de datos
El objetivo de este punto es leer los datos y transformarlos en un DataFrame de pandas.

In [28]:
#Import libraries
import pandas as pd
import requests

In [29]:
# Load the BiciMAD stations CSV (tab-separated) and preview the first rows.
df_bicimad = pd.read_csv(
    filepath_or_buffer="../data/bicimad_stations.csv",
    sep="\t",
)
df_bicimad.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry.type,geometry.coordinates
0,0,1,1a - Puerta del Sol A,3,1a,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.7018341, 40.4172137]"
1,1,2,1b - Puerta del Sol B,3,1b,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.701602938060457, 40.41731271011562]"
2,2,3,2 - Miguel Moya,3,2,Calle Miguel Moya nº 1,1,1,24,0,0,0,Point,"[-3.7058415, 40.4205886]"
3,3,4,3 - Plaza Conde Suchil,2,3,Plaza del Conde del Valle de Súchil nº 3,1,0,18,9,9,0,Point,"[-3.7069171, 40.4302937]"
4,4,5,4 - Malasaña,1,4,Calle Manuela Malasaña nº 5,1,0,24,23,1,0,Point,"[-3.7025875, 40.4285524]"


In [30]:
# Load the Bicipark stations CSV (semicolon-separated) and preview the first rows.
df_bicipark = pd.read_csv(
    filepath_or_buffer="../data/bicipark_stations.csv",
    sep=";",
)
df_bicipark.head(n=5)

Unnamed: 0.1,Unnamed: 0,stationId,stationName,address,city,zip_code,total_places,free_places,enabled,reserved_places,geometry.type,geometry.coordinates
0,0,74,Bicipark Fuente de la Mora,Estación de Fuente de la Mora,Madrid,28050,10,9,1,0,Point,"[-3.6630679, 40.4846838]"
1,1,22,Bicipark Orense,"Avenida General Perón, 27",Madrid,28020,15,9,1,1,Point,"[-3.693897, 40.452973]"
2,2,49,Bicipark Montalban,"Calle de Montalbán, 5",Madrid,28014,16,15,1,0,Point,"[-3.690786, 40.418146]"
3,3,12,Bicipark Almagro,"Calle Almagro, 11",Madrid,28010,16,12,1,0,Point,"[-3.692879, 40.430361]"
4,4,9,Bicipark Recoletos,"Paseo de Recoletos, 2",Madrid,28001,10,3,0,0,Point,"[-3.692245, 40.420354]"


In [31]:
# Build the URL of the sports-facilities JSON and download it.
base_url = "https://datos.madrid.es/egob"
body = "/catalogo/212808-0-espacio-deporte.json"
# requests waits indefinitely by default; a timeout keeps the cell from hanging
# if the server never answers.
response = requests.get(base_url + body, timeout=30)
# Fail here, with a clear HTTP error, rather than later when the body is parsed.
response.raise_for_status()
print(response)

<Response [200]>


In [32]:
# Keep both the decoded JSON payload and the raw response body.
json_data = response.json()
content = response.content

In [51]:
# Inspect the top-level keys of the JSON, because a DataFrame cannot be built from it directly
json_data.keys()

dict_keys(['@context', '@graph'])

In [34]:
# The records we care about live under the "@graph" key.
df_deporte = pd.DataFrame(data=json_data["@graph"])

In [35]:
# Preview the first rows of the sports-facilities frame.
df_deporte.head(n=5)

Unnamed: 0,@id,@type,id,title,relation,address,location,organization
0,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,181447,Campo de Golf del Centro Nacional de Golf,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.4855081729451, 'longitude': -3...","{'organization-desc': ' Bus: 67, 82.', 'acces..."
1,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4670,Campo de Golf del Club de Golf Olivar de la Hi...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46896569497632, 'longitude': -...","{'organization-desc': ' Bus: 112.', 'accesi..."
2,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7381184,Centro Comercial Dreams - Palacio Municipal de...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46287225446314, 'longitude': -...",{'organization-desc': ' Metro: Canillas (líne...
3,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7081762,Centro de Alto Rendimiento del Consejo Superio...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.43681199436766, 'longitude': -...","{'organization-desc': ' Bus: 46, U, 160, 161...."
4,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4844,Centro de Natación Mundial 86 (M - 86),http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.41552303421355, 'longitude': -...",{'organization-desc': ' Metro: Sainz de Baran...


# Limpieza de datos
El objetivo de este punto es llegar a tener todos los datos de la forma más limpia posible.

## BiciMad

In [80]:
# Preview the BiciMAD frame before cleaning.
# NOTE(review): the stored output already shows Longitud/Latitud, which are only
# created in a later cell; on a fresh top-to-bottom run they will not appear here.
df_bicimad.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry.type,geometry.coordinates,Longitud,Latitud
0,0,1,1a - Puerta del Sol A,3,1a,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.7018341, 40.4172137]",-3.701834,40.417214
1,1,2,1b - Puerta del Sol B,3,1b,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.701602938060457, 40.41731271011562]",-3.701603,40.417313
2,2,3,2 - Miguel Moya,3,2,Calle Miguel Moya nº 1,1,1,24,0,0,0,Point,"[-3.7058415, 40.4205886]",-3.705842,40.420589
3,3,4,3 - Plaza Conde Suchil,2,3,Plaza del Conde del Valle de Súchil nº 3,1,0,18,9,9,0,Point,"[-3.7069171, 40.4302937]",-3.706917,40.430294
4,4,5,4 - Malasaña,1,4,Calle Manuela Malasaña nº 5,1,0,24,23,1,0,Point,"[-3.7025875, 40.4285524]",-3.702587,40.428552


In [81]:
# Summarise column dtypes and non-null counts for the BiciMAD frame.
df_bicimad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264 entries, 0 to 263
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0            264 non-null    int64  
 1   id                    264 non-null    int64  
 2   name                  264 non-null    object 
 3   light                 264 non-null    int64  
 4   number                264 non-null    object 
 5   address               264 non-null    object 
 6   activate              264 non-null    int64  
 7   no_available          264 non-null    int64  
 8   total_bases           264 non-null    int64  
 9   dock_bikes            264 non-null    int64  
 10  free_bases            264 non-null    int64  
 11  reservations_count    264 non-null    int64  
 12  geometry.type         264 non-null    object 
 13  geometry.coordinates  264 non-null    object 
 14  Longitud              264 non-null    float64
 15  Latitud               2

In [62]:
# Goal: split latitude and longitude apart, so first look at a single record.
df_bicimad.loc[0, "geometry.coordinates"]

'[-3.7018341, 40.4172137]'

In [76]:
# In each "[lon, lat]" coordinate string the longitude is the first element
# and the latitude is the second.
def separator_longitud(column):
    """Return the longitude from a "[lon, lat]" coordinate string as a float.

    Args:
        column: A single coordinate string, e.g. '[-3.7018341, 40.4172137]'.

    Returns:
        The text before the first comma, with every '[' removed, parsed as float.
    """
    pieces = column.split(',')
    longitude_text = pieces[0].replace('[', '')
    return float(longitude_text)
def separator_latitud(column):
    """Return the latitude from a "[lon, lat]" coordinate string as a float.

    Args:
        column: A single coordinate string, e.g. '[-3.7018341, 40.4172137]'.

    Returns:
        The text between the first and second comma (or the end of the string),
        with every ']' removed, parsed as float.
    """
    pieces = column.split(',')
    latitude_text = pieces[1].replace(']', '')
    return float(latitude_text)

In [77]:
# Check that the helper works on one specific record.
separator_latitud(df_bicimad.loc[0, "geometry.coordinates"])

40.4172137

In [79]:
# Split the "[lon, lat]" strings of geometry.coordinates into numeric
# Longitud / Latitud columns.
# Series.map runs the parser on the one column it needs, instead of building
# every full row with DataFrame.apply(axis=1) just to read a single field.
df_bicimad['Longitud'] = df_bicimad["geometry.coordinates"].map(separator_longitud)
df_bicimad['Latitud'] = df_bicimad["geometry.coordinates"].map(separator_latitud)
df_bicimad.head()

Unnamed: 0.1,Unnamed: 0,id,name,light,number,address,activate,no_available,total_bases,dock_bikes,free_bases,reservations_count,geometry.type,geometry.coordinates,Longitud,Latitud
0,0,1,1a - Puerta del Sol A,3,1a,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.7018341, 40.4172137]",-3.701834,40.417214
1,1,2,1b - Puerta del Sol B,3,1b,Puerta del Sol nº 1,1,1,30,0,0,0,Point,"[-3.701602938060457, 40.41731271011562]",-3.701603,40.417313
2,2,3,2 - Miguel Moya,3,2,Calle Miguel Moya nº 1,1,1,24,0,0,0,Point,"[-3.7058415, 40.4205886]",-3.705842,40.420589
3,3,4,3 - Plaza Conde Suchil,2,3,Plaza del Conde del Valle de Súchil nº 3,1,0,18,9,9,0,Point,"[-3.7069171, 40.4302937]",-3.706917,40.430294
4,4,5,4 - Malasaña,1,4,Calle Manuela Malasaña nº 5,1,0,24,23,1,0,Point,"[-3.7025875, 40.4285524]",-3.702587,40.428552


In [99]:
# Keep only the columns needed later on.
df_bicimad.loc[:, ["id", "name", "Longitud", "Latitud"]]

Unnamed: 0,id,name,Longitud,Latitud
0,1,1a - Puerta del Sol A,-3.701834,40.417214
1,2,1b - Puerta del Sol B,-3.701603,40.417313
2,3,2 - Miguel Moya,-3.705842,40.420589
3,4,3 - Plaza Conde Suchil,-3.706917,40.430294
4,5,4 - Malasaña,-3.702587,40.428552
...,...,...,...,...
259,265,257 - INEF,-3.729970,40.438960
260,266,258 - Ciudad Universitaria 1,-3.726990,40.443750
261,267,259 - Ciudad Universitaria 2,-3.726930,40.443420
262,268,260 - Facultad Biología,-3.727295,40.448332


## BiciPark

In [91]:
# Preview the first rows of the Bicipark frame.
df_bicipark.head(n=5)

Unnamed: 0.1,Unnamed: 0,stationId,stationName,address,city,zip_code,total_places,free_places,enabled,reserved_places,geometry.type,geometry.coordinates
0,0,74,Bicipark Fuente de la Mora,Estación de Fuente de la Mora,Madrid,28050,10,9,1,0,Point,"[-3.6630679, 40.4846838]"
1,1,22,Bicipark Orense,"Avenida General Perón, 27",Madrid,28020,15,9,1,1,Point,"[-3.693897, 40.452973]"
2,2,49,Bicipark Montalban,"Calle de Montalbán, 5",Madrid,28014,16,15,1,0,Point,"[-3.690786, 40.418146]"
3,3,12,Bicipark Almagro,"Calle Almagro, 11",Madrid,28010,16,12,1,0,Point,"[-3.692879, 40.430361]"
4,4,9,Bicipark Recoletos,"Paseo de Recoletos, 2",Madrid,28001,10,3,0,0,Point,"[-3.692245, 40.420354]"


In [92]:
# Summarise column dtypes and non-null counts for the Bicipark frame.
df_bicipark.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Unnamed: 0            11 non-null     int64 
 1   stationId             11 non-null     int64 
 2   stationName           11 non-null     object
 3   address               11 non-null     object
 4   city                  11 non-null     object
 5   zip_code              11 non-null     object
 6   total_places          11 non-null     int64 
 7   free_places           11 non-null     int64 
 8   enabled               11 non-null     int64 
 9   reserved_places       11 non-null     int64 
 10  geometry.type         11 non-null     object
 11  geometry.coordinates  11 non-null     object
dtypes: int64(6), object(6)
memory usage: 1.2+ KB


In [94]:
# Reuse the coordinate parsers to add Longitud / Latitud columns.
# Series.map runs the parser on the one column it needs, instead of building
# every full row with DataFrame.apply(axis=1) just to read a single field.
df_bicipark['Longitud'] = df_bicipark["geometry.coordinates"].map(separator_longitud)
df_bicipark['Latitud'] = df_bicipark["geometry.coordinates"].map(separator_latitud)
df_bicipark

Unnamed: 0.1,Unnamed: 0,stationId,stationName,address,city,zip_code,total_places,free_places,enabled,reserved_places,geometry.type,geometry.coordinates,Longitud,Latitud
0,0,74,Bicipark Fuente de la Mora,Estación de Fuente de la Mora,Madrid,28050,10,9,1,0,Point,"[-3.6630679, 40.4846838]",-3.663068,40.484684
1,1,22,Bicipark Orense,"Avenida General Perón, 27",Madrid,28020,15,9,1,1,Point,"[-3.693897, 40.452973]",-3.693897,40.452973
2,2,49,Bicipark Montalban,"Calle de Montalbán, 5",Madrid,28014,16,15,1,0,Point,"[-3.690786, 40.418146]",-3.690786,40.418146
3,3,12,Bicipark Almagro,"Calle Almagro, 11",Madrid,28010,16,12,1,0,Point,"[-3.692879, 40.430361]",-3.692879,40.430361
4,4,9,Bicipark Recoletos,"Paseo de Recoletos, 2",Madrid,28001,10,3,0,0,Point,"[-3.692245, 40.420354]",-3.692245,40.420354
5,5,5,Bicipark Recuerdo,"Calle Hiedra, 26",Madrid,28036,13,13,1,0,Point,"[-3.678921, 40.472419]",-3.678921,40.472419
6,6,25,Bicipark Salamanca,Pl. del Marqués de Salamanca,Madrid,28006,5,-1,1,0,Point,"[-3.679172, 40.430315]",-3.679172,40.430315
7,7,67,Bicipark Villa de Paris,"Calle de Orellana, 23, 21",Madrid,28004,10,8,1,1,Point,"[-3.693698, 40.425608]",-3.693698,40.425608
8,8,1154,Bicipark Pitis,"Calle Pau Arroyo del Fresno, 181",Madrid,28049,10,7,1,0,Point,"[-3.7249142, 40.4959425]",-3.724914,40.495942
9,9,1169,Bicipark Aviación,Parking Aviación Española,Madrid,28024,10,10,1,0,Point,"[-3.78369522, 40.383491516]",-3.783695,40.383492


In [100]:
# Keep only the columns needed later on.
df_bicipark.loc[:, ["stationId", "stationName", "Longitud", "Latitud"]]

Unnamed: 0,stationId,stationName,Longitud,Latitud
0,74,Bicipark Fuente de la Mora,-3.663068,40.484684
1,22,Bicipark Orense,-3.693897,40.452973
2,49,Bicipark Montalban,-3.690786,40.418146
3,12,Bicipark Almagro,-3.692879,40.430361
4,9,Bicipark Recoletos,-3.692245,40.420354
5,5,Bicipark Recuerdo,-3.678921,40.472419
6,25,Bicipark Salamanca,-3.679172,40.430315
7,67,Bicipark Villa de Paris,-3.693698,40.425608
8,1154,Bicipark Pitis,-3.724914,40.495942
9,1169,Bicipark Aviación,-3.783695,40.383492


## Espacio deporte

In [102]:
# Summarise column dtypes and non-null counts for the sports-facilities frame.
df_deporte.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 626 entries, 0 to 625
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   @id           626 non-null    object
 1   @type         626 non-null    object
 2   id            626 non-null    object
 3   title         626 non-null    object
 4   relation      626 non-null    object
 5   address       626 non-null    object
 6   location      626 non-null    object
 7   organization  626 non-null    object
dtypes: object(8)
memory usage: 39.2+ KB


In [101]:
# Preview the sports-facilities frame before extracting coordinates.
df_deporte.head(n=5)

Unnamed: 0,@id,@type,id,title,relation,address,location,organization
0,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,181447,Campo de Golf del Centro Nacional de Golf,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.4855081729451, 'longitude': -3...","{'organization-desc': ' Bus: 67, 82.', 'acces..."
1,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4670,Campo de Golf del Club de Golf Olivar de la Hi...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46896569497632, 'longitude': -...","{'organization-desc': ' Bus: 112.', 'accesi..."
2,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7381184,Centro Comercial Dreams - Palacio Municipal de...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46287225446314, 'longitude': -...",{'organization-desc': ' Metro: Canillas (líne...
3,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7081762,Centro de Alto Rendimiento del Consejo Superio...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.43681199436766, 'longitude': -...","{'organization-desc': ' Bus: 46, U, 160, 161...."
4,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4844,Centro de Natación Mundial 86 (M - 86),http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.41552303421355, 'longitude': -...",{'organization-desc': ' Metro: Sainz de Baran...


In [109]:
# Each "location" value is a dict; read the longitude of the first record.
df_deporte.loc[0, "location"]["longitude"]

-3.735278975476509

In [115]:
def separator_longitud_json(column):
    """Return the longitude stored in one ``location`` record.

    Args:
        column: A single ``location`` value, expected to be a dict such as
            ``{'latitude': 40.48, 'longitude': -3.73}``.

    Returns:
        The value under ``"longitude"``, or ``None`` when ``column`` is not a
        dict or has no ``"longitude"`` key.
    """
    # Missing or malformed records give None, so mapping this helper over a
    # whole column never aborts halfway through.
    if not isinstance(column, dict):
        return None
    # Direct lookup: no loop over the dict is needed to read one key.
    return column.get("longitude")
def separator_latitud_json(column):
    """Return the latitude stored in one ``location`` record.

    Args:
        column: A single ``location`` value, expected to be a dict such as
            ``{'latitude': 40.48, 'longitude': -3.73}``.

    Returns:
        The value under ``"latitude"``, or ``None`` when ``column`` is not a
        dict or has no ``"latitude"`` key.
    """
    # Missing or malformed records give None, so mapping this helper over a
    # whole column never aborts halfway through.
    if not isinstance(column, dict):
        return None
    # Direct lookup: no loop over the dict is needed to read one key.
    return column.get("latitude")

In [116]:
# Check the helper on a single record before applying it to the whole column.
# The function defined in this notebook is `separator_longitud_json`.
separator_longitud_json(df_deporte["location"][0])

-3.735278975476509

In [117]:
# Pull longitude / latitude out of each "location" dict into their own columns.
# Series.map runs the helper on the one column it needs, instead of building
# every full row with DataFrame.apply(axis=1) just to read a single field.
df_deporte['Longitud'] = df_deporte["location"].map(separator_longitud_json)
df_deporte['Latitud'] = df_deporte["location"].map(separator_latitud_json)
df_deporte

Unnamed: 0,@id,@type,id,title,relation,address,location,organization,Longitud,Latitud
0,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,181447,Campo de Golf del Centro Nacional de Golf,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.4855081729451, 'longitude': -3...","{'organization-desc': ' Bus: 67, 82.', 'acces...",-3.735279,40.485508
1,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4670,Campo de Golf del Club de Golf Olivar de la Hi...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46896569497632, 'longitude': -...","{'organization-desc': ' Bus: 112.', 'accesi...",-3.609367,40.468966
2,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7381184,Centro Comercial Dreams - Palacio Municipal de...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.46287225446314, 'longitude': -...",{'organization-desc': ' Metro: Canillas (líne...,-3.635776,40.462872
3,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,7081762,Centro de Alto Rendimiento del Consejo Superio...,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.43681199436766, 'longitude': -...","{'organization-desc': ' Bus: 46, U, 160, 161....",-3.731957,40.436812
4,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,4844,Centro de Natación Mundial 86 (M - 86),http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.41552303421355, 'longitude': -...",{'organization-desc': ' Metro: Sainz de Baran...,-3.667197,40.415523
...,...,...,...,...,...,...,...,...,...,...
621,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,8192515,Parque Deportivo Puerta de Hierro,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.463352206633466, 'longitude': ...","{'organization-desc': ' Bus: 83, 133.', 'acce...",-3.749646,40.463352
622,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,10967962,Parque Lineal UZI 0.06 Arroyofresno,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.487146568534655, 'longitude': ...",{'organization-desc': ' Metro: Antonio Machad...,-3.732863,40.487147
623,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,10967954,Pista de Patinaje Paco de Lucía,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.38527831238433, 'longitude': -...",{'organization-desc': ' Metro: Antonio Machad...,-3.764155,40.385278
624,https://datos.madrid.es/egob/catalogo/tipo/ent...,https://datos.madrid.es/egob/kos/entidadesYorg...,10967959,Pista de petanca Centro de Mayores La Vaguada,http://www.madrid.es/sites/v/index.jsp?vgnextc...,{'district': {'@id': 'https://datos.madrid.es/...,"{'latitude': 40.47903297218874, 'longitude': -...",{'organization-desc': ' Metro: Antonio Machad...,-3.708264,40.479033


In [120]:
# Keep only the identifier, title and coordinate columns.
df_deporte.loc[:, ["id", "title", "Longitud", "Latitud"]]

Unnamed: 0,id,title,Longitud,Latitud
0,181447,Campo de Golf del Centro Nacional de Golf,-3.735279,40.485508
1,4670,Campo de Golf del Club de Golf Olivar de la Hi...,-3.609367,40.468966
2,7381184,Centro Comercial Dreams - Palacio Municipal de...,-3.635776,40.462872
3,7081762,Centro de Alto Rendimiento del Consejo Superio...,-3.731957,40.436812
4,4844,Centro de Natación Mundial 86 (M - 86),-3.667197,40.415523
...,...,...,...,...
621,8192515,Parque Deportivo Puerta de Hierro,-3.749646,40.463352
622,10967962,Parque Lineal UZI 0.06 Arroyofresno,-3.732863,40.487147
623,10967954,Pista de Patinaje Paco de Lucía,-3.764155,40.385278
624,10967959,Pista de petanca Centro de Mayores La Vaguada,-3.708264,40.479033


# Analizamos datos 