In [1]:
# 1. 

# Importaciones

import json
import numpy as np
import pandas as pd
from random import choice
from os import listdir
from os.path import isfile, isdir

In [2]:
# 2. 

# Categorías consideradas

# Place categories to process. Each name is also tested as a file name when the
# data files are collected further down.
CATEGORIES = ["bar", "bus_station", "cafe", "department_store", "hospital", "park", 
              "parking", "pharmacy", "primary_school", "restaurant", 
              "school", "secondary_school", "shopping_mall", "subway_station", 
              "taxi_stand", "tourist_attraction", "transit_station", "university"]

# Disabled alternative that restricts the run to the single "protect" category.
#CATEGORIES = ["protect"]

# Groups of the categories considered: group label -> member categories.

DICT_CATEGORIES = {"TRANSPORTS": ["bus_station", "subway_station", "parking", 
                                  "taxi_stand", "transit_station"],
                   "ESTABLISHMENTS": ["bar", "cafe", "restaurant"],
                   "STORES": ["department_store", "shopping_mall"],
                   "HEALTH": ["hospital", "pharmacy"],
                   "PROTECT": ["protect"],
                   "EDUCATION": ["primary_school", "school", "secondary_school", 
                                 "university"],
                   "OTHER": ["park", "tourist_attraction"]}

In [3]:
# 3. 

# Archivos con los datos extraídos

# Para todos los archivos sustituir CATEGORIES por listdir()

# Keep the category names that exist as regular files in the working directory,
# leaving out hidden files, notebooks and CSV exports; sorted alphabetically.
files = sorted(
    name for name in CATEGORIES
    if isfile(name)
    and not name.startswith(".")
    and not name.endswith(("ipynb", "csv"))
)

In [4]:
# Lectura de los archivos y construcción de una lista de DataFrames

# Build a flat list of DataFrames: one per non-empty "results" page found in
# any of the data files.
dataframes = []
for filename in files:
    # A separate name for the handle keeps the loop variable from being shadowed.
    with open(filename, "r") as handle:
        # Each file holds several JSON documents separated by a blank line.
        data = handle.read().split("\n\n")

    # Drop empty and whitespace-only chunks (e.g. extra newlines at the end of
    # the file), which json.loads would reject.
    data = [chunk for chunk in data if chunk.strip()]

    for chunk in data:
        # Parse each document once and skip pages without results.
        results = json.loads(chunk)["results"]
        if not results:
            continue

        dataframes.append(pd.DataFrame({
            "Name": [place["name"] for place in results],
            "Latitud": [place["geometry"]["location"]["lat"] for place in results],
            "Longitud": [place["geometry"]["location"]["lng"] for place in results],
            "Tipos": [place["types"] for place in results],
        }))

In [5]:
# Construcción del DataFrame

# Stack every page into one table, keep a single row per coordinate pair and
# renumber the rows from zero.
big_df = (
    pd.concat(dataframes, ignore_index = True, verify_integrity = True)
    .drop_duplicates(["Latitud", "Longitud"])
    .reset_index(drop=True)
)
big_df

Unnamed: 0,Name,Latitud,Longitud,Tipos
0,Mixturas Gastro Bar,40.448136,-3.700150,"[bar, restaurant, food, point_of_interest, est..."
1,La Bohemia Paraguaya,40.447423,-3.702442,"[bar, restaurant, food, point_of_interest, est..."
2,Willy´S Bar Hamburgueseria,40.447299,-3.701862,"[meal_delivery, meal_takeaway, bar, restaurant..."
3,Griselda Aguilera Fernández,40.447257,-3.701890,"[bar, point_of_interest, establishment]"
4,La_Esquina 23,40.447377,-3.701910,"[bar, liquor_store, restaurant, food, point_of..."
...,...,...,...,...
3802,Colegio Mayor El Faro,40.438464,-3.718145,"[university, point_of_interest, establishment]"
3803,CENTRO DE ESTUDIOS UNIVERSITARIOS RIDAN,40.432448,-3.717327,"[university, point_of_interest, establishment]"
3804,Voxel School,40.432429,-3.713577,"[university, point_of_interest, establishment]"
3805,Colegio Mayor Santa María del Pino,40.440427,-3.716245,"[university, point_of_interest, establishment]"


In [6]:
# Construcción de un .csv a partir del DataFrame

# Persist the deduplicated table; index=False keeps the row index out of the file.
big_df.to_csv("dataframe.csv", index=False)

In [7]:
# Lectura del archivo de datos dataframe.csv

# Read the CSV back for inspection only (the result is displayed, not assigned).
# As the rendered output shows, the list-valued "Tipos" cells come back as text.
pd.read_csv("dataframe.csv")

Unnamed: 0,Name,Latitud,Longitud,Tipos
0,Mixturas Gastro Bar,40.448136,-3.700150,"['bar', 'restaurant', 'food', 'point_of_intere..."
1,La Bohemia Paraguaya,40.447423,-3.702442,"['bar', 'restaurant', 'food', 'point_of_intere..."
2,Willy´S Bar Hamburgueseria,40.447299,-3.701862,"['meal_delivery', 'meal_takeaway', 'bar', 'res..."
3,Griselda Aguilera Fernández,40.447257,-3.701890,"['bar', 'point_of_interest', 'establishment']"
4,La_Esquina 23,40.447377,-3.701910,"['bar', 'liquor_store', 'restaurant', 'food', ..."
...,...,...,...,...
2079,ACADEMIA NG,40.436930,-3.715547,"['university', 'point_of_interest', 'establish..."
2080,Colegio Mayor De Santiago Apostol,40.436417,-3.714796,"['university', 'point_of_interest', 'establish..."
2081,AEstructuras.com,40.437712,-3.716065,"['university', 'point_of_interest', 'establish..."
2082,Universidad Nebrija Edificio Joaquín María López,40.437889,-3.716176,"['university', 'point_of_interest', 'establish..."


In [4]:
# 4. 

# Función para extraer un subconjunto de un DataFrame, reducirlo a un tanto por ciento

def sample(dataframe, p = 0.8):
    """Return a random subset of the rows of ``dataframe`` in their original order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to sample from.
    p : float, default 0.8
        Fraction of rows to keep; ``round(p * nrows)`` rows are drawn without
        replacement.

    Returns
    -------
    pandas.DataFrame
        The selected rows, still carrying their original index labels.

    Raises
    ------
    ValueError
        If more rows are requested than the frame contains (``p`` above 1).
    """
    nrows = dataframe.shape[0]
    if nrows == 0:
        # Nothing to draw from; return the (empty) frame's rows unchanged.
        return dataframe.iloc[0:0, :]

    # Draw from all positions 0..nrows-1. The former ``range(0, nrows - 1)``
    # left the last row out, so it could never be selected and p = 1.0 failed.
    positions = np.random.choice(nrows,
                                 size = round(p * nrows),
                                 replace = False)

    # Sorting restores the original row order after the random draw.
    return dataframe.iloc[np.sort(positions), :]


# Función para asignar un grupo a los registros de una determinada categoría

def assign_category(dataframe, placetype):
    """Overwrite the "Tipos" column with the group that contains ``placetype``.

    The first group of DICT_CATEGORIES listing ``placetype`` wins. When no
    group lists it, the frame is left untouched. The frame is modified in
    place and also returned.
    """
    group = next((label for label, members in DICT_CATEGORIES.items()
                  if placetype in members), None)
    if group is not None:
        dataframe["Tipos"] = group

    return dataframe

In [5]:
# Draw the default 80 % sample of big_df and renumber its rows.
# NOTE(review): this needs big_df from the cell that builds it; the recorded
# NameError shows that cell had not been run in this kernel session.
small_df = sample(big_df).reset_index(drop=True)
small_df

NameError: name 'big_df' is not defined

In [5]:
# 5.

# Lectura de los archivos y construcción de una lista de listas de DataFrames

# Build a list with one entry per data file; each entry is the list of
# DataFrames made from that file's non-empty "results" pages.
dataframes = []
for filename in files:
    # A separate name for the handle keeps the loop variable from being shadowed.
    with open(filename, "r") as handle:
        # Each file holds several JSON documents separated by a blank line.
        data = handle.read().split("\n\n")

    # Drop empty and whitespace-only chunks (e.g. extra newlines at the end of
    # the file), which json.loads would reject.
    data = [chunk for chunk in data if chunk.strip()]

    dataframe = []
    for chunk in data:
        # Parse each document once and skip pages without results.
        results = json.loads(chunk)["results"]
        if not results:
            continue

        dataframe.append(pd.DataFrame({
            "Name": [place["name"] for place in results],
            "Latitud": [place["geometry"]["location"]["lat"] for place in results],
            "Longitud": [place["geometry"]["location"]["lng"] for place in results],
            "Tipos": [place["types"] for place in results],
        }))

    # Appended even when empty, so dataframes stays parallel to files.
    dataframes.append(dataframe)

In [6]:
# 6.

# Construcción de un DataFrame único para cada categoría en la lista, una lista de DataFrames

# One concatenated DataFrame per file; files that yielded no pages are skipped.
lista_dataframes = [
    pd.concat(pages, ignore_index = True, verify_integrity = True)
    for pages in dataframes
    if pages
]

In [7]:
# 7.

# Limpieza de lista de DataFrames: Asignación de grupos homogéneos y reducción

# Recover the category each concatenated DataFrame came from. lista_dataframes
# is built from `files` with empty entries dropped, so pairing it with
# CATEGORIES directly would shift every label after a missing or empty file.
placetypes = [name for name, pages in zip(files, dataframes) if pages]

# Replace the raw type lists with the group label, then keep a random sample
# of each category with its rows renumbered.
lista_reduced_dataframes = []
for dataframe, placetype in zip(lista_dataframes, placetypes):
    dataframe = assign_category(dataframe, placetype)
    lista_reduced_dataframes.append(sample(dataframe).reset_index(drop=True))

In [8]:
# Preview the first rows of the first reduced DataFrame.
lista_reduced_dataframes[0].head()

Unnamed: 0,Name,Latitud,Longitud,Tipos
0,La Bohemia Paraguaya,40.447423,-3.702442,ESTABLISHMENTS
1,La_Esquina 23,40.447377,-3.70191,ESTABLISHMENTS
2,El Castillo,40.447357,-3.700643,ESTABLISHMENTS
3,Bar Py,40.447328,-3.702381,ESTABLISHMENTS
4,Tu Rincón,40.446545,-3.701944,ESTABLISHMENTS


In [8]:
# 8.

# Eliminación de duplicados

# Join the per-category samples, keep a single row per coordinate pair and
# renumber the rows from zero.
last_df = (
    pd.concat(lista_reduced_dataframes, ignore_index = True, verify_integrity = True)
    .drop_duplicates(["Latitud", "Longitud"])
    .reset_index(drop=True)
)

In [10]:
# Display the deduplicated, grouped table.
last_df

Unnamed: 0,Name,Latitud,Longitud,Tipos
0,La Bohemia Paraguaya,40.447423,-3.702442,ESTABLISHMENTS
1,La_Esquina 23,40.447377,-3.701910,ESTABLISHMENTS
2,El Castillo,40.447357,-3.700643,ESTABLISHMENTS
3,Bar Py,40.447328,-3.702381,ESTABLISHMENTS
4,Tu Rincón,40.446545,-3.701944,ESTABLISHMENTS
...,...,...,...,...
1845,ACADEMIA NG,40.436930,-3.715547,EDUCATION
1846,Colegio Mayor De Santiago Apostol,40.436417,-3.714796,EDUCATION
1847,AEstructuras.com,40.437712,-3.716065,EDUCATION
1848,Universidad Nebrija Edificio Joaquín María López,40.437889,-3.716176,EDUCATION


In [9]:
# MAPA DE CALOR

# De todo esto lo único que se van a necesitar van a ser pares de coordenadas

In [10]:
# 9.

# Extracción de las coordenadas de un DataFrame

def obtain_coordinates(dataframe, lng_col = "Longitud", lat_col = "Latitud"):
    """Extract the coordinates of every row as a list of dicts.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding one point per row.
    lng_col, lat_col : str
        Names of the longitude and latitude columns.

    Returns
    -------
    list of dict
        One ``{"lng": ..., "lat": ...}`` dict per row, in row order.
    """
    # Walk the two columns by position. The former ``column[row]`` lookup was
    # label-based and failed on any index that is not exactly 0..n-1 (for
    # example after dropping rows without resetting the index).
    return [{"lng": lng, "lat": lat}
            for lng, lat in zip(dataframe[lng_col], dataframe[lat_col])]

In [11]:
# 10.

# Escritura de datos (collection features) en un archivo en formato json

def write_json(data, filename):
    """Serialize ``data`` as JSON into ``filename``, replacing any existing file."""
    with open(filename, "w") as handle:
        json.dump(data, handle)

In [12]:
# 11. 

# Creación de feature de coordenada

def create_feature(coordinates):
    """Wrap one coordinate dict in a GeoJSON-style Point feature.

    ``coordinates`` must provide the keys "lng" and "lat"; the point is stored
    in [lng, lat] order.
    """
    position = [coordinates["lng"], coordinates["lat"]]
    geometry = {"type": "Point", "coordinates": position}

    return {"type": "Feature", "geometry": geometry}

In [13]:
# 12.

# Creación de feature collection con una lista de features creadas con create_feature

def create_feature_collection(collection):
    """Wrap a list of features in a GeoJSON-style FeatureCollection dict.

    The list is stored as given (not copied) under the "features" key.
    """
    return {"type": "FeatureCollection", "features": collection}

In [14]:
# 13.

# Obtiene una colección a partir de un conjunto de coordenadas. Combina #11 y #12

def obtain_feature_collection(coordinates):
    """Turn an iterable of coordinate dicts into a feature collection.

    Each element goes through create_feature and the resulting list is wrapped
    with create_feature_collection.
    """
    features = [create_feature(pair) for pair in coordinates]

    return create_feature_collection(features)

In [15]:
# Obtiene una colección a partir de un DataFrame

def obtain_df_feature_collection(dataframe, lng_col = "Longitude", lat_col = "Latitude"):
    """Build a feature collection from the coordinate columns of a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding one point per row.
    lng_col, lat_col : str
        Names of the longitude and latitude columns. The defaults keep the
        previously hard-coded "Longitude"/"Latitude". Pass "Longitud"/"Latitud"
        for the frames built earlier in this notebook.

    Returns
    -------
    dict
        The result of obtain_feature_collection on the extracted coordinates.
    """
    coordinates = obtain_coordinates(dataframe,
                                     lng_col = lng_col,
                                     lat_col = lat_col)

    return obtain_feature_collection(coordinates)

In [16]:
# RUTAS

In [23]:
# 14.

# Creación de objeto json para las rutas

def create_route_json(coordinates, dtype = "dict"):
    """Build a GeoJSON-style LineString feature from a sequence of points.

    With ``dtype == "dict"`` each point is read from its "lng" and "lat" keys.
    With ``dtype == "list"`` the first two items of each point are used. Any
    other ``dtype`` stores ``coordinates`` exactly as received.
    """
    if dtype == "dict":
        points = [[point["lng"], point["lat"]] for point in coordinates]
    elif dtype == "list":
        points = [[point[0], point[1]] for point in coordinates]
    else:
        points = coordinates

    geometry = {"type": "LineString", "coordinates": points}
    return {"type": "Feature", "geometry": geometry}

In [13]:
# Extract a {"lng", "lat"} dict for every row of last_df (default column names)
# and display the whole list.
coordinates = obtain_coordinates(last_df)
coordinates

[{'lng': -3.702442, 'lat': 40.4474229},
 {'lng': -3.7018623, 'lat': 40.4472992},
 {'lng': -3.7018904, 'lat': 40.4472566},
 {'lng': -3.7019096, 'lat': 40.44737680000001},
 {'lng': -3.700643099999999, 'lat': 40.447357},
 {'lng': -3.702380999999999, 'lat': 40.4473276},
 {'lng': -3.7019439, 'lat': 40.446545},
 {'lng': -3.701993, 'lat': 40.44656},
 {'lng': -3.7002443, 'lat': 40.4469571},
 {'lng': -3.7001936, 'lat': 40.4468941},
 {'lng': -3.7001767, 'lat': 40.4469032},
 {'lng': -3.7019023, 'lat': 40.44835399999999},
 {'lng': -3.7021091, 'lat': 40.4482902},
 {'lng': -3.7018093, 'lat': 40.44838319999999},
 {'lng': -3.7027218, 'lat': 40.4482489},
 {'lng': -3.7002861, 'lat': 40.4460627},
 {'lng': -3.7033401, 'lat': 40.4470282},
 {'lng': -3.7041686, 'lat': 40.4463895},
 {'lng': -3.703704299999999, 'lat': 40.4474922},
 {'lng': -3.704215600000001, 'lat': 40.4460352},
 {'lng': -3.7041846, 'lat': 40.4459976},
 {'lng': -3.7042338, 'lat': 40.4473977},
 {'lng': -3.7048414, 'lat': 40.44752940000001},
 {'

In [15]:
# NOTE(review): `data` is not defined in this cell. Its only visible assignments
# are in the file-reading loops, where it holds the text chunks of the last file
# read. That is presumably not what belongs in "json_danger"; confirm intent.
# A later cell writes danger_data to the same "json_danger" path.
write_json(data, "json_danger")

In [35]:
# Build a LineString feature from the coordinate dicts (default dtype "dict")
# and display it.
route = create_route_json(coordinates)
route

{'type': 'Feature',
 'geometry': {'type': 'LineString',
  'coordinates': [[-3.702442, 40.4474229],
   [-3.7018623, 40.4472992],
   [-3.7018904, 40.4472566],
   [-3.7019096, 40.44737680000001],
   [-3.700643099999999, 40.447357]]}}

In [38]:
# Save the route feature to the file "json_route" (no extension).
write_json(route, "json_route")

In [17]:
# Load the two point sets: rojos.csv as the "danger" points and azules.csv as
# the "safe" ones. A later cell passes both to obtain_df_feature_collection,
# which by default reads their "Longitude" and "Latitude" columns.
danger = pd.read_csv("rojos.csv")
safe = pd.read_csv("azules.csv")

In [18]:
# Row counts of the danger and safe point sets, one per line.
print(len(danger), len(safe), sep="\n")

1174
73


In [19]:
# Convert each point set into a feature collection dict.
danger_data = obtain_df_feature_collection(danger)
safe_data = obtain_df_feature_collection(safe)

In [20]:
# Save both feature collections as JSON files (file names have no extension).
write_json(danger_data, "json_danger")
write_json(safe_data, "json_safe")

In [22]:
# Read "json_danger" back and parse it to check what was written; the parsed
# object is only displayed, not assigned.
with open("json_danger") as file:
    json_data = file.read()
    
json.loads(json_data)

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-3.701993, 40.44656]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-3.7032428000000013, 40.4470797]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-3.7051909, 40.447154]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-3.7041845999999996, 40.445997600000005]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-3.7032428000000013, 40.4470797]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-3.7039852999999994, 40.4481584]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-3.704605900000001, 40.44668789999999]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-3.7046538, 40.4465075]}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-3.7051909, 40.447154]}},
  