In [1]:
# Importaciones

import json
import numpy as np
import pandas as pd
from random import choice
from os import listdir
from os.path import isfile, isdir

In [2]:
# Place types taken into account   ** TODO: review **

CATEGORIES = [
    "bar",
    "bus_station",
    "cafe",
    "department_store",
    "hospital",
    "park",
    "parking",
    "pharmacy",
    "primary_school",
    "restaurant",
    "school",
    "secondary_school",
    "shopping_mall",
    "subway_station",
    "taxi_stand",
    "tourist_attraction",
    "transit_station",
    "university",
]

# Alternative single-type run, kept disabled:
# CATEGORIES = ["protect"]

# Groups of place types: group label -> place types that belong to it

DICT_CATEGORIES = {
    "TRANSPORTS": [
        "bus_station",
        "subway_station",
        "parking",
        "taxi_stand",
        "transit_station",
    ],
    "ESTABLISHMENTS": ["bar", "cafe", "restaurant"],
    "STORES": ["department_store", "shopping_mall"],
    "HEALTH": ["hospital", "pharmacy"],
    "PROTECT": ["protect"],
    "EDUCATION": [
        "primary_school",
        "school",
        "secondary_school",
        "university",
    ],
    "OTHER": ["park", "tourist_attraction"],
}

In [3]:
# Files holding the extracted data (one file per place type, named after it).
# To process every file in the working directory, replace CATEGORIES with listdir().

# Keep only names that exist as regular files, and drop hidden files,
# notebooks and CSV outputs. The filtering is done in a single pass so the
# list is never mutated while it is being cleaned up (the previous loop
# called .remove() on an undefined name `files` and raised NameError as soon
# as one of those entries showed up).
FILES = sorted(
    name
    for name in CATEGORIES
    if isfile(name)
    and not name.startswith(".")
    and not name.endswith(("ipynb", "csv"))
)

In [4]:
# Function that extracts a random subset of a DataFrame, keeping a fraction of its rows

def sample(dataframe, p = 0.8):
    """Return a random subset of the rows of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to draw rows from.
    p : float, default 0.8
        Fraction of rows to keep; ``round(p * nrows)`` rows are returned.

    Returns
    -------
    pandas.DataFrame
        The selected rows, in their original relative order and with their
        original index labels.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when the requested number of rows is
        negative or larger than the number of rows available.
    """
    nrows = dataframe.shape[0]

    # np.random.choice(n, ...) draws positions from 0..n-1, so every row,
    # including the last one, is eligible. Drawing from range(0, nrows - 1)
    # excluded the last row and failed for one-row frames and for p = 1.
    positions = np.random.choice(nrows,
                                 size = round(p * nrows),
                                 replace = False)

    # Sort the drawn positions so the subset keeps the original row order.
    return dataframe.iloc[np.sort(positions), :]


# Function that labels every record of a given place type with its group

def assign_category(dataframe, placetype):
    """Overwrite the "Types" column of ``dataframe`` with the group of ``placetype``.

    The group is the first key of DICT_CATEGORIES whose list contains
    ``placetype``. When no group contains it, the frame is left untouched.
    The frame is modified in place and also returned.
    """
    group = next(
        (label for label, members in DICT_CATEGORIES.items() if placetype in members),
        None,
    )
    if group is not None:
        dataframe["Types"] = group

    return dataframe

In [5]:
# Read the files and build a list of DataFrames

def create_dataframes_list(files_list):
    """Parse raw result files into a list of DataFrames, one per file.

    Each file is expected to hold JSON documents separated by a blank line;
    every document must have a "results" key whose value is a list of places
    carrying "name", "geometry" -> "location" -> "lat"/"lng" and "types".

    Parameters
    ----------
    files_list : iterable of str
        Paths of the files to read.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file with the columns Name, Latitude, Longitude and
        Types (the raw list of types of each place). Files that yield no
        place at all are skipped, so the returned list can be shorter than
        ``files_list``.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank chunk is not valid JSON.
    KeyError
        If a document or a place lacks one of the expected keys.
    """
    columns = ["Name", "Latitude", "Longitude", "Types"]

    dataframes = []
    for path in files_list:

        # Read the file; a distinct handle name keeps the loop variable intact
        with open(path, "r") as handle:
            chunks = handle.read().split("\n\n")

        # Gather the places of every request stored in the file
        records = []
        for chunk in chunks:
            # Blank or whitespace-only chunks (e.g. trailing newlines) carry no JSON
            if not chunk.strip():
                continue

            # Each document is parsed exactly once
            for place in json.loads(chunk)["results"]:
                location = place["geometry"]["location"]
                records.append({"Name": place["name"],
                                "Latitude": location["lat"],
                                "Longitude": location["lng"],
                                "Types": place["types"]})

        if records:
            dataframes.append(pd.DataFrame(records, columns = columns))

    return dataframes

In [6]:
# Limpieza de lista de DataFrames

# Assign a homogeneous group label to each DataFrame of a list

def assign_dataframe_list(dataframe_list, categories):
    """Label each frame with the group of the place type at the same position.

    Frames and place types are paired positionally; pairing stops at the
    shorter of the two sequences. Frames are labelled through
    ``assign_category`` and the same list object is returned.
    """
    pairs = zip(dataframe_list, categories)
    for frame, kind in pairs:
        assign_category(frame, kind)

    return dataframe_list

# Randomly shrink every DataFrame of a list to a fraction of its rows

def reduce_dataframe_list(dataframe_list):
    """Return a new list holding a random subset of each frame.

    Each frame goes through ``sample`` with its default fraction and is
    re-indexed from zero afterwards.
    """
    reduced = []
    for frame in dataframe_list:
        subset = sample(frame)
        reduced.append(subset.reset_index(drop=True))

    return reduced

# Build a single DataFrame without duplicates

def create_unique_dataframe(dataframe_list):
    """Concatenate the frames and keep one row per (Latitude, Longitude) pair.

    When several rows share the same coordinates, the first one in
    concatenation order is kept. The result is indexed from zero.
    """
    merged = pd.concat(dataframe_list, ignore_index = True)
    unique_rows = merged.drop_duplicates(subset=["Latitude", "Longitude"])

    return unique_rows.reset_index(drop=True)

In [7]:
# Write the DataFrame out as a .csv file

def write_dataframe(filename, dataframe):
    """Save ``dataframe`` to ``filename`` in CSV format, leaving out the index."""
    dataframe.to_csv(path_or_buf=filename, index=False)

In [None]:
# EJEMPLO DE APLICACIÓN

In [8]:
# Parse the raw files listed in FILES: one DataFrame per file that yielded results
dataframes = create_dataframes_list(FILES)

In [9]:
# Label each frame with the group of the place type it was read from.
# The frames were built from FILES (the entries of CATEGORIES that exist on
# disk), so they must be paired with FILES: pairing them with CATEGORIES
# shifts every label as soon as one category file is missing.
# NOTE(review): create_dataframes_list also skips files without results, which
# can still shift the pairing; confirm every file yields at least one place.
dataframe_list = assign_dataframe_list(dataframes, FILES)

# Keep a random fraction of the rows of every frame
dataframe_list = reduce_dataframe_list(dataframe_list)

In [10]:
# Merge all frames into one, keeping a single row per (Latitude, Longitude) pair
df = create_unique_dataframe(dataframe_list)

In [11]:
# Save the merged DataFrame as dataframe.csv (the index is not written)
write_dataframe("dataframe.csv", df)

In [13]:
# Read the CSV back and display its first rows
pd.read_csv("dataframe.csv").head()

Unnamed: 0,Name,Latitude,Longitude,Types
0,Mixturas Gastro Bar,40.448136,-3.70015,ESTABLISHMENTS
1,Willy´S Bar Hamburgueseria,40.447299,-3.701862,ESTABLISHMENTS
2,La_Esquina 23,40.447377,-3.70191,ESTABLISHMENTS
3,El Castillo,40.447357,-3.700643,ESTABLISHMENTS
4,Bar Py,40.447328,-3.702381,ESTABLISHMENTS
