In [1]:
# Imports

import json
import numpy as np
import pandas as pd
from random import choice
from os import listdir
from os.path import isfile, isdir

In [2]:
# Place types taken into account

CATEGORIES = [
    "bar",
    "bus_station",
    "cafe",
    "department_store",
    "hospital",
    "park",
    "parking",
    "pharmacy",
    "primary_school",
    "restaurant",
    "school",
    "secondary_school",
    "shopping_mall",
    "subway_station",
    "taxi_stand",
    "tourist_attraction",
    "transit_station",
    "university",
]

# Disabled alternative selection:
# CATEGORIES = ["protect"]

# Group name -> place types gathered under that group

DICT_CATEGORIES = dict(
    TRANSPORTS = ["bus_station", "subway_station", "parking",
                  "taxi_stand", "transit_station"],
    ESTABLISHMENTS = ["bar", "cafe", "restaurant"],
    STORES = ["department_store", "shopping_mall"],
    HEALTH = ["hospital", "pharmacy"],
    PROTECT = ["protect"],
    EDUCATION = ["primary_school", "school", "secondary_school",
                 "university"],
    OTHER = ["park", "tourist_attraction"],
)

In [3]:
# Extracted files of the considered categories: the category names that exist
# as regular files in the working directory, in sorted order. Hidden files,
# notebooks and CSV outputs are excluded in the same pass.

FILES = sorted(
    name for name in CATEGORIES
    if isfile(name)
    and not name.startswith(".")
    and not name.endswith(("ipynb", "csv"))
)

In [4]:
def sample(dataframe, p = 0.8):
    
    """
    Extract a random subset of rows from a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to draw rows from.
    p : float, default 0.8
        Fraction of the rows to keep; ``round(p * nrows)`` rows are drawn
        without replacement.

    Returns
    -------
    pandas.DataFrame
        The drawn rows, ordered by their position in `dataframe` and keeping
        their original index labels.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when more rows are requested than the
        DataFrame holds (p > 1).
    """
    
    nrows = dataframe.shape[0]
    # Candidates are all row positions 0..nrows-1, so the last row can be
    # drawn and p = 1 returns the whole frame.
    positions = np.random.choice(np.arange(nrows),
                                 size = round(p * nrows),
                                 replace = False)
    
    return dataframe.iloc[np.sort(positions), :]


def assign_category(dataframe, placetype):
    
    """
    Set the "Types" column of a DataFrame to the group of a place type.

    The groups of DICT_CATEGORIES are scanned in order and the first one that
    lists `placetype` is written to every row of "Types". The DataFrame is
    modified in place and also returned; it is left untouched when no group
    lists `placetype`.
    """
    
    group = next((name for name, members in DICT_CATEGORIES.items()
                  if placetype in members), None)
    if group is not None:
        dataframe["Types"] = group
    
    return dataframe

In [5]:
def create_dataframes_list(files_list):
    
    """
    Read extracted files and create a list of DataFrames by categories.

    Each file is read as text and split on blank lines; every non-blank chunk
    is parsed as a JSON document whose "results" entry lists places. One
    DataFrame is built per file with the columns "Name", "Latitude",
    "Longitude" and "Types" (the raw list of types of each place).

    Parameters
    ----------
    files_list : iterable of str
        Paths of the files to read.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per file that contains at least one place, in the order
        of `files_list`. Files without any place add nothing, so the list can
        be shorter than `files_list`.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank chunk is not valid JSON.
    KeyError
        If a place lacks one of the fields read from it.
    """
    
    columns = ["Name", "Latitude", "Longitude", "Types"]
    dataframes = []
    for path in files_list:
        
        # Read extracted files (the handle gets its own name so `path` keeps
        # referring to the file being processed)
        # NOTE(review): the platform default encoding is used; confirm it
        # matches the encoding the files were written with.
        with open(path, "r") as handle:
            chunks = handle.read().split("\n\n")
        
        # Add the places of every search to a single list of records
        records = []
        for chunk in chunks:
            # Blank or whitespace-only chunks (e.g. trailing newlines) carry
            # no document
            if not chunk.strip():
                continue
            # Parse each document once; a missing or empty "results" entry
            # contributes no place
            for place in json.loads(chunk).get("results") or []:
                location = place["geometry"]["location"]
                records.append({"Name": place["name"],
                                "Latitude": location["lat"],
                                "Longitude": location["lng"],
                                "Types": place["types"]})
        
        if records:
            dataframes.append(pd.DataFrame(records, columns = columns))
            
    return dataframes

In [6]:
def assign_dataframe_list(dataframe_list, categories):
    
    """
    Assign a group to every DataFrame of a list.

    DataFrames and place types are paired by position and each pair is handed
    to `assign_category`, which modifies the DataFrame in place. Pairing stops
    at the shorter of the two inputs. The same list object is returned.
    """
    
    pairs = zip(dataframe_list, categories)
    for frame, place_type in pairs:
        assign_category(frame, place_type)
        
    return dataframe_list


def reduce_dataframe_list(dataframe_list):
    
    """
    Apply `sample` to every DataFrame of a list.

    Returns a new list holding, for each input DataFrame, the subset returned
    by `sample` (called with its default fraction) re-indexed from 0.
    """
    
    reduced = []
    for frame in dataframe_list:
        subset = sample(frame)
        reduced.append(subset.reset_index(drop=True))
    
    return reduced


def create_unique_dataframe(dataframe_list):
    
    """
    Merge a list of DataFrames into one DataFrame without repeated places.

    The DataFrames are concatenated in order; rows sharing the same
    ("Latitude", "Longitude") pair are reduced to their first occurrence and
    the result is re-indexed from 0.
    """
    
    coordinates = ["Latitude", "Longitude"]
    merged = pd.concat(dataframe_list, ignore_index = True)
    unique = merged.drop_duplicates(subset = coordinates)
    
    return unique.reset_index(drop = True)

In [7]:
def write_dataframe(filename, dataframe):
    
    """
    Save a DataFrame as a .csv file at `filename`, without the index column.
    """
    
    dataframe.to_csv(path_or_buf = filename, index = False)

In [None]:
### EXAMPLE ###

In [8]:
# Build one DataFrame per extracted file

dataframes = create_dataframes_list(files_list = FILES)

In [9]:
# Group assignment and reduction of the DataFrame list

# `dataframes` was built from FILES (the categories whose file exists), so
# FILES is the list whose positions line up with it.
# NOTE: a file without any place yields no DataFrame, which would still shift
# the pairing for the files that follow it.
dataframe_list = assign_dataframe_list(dataframes, FILES)
dataframe_list = reduce_dataframe_list(dataframe_list)

In [10]:
# Merge every DataFrame into a single one without repeated places

df = create_unique_dataframe(dataframe_list = dataframe_list)

In [11]:
# Save the resulting DataFrame to disk

write_dataframe(filename = "dataframe.csv", dataframe = df)