In [1]:
from pymongo import MongoClient
import requests
import json
import pandas as pd
import numpy as np
from getpass import getpass
import os

import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster

In [2]:
# Ask for the Foursquare API key interactively so it is never echoed or saved
# in the notebook; it is later sent as the "Authorization" request header.
token = getpass("Foursquare API key: ")

········


## Extract from FourSquare API

In [12]:
def foursquare_places(location, radius, query="", category=""):
    """
    Search the Foursquare Places API (v3) around a point and return the decoded JSON.

    Results are requested sorted by distance, with ``limit`` set to 50.
    The API key is read from the module-level ``token`` variable.

    :location: str : "latitude,longitude" around which to retrieve place
        information, e.g. "43.37012643,-8.39114853"
    :radius: int : radius in meters defining the area to bias search results, e.g. 10000
    :query: str : OPTIONAL: a string to be matched against all content for a place
    :category: int or str : OPTIONAL: return only places matching the given category id(s)
        see category taxonomy --> https://location.foursquare.com/places/docs/categories
    :return: dict : the JSON body of the response
    :raises requests.HTTPError: if the API answers with a 4xx/5xx status
    :raises requests.Timeout: if the API does not answer within 30 seconds
    """
    url = "https://api.foursquare.com/v3/places/search"

    params = {
        "ll": location,
        "radius": radius,
        "limit": 50,
        "sort": "DISTANCE",  # the API sorts by relevance by default
    }
    # Send the optional filters only when they carry a value, instead of
    # passing empty "query=" / "categories=" parameters to the API.
    if query:
        params["query"] = query
    if category:
        params["categories"] = category

    headers = {
        "Accept": "application/json",
        "Authorization": token,
    }

    # A timeout keeps the notebook from hanging forever on a stalled
    # connection; raise_for_status surfaces auth/quota errors here rather
    # than as a confusing KeyError on "results" further downstream.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()

# Access a DB collection

In [13]:
def access_mdb_local_collection(database, collection):
    """
    Return a collection of a database hosted on the local MongoDB server.

    database: str : name of the database to open
    collection: str : name of the collection inside that database
    """
    # Open a client against the local server (default MongoDB port).
    local_client = MongoClient("localhost:27017")

    # Walk down server -> database -> collection and hand the collection back.
    target_db = local_client[database]
    return target_db.get_collection(collection)

## Insert into a given collection:

In [14]:
def insert_unique_parents(response, mdb_collection):
    """
    Insert the places of a search response that ARE NOT already in the given
    collection AND ARE NOT children of another place.

    A place is considered already registered when its "fsq_id" is present in
    the collection (or was inserted earlier during this same call). A place is
    considered a child when its "related_places" entry contains a "parent" key.

    response: dict : decoded search response; its "results" list is what gets inserted
    mdb_collection: collection object : where to insert; must provide
        ``find(filter, projection)`` and ``insert_one(document)``

    Prints how many items were inserted and returns None.
    Raises KeyError if ``response`` has no "results" key.
    """
    inserted = 0

    # Ids already stored. A set gives constant-time membership checks.
    registered = {
        doc["fsq_id"]
        for doc in mdb_collection.find({}, {"_id": 0, "fsq_id": 1})
        if "fsq_id" in doc
    }

    for place in response["results"]:
        fsq_id = place["fsq_id"]

        # Skip places that are already stored.
        if fsq_id in registered:
            continue

        # Skip children of another place; a missing or empty "related_places"
        # means the place has no parent.
        related = place.get("related_places") or {}
        if "parent" in related:
            continue

        mdb_collection.insert_one(place)
        # Remember the id so a duplicate later in this same response is not
        # inserted a second time.
        registered.add(fsq_id)
        inserted += 1

    print(f'{inserted} items inserted into the collection')

    return

## Visualization Functions:

In [15]:
def extract_parameters(i):
    """
    Flatten one place document into the fields used for plotting.

    i: dict : a place document with "name", "geocodes", "categories" and
        (optionally) "location" entries

    Returns a dict with keys "city", "name", "cat_code", "lat" and "lon".
    "city" is NaN when the document carries no locality; "cat_code" is a
    list because a place can belong to more than one category.
    """
    main_geocode = i["geocodes"]["main"]

    try:
        locality = i["location"]["locality"]
    except KeyError:
        # No "location" or no "locality" recorded for this place.
        locality = np.nan

    return {
        "city": locality,
        "name": i["name"],
        "cat_code": [category["id"] for category in i["categories"]],
        "lat": main_geocode["latitude"],
        "lon": main_geocode["longitude"],
    }

In [16]:
def heat_map(df, to_map):
    """
    Add a heat-map layer to ``to_map`` built from the "lat" and "lon"
    columns of ``df``. The map is modified in place; nothing is returned.
    """
    coordinates = df[["lat", "lon"]]
    layer = HeatMap(data=coordinates)
    layer.add_to(to_map)
    return None

# MAIN:

We extract information for each provincial capital ("capital de provincia").

In [17]:
# Load the CSV of places to search around (path is relative to the notebook).
df = pd.read_csv("../data/poblaciones.csv")

In [18]:
# Slightly reshape the loaded DataFrame: split the combined "Lat;Lon" column
# into separate "lat" and "lon" columns, then discard the combined one.
lat_lon_parts = df["Lat;Lon"].str.split(";", expand=True)
df[["lat", "lon"]] = lat_lon_parts
df = df.drop(columns=["Lat;Lon"])

Extract information from Foursquare with `foursquare_places` (defined above) and store the results in MongoDB.

In [19]:
# For each row of the capitals table: search Foursquare around its
# coordinates and store the new top-level places in MongoDB.

# Search settings are identical for every row, so they are set once.
radius = 10000  # meters (10 km)
query = "universidad"
category = ""  # no category filter (12125 was noted as a candidate id)

# CONNECT: reuse one collection handle for the whole run instead of opening
# a new MongoDB client on every iteration.
mdb_collection = access_mdb_local_collection("foursquare", "spain_universities")

for _, row in df.iterrows():

    # Read values from the row itself rather than via df.iloc[index], which
    # is only correct while the index is the default 0..n-1 range.
    # The coordinates are strings that may use a decimal comma; the API
    # expects a decimal point.
    lat = row["lat"].replace(",", ".")
    lon = row["lon"].replace(",", ".")

    location = f"{lat},{lon}"

    # EXTRACT
    response = foursquare_places(location, radius, query, category)

    # INSERT
    insert_unique_parents(response, mdb_collection)

0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0 items inserted into the collection
0

Generate a DataFrame from the places stored in MongoDB:

In [21]:
c = access_mdb_local_collection("foursquare", "spain_universities")

# 1. No filter: take every document in the collection.
filt = {}

# 2. Fields to retrieve from each document.
projection = {"_id": 0, "categories": 1, "geocodes": 1, "name": 1, "location": 1}

# 3. Extract the data.
datos = list(c.find(filt, projection))

# 4. Flatten every document, then gather the values column by column.
parsed = [extract_parameters(place) for place in datos]

register = {
    "city": [p["city"] for p in parsed],
    "place_name": [p["name"] for p in parsed],
    "categories": [p["cat_code"] for p in parsed],
    "lat": [p["lat"] for p in parsed],
    "lon": [p["lon"] for p in parsed],
}

df = pd.DataFrame(register)

Create the map

In [23]:
# 1. Set the base map at the coordinates and zoom level given below.
spn_map = Map(location=[40.40841191, -3.68760088], zoom_start=6, tiles="CartoDB positron")

# 2. Add the heat-map layer built from the "lat"/"lon" columns of df.
heat_map(df, spn_map)

# 3. Save the map as a standalone HTML file (it is not opened here).
#    Create the output folder first so saving does not fail on a fresh checkout.
path = "../figures/spain_universities.html"
os.makedirs(os.path.dirname(path), exist_ok=True)
spn_map.save(path)