In [44]:
from pymongo import MongoClient
import requests

from dotenv import load_dotenv
import os
import re
import json, requests
import pandas as pd
from pandas.io.json import json_normalize

In [45]:
client = MongoClient()
db = client.get_database("companies")

In [46]:
load_dotenv()

True

In [22]:
#creamos las variables para poder hacer requests en la API foursquare.
foursquare_client_id = os.getenv("foursquare_client_id")

foursquare_client_secret = os.getenv("foursquare_client_secret")

In [43]:
#Hacemos unwind por oficinas, desechando las que no tengan alguna de las coordenadas latitud o longitud.
#De esta manera creamos un listado de empresas con una unica oficina. Si anteriormente una empresa tenia 2 o más 
#oficinas, se creará un documento por cada oficina.

res = db.companies.aggregate([
                      {"$unwind":"$offices" },
                      {"$match":{"offices.latitude":{"$ne":None}}},
                      {"$match":{"offices.longitude":{"$ne":None}}},
                      {"$project":{"_id":0}} # We omit the id, because when inserting into new collection, we can't
                                             # have two documents with same id.
                     ])

db.companies_unwinded.insert_many(res)

In [52]:
offices = db.companies_unwinded

In [53]:
res = offices.find({},{"offices":1})

In [54]:
#We will use GeoJSON Point when working in MongoDB, so will create a Sphere 2d index to get the coordinates 
#of each document.
for c in res:
    filt = {"_id":c["_id"]}
    coord = {
              "type": "Point",
              "coordinates": [c["offices"]["longitude"], c["offices"]["latitude"]]
            }
    update = {"$set":{"coord":coord}}
    offices.update_one(filt,update)

In [None]:
#Creamos una funcion para encontrar en foursquare guarderias o escuelas infantiles a un radio de 2km 
#dadas unas coordenadas.
def education_within_2km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['4f4533814b9074f6e4fb0107', '4f4533804b9074f6e4fb0105'],
                    radius = 2000,
                    limit=5
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
#Al buscar información desde la API de foursquare, esta nos devuelve un json con el listado de guarderias y colegios 
#infantiles. La respuesta de esta request/peticion a la API de foursquare lo convertimos a formato json. Una vez convertido, crearemos una lista de tuplas 
#cogiendo las coordenadas (latitud y longitud) de cada uno de los elementos de ese listado que hemos generado. 
#El resto de información la podemos desechar.
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [100]:
# We provide random coordinates to the function just to ensure the function works and returns coordinates nearby.
education_coordinates = education_within_2km(47.603122, -122.333253)
education_coordinates

[(47.60408020019531, -122.33384704589844),
 (47.601499968546136, -122.31578595907476),
 (47.61723529168316, -122.3352597198037)]

In [90]:
#Creamos una funcion genérica que usaremos en nuestras búsquedas en MongoDB
#Nos hemos dado cuenta de que cada vez que hacemos queries en MongoDB, repetimos los 
#filtros relativos a busqueda geoespacial y dist. max. de 2km por lo que hemos hecho una 
#función para no repetir codigo.

def filter_in_companies_within_2km(query, latitude, longitude):
    point = {
        "type":"Point",
        "coordinates":[longitude, latitude]
    }
    distance_query = {
        "coord":{
            "$near":{
                "$geometry": point,
                "$maxDistance": 2000
           }
        }
    }#aqui abajo juntamos los dos diccionarios en uno solo.
    query.update(distance_query)
    #Creamos una lista de tuplas cogiendo las coordenadas (latitud y longitud) de cada uno de 
    #los elementos de esa lista que hemos generado. El resto de información la podemos desechar.
    coordinates = []
    for result in offices.find(query,{"coord":1}):
        coord = result["coord"]["coordinates"]
        coordinates.append((coord[1], coord[0]))
        
    return coordinates

#Startups que han recaudado más de $1M y fundadas despues de 2008.
def startups_raised_over_1m_within_2km(latitude, longitude):
    query = {
        "total_money_raised": {
            "$regex": ".*M"
        },
        "founded_year": {
            "$gte": 2008
        }
    }#esta funcion devuelve lo que devuelve "filter in companies within 2km"
    return filter_in_companies_within_2km(query, latitude, longitude)

print(startups_raised_over_1m_within_2km(47.603122, -122.333253))

[(47.603364, -122.333359), (47.6103008, -122.3399782)]


In [89]:
#Creamos una funcion para conseguir en MongoDB las coordenadas de las empresas de 
#diseño de videojuegos a un radio inferior a 2km.
def game_design_companies_within_2km(latitude, longitude):
    query = {
        "category_code": "games_video"
    }
    return filter_in_companies_within_2km(query, latitude, longitude)

print(game_design_companies_within_2km(47.603122, -122.333253))

[(47.600818, -122.334117), (47.6015315, -122.335943), (47.611012, -122.333523), (47.6103008, -122.3399782), (47.6141, -122.33763), (47.615313, -122.323408), (47.6161693, -122.3451897)]


In [93]:
#Creamos una funcion para encontrar en foursquare lugares de ocio nocturno (nightlife spot) a un radio de 2km 
#dadas unas coordenadas.
def nightlife_spots_within_2km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['4d4b7105d754a06376d81259'],
                    radius = 2000,
                    limit=10
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [94]:
nightlife_spots_coordinates = nightlife_spots_within_2km(47.603122, -122.333253)
nightlife_spots_coordinates

[(47.602012114767625, -122.33179258432052),
 (47.601004162802, -122.334119031303),
 (47.6001432, -122.3310024),
 (47.601737052225594, -122.33312973915179),
 (47.60011606018805, -122.3315971380008),
 (47.60808617171844, -122.33495508012143),
 (47.60524931782989, -122.34050803533015),
 (47.599290120142115, -122.33278667766864),
 (47.59800673680192, -122.32776719034055),
 (47.60089818452381, -122.33303150814028)]

In [145]:
#Creamos una funcion para encontrar en foursquare starbucks a un radio de 1km 
#dadas unas coordenadas.
def starbucks_within_1km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    #categoryId=['4bf58dd8d48988d1e0931735'],
                    query = "starbucks",
                    radius = 1000,
                    limit=3
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

###ANOTHER OPTION: SEARCH FOR VENUES
#def starbucks_within_1km(latitude,longitude):
#
#    url = 'https://api.foursquare.com/v2/venues/search'###SEARCH FOR VENUES
#
#    params = dict(
#                    client_id= foursquare_client_id ,
#                    client_secret= foursquare_client_secret ,
#                    v='20180323',
#                    ll= str(latitude)+","+str(longitude),
#                    categoryId=['556f676fbd6a75a99038d8ec'],#venue Chain ID of Starbucks https://developer.foursquare.com/docs/build-with-foursquare/chains/
#                    radius = 1000,
#                    limit=3
#                )
#    resp = requests.get(url=url, params=params)
#    data = json.loads(resp.text)
#    coord_items = []
#    for venue in data["response"]["venues"]:
#        lat = venue["location"]["lat"]            
#        lng = venue["location"]["lng"]
#        coord_items.append((lat,lng))
#    return coord_items

In [147]:
starbucks_coordinates = starbucks_within_1km(42.35888, -71.05682)
starbucks_coordinates



[(42.35814839, -71.05817074),
 (42.35914156082082, -71.05554804778141),
 (42.35938374, -71.05930244)]

In [141]:
#Creamos una funcion para encontrar en foursquare AEROPUERTOS a un radio de 25km 
#dadas unas coordenadas.
def airports_within_25km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['4bf58dd8d48988d1eb931735'], #Airport Terminal ID
                    radius = 25000,
                    limit=1
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [142]:
airports_coordinates = airports_within_25km(47.603122, -122.333253)
airports_coordinates

[(47.44358853419229, -122.302508354187)]

In [135]:
#Creamos una funcion para encontrar en foursquare estadios de basket a un radio de 10km 
#dadas unas coordenadas.
def basketball_stadium_within_10km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['4bf58dd8d48988d18b941735'], #Basketball Stadium
                    radius = 10000,
                    limit=1
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [136]:
basketball_stadium_coordinates = basketball_stadium_within_10km(47.603122, -122.333253)
basketball_stadium_coordinates

[(47.68044866290678, -122.3326198276763)]

In [137]:
#Creamos una funcion para encontrar en foursquare Pet Services a un radio de 1km 
#dadas unas coordenadas.
def pet_services_within_1km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['5032897c91d4c4b30a586d69'], #Pet Services
                    radius = 1000,
                    limit=1
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [138]:
pet_services_coordinates = pet_services_within_1km(47.603122, -122.333253)
pet_services_coordinates

[(47.601537, -122.330604)]

In [139]:
#Creamos una funcion para encontrar en foursquare Vegan Restaurants a un radio de 1km 
#dadas unas coordenadas.
def vegan_restaurants_within_1km(latitude,longitude):

    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
                    client_id= foursquare_client_id ,
                    client_secret= foursquare_client_secret ,
                    v='20180323',
                    ll= str(latitude)+","+str(longitude),
                    categoryId=['4bf58dd8d48988d1d3941735'], #Vegan Restaurants
                    radius = 1000,
                    limit=3
                )
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    coord_items = []
    for group in data["response"]["groups"]:
        for item in group["items"]:
            lat = item["venue"]["location"]["lat"]            
            lng = item["venue"]["location"]["lng"]
            coord_items.append((lat,lng))
    return coord_items

In [140]:
vegan_restaurants_coordinates = vegan_restaurants_within_1km(47.603122, -122.333253)
vegan_restaurants_coordinates

[(47.6098817, -122.3366367),
 (47.59842425921504, -122.3239498169),
 (47.60940684867868, -122.34159447826438)]

In [143]:
# List of Offices in San Diego, where we plan to set up our office. We choose this city because the foursquare API 
# only allows us to do 950 requests a day.
# For each office we need to do 7 requests (one per each foursquare function). So the maximum amount of offices we 
# can ask in a day is 950/7 =135. We have filtered in MongoDB the offices by cities and found that San Diego has 
# 114 different documents(offices), so we can find the most suitable office within this city by checking all 
# the offices in it.

filter_query = {"offices.city": "San Diego"}
project_query = {"_id": 1, "coord.coordinates":1}
san_diego_offices = offices.find(filter_query,project_query).sort("name",1)

for office in san_diego_offices:
    latitude = office["coord"]["coordinates"][1]    
    longitude = office["coord"]["coordinates"][0]
    rank = 0
    
# below you can see the score we have considered for each condition within the function. This increments the rank
# so at the end, when we sort descending the rank, we would be able to see the highest score and the first one 
# would be the most suitable office according to our requirements.

    education_places_count = len(education_within_2km(latitude, longitude)) #API Foursquare
    if education_places_count > 3:
        rank += 90
    elif education_places_count > 0:
        rank += 60

    startups_places_count = len(startups_raised_over_1m_within_2km(latitude, longitude)) #MongoDB
    if startups_places_count > 2:
        rank += 85
    elif startups_places_count > 0:
        rank += 50

    designers_places_count = len(game_design_companies_within_2km(latitude, longitude)) #MongoDB
    if designers_places_count > 2:
        rank += 75
    elif designers_places_count > 0:
        rank += 45

    nightlife_places_count = len(nightlife_spots_within_2km(latitude, longitude)) #API Foursquare
    if nightlife_places_count > 7:
        rank += 70
    elif nightlife_places_count > 3:
        rank += 50
    elif nightlife_places_count > 0:
        rank += 20

    starbucks_places_count = len(starbucks_within_1km(latitude,longitude)) #API Foursquare
    if starbucks_places_count > 1:
        rank += 50
    elif starbucks_places_count > 0:
        rank += 20

    airport_places_count = len(airports_within_25km(latitude,longitude)) #API Foursquare
    if airport_places_count > 0:
        rank += 30

    basketball_places_count = len(basketball_stadium_within_10km(latitude,longitude)) #API Foursquare
    if basketball_places_count > 0:
        rank += 12

    pet_services_places_count = len(pet_services_within_1km(latitude,longitude)) #API Foursquare
    if pet_services_places_count > 0:
        rank += 10

    vegan_places_count = len(vegan_restaurants_within_1km(latitude,longitude)) #API Foursquare
    if vegan_places_count > 2:
        rank += 15
    if vegan_places_count > 1:
        rank += 10
    elif vegan_places_count > 0:
        rank += 5

    # cuando hayamos comprabado todas las restricciones, rank tendrá el valor final, el cual lo añadimos/actualizamos 
    # en el documento de mongo
    filt = {"_id":office["_id"]}
    update = {"$set":{"rank": rank}}
    offices.update_one(filt, update)
    
# Una vez ya hemos iterado sobre todas las oficinas y las hemos rankeado, hacemos una query para devolverlas
# ordenadas desc for rank y esas son las "soluciones" para las restricciones/baremación que hemos definido.
project_query = {"_id": 0, "name": 1, "coord.coordinates":1, "rank": 1}
for san_diego_offices_with_rank in offices.find(filter_query,project_query).sort("rank",-1):
    print(san_diego_offices_with_rank)

{'name': 'Bandsintown', 'coord': {'coordinates': [-71.05682, 42.35888]}, 'rank': 447}
{'name': 'Cuts', 'coord': {'coordinates': [-117.163841, 32.718834]}, 'rank': 382}
{'name': 'MindTouch', 'coord': {'coordinates': [-117.167831, 32.720656]}, 'rank': 382}
{'name': 'Goowy', 'coord': {'coordinates': [-117.163353, 32.71537]}, 'rank': 382}
{'name': 'CheerOutLoud', 'coord': {'coordinates': [-117.163841, 32.718834]}, 'rank': 382}
{'name': 'Repptide', 'coord': {'coordinates': [-117.1565, 32.7154]}, 'rank': 382}
{'name': 'Vidmeter', 'coord': {'coordinates': [-117.163841, 32.718834]}, 'rank': 382}
{'name': 'Rondee', 'coord': {'coordinates': [-117.163841, 32.718834]}, 'rank': 382}
{'name': 'Dandelife', 'coord': {'coordinates': [-117.166941, 32.72038]}, 'rank': 382}
{'name': 'Gridjit', 'coord': {'coordinates': [-117.163841, 32.718834]}, 'rank': 382}
{'name': 'Geary Interactive', 'coord': {'coordinates': [-117.160661, 32.718379]}, 'rank': 382}
{'name': 'The Linkup', 'coord': {'coordinates': [-117.1

In [None]:
# Curiosamente, la oficina que mejor puntuacion tiene (447) no está situada en la ciudad de San Diego, sino en Boston. 
# Por algun motivo, se ha definido en la base de datos "companies" de manera errónea. Nosotros aun asi tomamos esa 
# oficina pues es la que tiene mejor puntuación.