In [106]:
#LIBRARIES

from pymongo import MongoClient
import pandas as pd
import os
from dotenv import load_dotenv
import requests
import json
from dotenv import load_dotenv
import numpy as np
import folium
from folium import Map, Marker, Icon

# EXTRACT TOP 3 CITIES WITH MONGODB

In [107]:
# Connect to the MongoDB server at localhost:27017 and select the "companies"
# collection of the "ironhack" database.
# `c` is read as a module-level global by mongo_extract.
client = MongoClient("localhost:27017")
db = client["ironhack"]
c = db.get_collection("companies")

In [108]:
def mongo_extract (filter):
    """Query the companies collection and return one row per office.

    Parameters
    ----------
    filter : dict
        MongoDB filter document handed unchanged to ``c.find``. The name
        shadows the ``filter`` builtin; it is kept so existing callers keep
        working.

    Returns
    -------
    pandas.DataFrame
        The company ``name`` next to the fields of each of its offices
        (``city``, ``latitude`` and ``longitude`` per the projection).
        Rows whose city is the empty string are removed and the index is not
        renumbered after that filter. When no document matches, an empty
        frame with those four columns is returned.
    """
    projection = {"_id":0, "name":1, "offices.city":1, "offices.latitude": 1, "offices.longitude":1}

    companies = list(c.find(filter, projection))

    # Without this guard an empty result fails in explode(), because the
    # frame built from an empty list has no "offices" column.
    if not companies:
        return pd.DataFrame(columns=["name", "city", "latitude", "longitude"])

    # One row per (company, office) pair.
    per_office = pd.DataFrame(companies).explode("offices").reset_index(drop=True)

    # Spread every office sub-document over its own columns.
    office_fields = per_office["offices"].apply(pd.Series)
    per_office = pd.concat([per_office.drop(columns=["offices"]), office_fields], axis=1)

    # Return a copy so callers can modify the result without pandas
    # chained-assignment warnings.
    return per_office[per_office["city"] != ""].copy()

In [109]:
# Companies whose total_money_raised contains a "$" or "€" followed, anywhere
# later in the string, by an uppercase "M" or "B"; show the three most frequent cities.
money_filter = {"total_money_raised" : {"$regex" : "[$€].*[MB]"}}
money_df = mongo_extract(money_filter)
money_df["city"].value_counts().head(3)

San Francisco    436
New York         335
London           173
Name: city, dtype: int64

In [110]:
# Companies whose tag_list contains the substring "design" (case-sensitive);
# show the three most frequent cities.
design_filter = {"tag_list" : {"$regex" : "design"}}
design_df = mongo_extract(design_filter)
design_df["city"].value_counts().head(3)

London           24
New York         23
San Francisco    20
Name: city, dtype: int64

In [111]:
# Companies whose category_code is exactly "games_video";
# show the three most frequent cities.
gaming_filter = {"category_code" : "games_video"}
gaming_df = mongo_extract(gaming_filter)
gaming_df["city"].value_counts().head(3)

New York         75
San Francisco    68
London           36
Name: city, dtype: int64

# SAN FRANCISCO COMPANIES

In [112]:
def san_francisco_clean ():
    """Return the San Francisco offices that have complete data.

    The Mongo filter selects companies whose *first* office is in San
    Francisco, but ``mongo_extract`` returns every office of those companies,
    so the rows are filtered by city again afterwards.

    Returns
    -------
    pandas.DataFrame
        Rows with ``city == "San Francisco"`` and no missing value in any
        column, indexed 0..n-1.
    """
    san_francisco_filter = {"offices.0.city" : "San Francisco"}
    offices = mongo_extract(san_francisco_filter)

    # Non-mutating chain: avoids calling inplace=True methods on the result
    # of a boolean-mask selection.
    return (
        offices[offices["city"] == "San Francisco"]
        .dropna(how="any")
        .reset_index(drop=True)
    )

In [113]:
# Build the cleaned table of San Francisco offices and display it.
san_francisco = san_francisco_clean()
san_francisco

Unnamed: 0,name,city,latitude,longitude
0,Digg,San Francisco,37.764726,-122.394523
1,Twitter,San Francisco,37.776805,-122.416924
2,StumbleUpon,San Francisco,37.775196,-122.419204
3,Scribd,San Francisco,37.789634,-122.404052
4,Powerset,San Francisco,37.778613,-122.395289
...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655
607,Roc2Loc,San Francisco,37.782475,-122.407764
608,Danoo,San Francisco,37.788852,-122.400863
609,DJ Nitrogen,San Francisco,37.784174,-122.449408


# FOURSQUARE

In [114]:
# Load variables from a local .env file (if any) into the environment and
# read the Foursquare API key sent as the Authorization header of each request.
load_dotenv()
token_fsq = os.getenv("token_foursquare")

# Fail fast: os.getenv returns None for a missing variable, and the lookup
# functions would otherwise turn every failed response into None values.
if not token_fsq:
    raise RuntimeError(
        "Environment variable 'token_foursquare' is not set; "
        "define it (for example in a .env file) before calling the Foursquare API."
    )

In [115]:
def foursquare_cat (category, name, df):
    """Look up, for every row of ``df``, one Foursquare place of a category.

    One request per row is sent to the Foursquare Places search endpoint,
    centred on the row's ``latitude``/``longitude`` and limited to a single
    result. The ordering of results is left to the API default.

    Parameters
    ----------
    category : int or str
        Value sent as the ``categories`` request parameter.
    name : str
        Prefix of the three columns that are added.
    df : pandas.DataFrame
        Must contain ``latitude`` and ``longitude`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place, with ``{name}_dist``,
        ``{name}_lat`` and ``{name}_lon`` added. A value is ``None`` when the
        response does not contain it (e.g. no result for that row).

    Raises
    ------
    requests.RequestException
        If a request fails or times out.
    """

    def _dig(payload, *path):
        # Walk nested dicts/lists; a missing key, an empty list or a
        # non-container along the way yields None.
        try:
            for step in path:
                payload = payload[step]
        except (KeyError, IndexError, TypeError):
            return None
        return payload

    distance = []
    lat = []
    lon = []

    # Iterating over the column values (instead of df["latitude"][i]) does not
    # require the index to be exactly 0..n-1.
    for office_lat, office_lon in zip(df["latitude"], df["longitude"]):
        response = requests.get(
            "https://api.foursquare.com/v3/places/search",
            # requests builds and percent-encodes the query string.
            params={"ll": f"{office_lat},{office_lon}", "categories": category, "limit": 1},
            headers={"accept": "application/json", "Authorization": token_fsq},
            timeout=10,  # do not block forever on a stalled connection
        ).json()

        distance.append(_dig(response, "results", 0, "distance"))
        lat.append(_dig(response, "results", 0, "geocodes", "main", "latitude"))
        lon.append(_dig(response, "results", 0, "geocodes", "main", "longitude"))

    df[f"{name}_dist"] = distance
    df[f"{name}_lat"] = lat
    df[f"{name}_lon"] = lon

    return df

In [116]:
def foursquare_query (query, df):
    """Look up, for every row of ``df``, one Foursquare place matching a text query.

    One request per row is sent to the Foursquare Places search endpoint,
    centred on the row's ``latitude``/``longitude`` and limited to a single
    result. The ordering of results is left to the API default.

    Parameters
    ----------
    query : str
        Free-text search term. It is also used as the prefix of the three
        columns that are added.
    df : pandas.DataFrame
        Must contain ``latitude`` and ``longitude`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place, with ``{query}_dist``,
        ``{query}_lat`` and ``{query}_lon`` added. A value is ``None`` when
        the response does not contain it (e.g. no result for that row).

    Raises
    ------
    requests.RequestException
        If a request fails or times out.
    """

    def _dig(payload, *path):
        # Walk nested dicts/lists; a missing key, an empty list or a
        # non-container along the way yields None.
        try:
            for step in path:
                payload = payload[step]
        except (KeyError, IndexError, TypeError):
            return None
        return payload

    distance = []
    lat = []
    lon = []

    # Iterating over the column values (instead of df["latitude"][i]) does not
    # require the index to be exactly 0..n-1.
    for office_lat, office_lon in zip(df["latitude"], df["longitude"]):
        response = requests.get(
            "https://api.foursquare.com/v3/places/search",
            # Passing the query through params lets requests percent-encode
            # spaces and other special characters.
            params={"query": query, "ll": f"{office_lat},{office_lon}", "limit": 1},
            headers={"accept": "application/json", "Authorization": token_fsq},
            timeout=10,  # do not block forever on a stalled connection
        ).json()

        distance.append(_dig(response, "results", 0, "distance"))
        lat.append(_dig(response, "results", 0, "geocodes", "main", "latitude"))
        lon.append(_dig(response, "results", 0, "geocodes", "main", "longitude"))

    df[f"{query}_dist"] = distance
    df[f"{query}_lat"] = lat
    df[f"{query}_lon"] = lon

    return df

In [12]:
# Adds park_dist / park_lat / park_lon to san_francisco. foursquare_cat modifies
# its argument and returns it, so nearest_park is the same DataFrame object.
nearest_park = foursquare_cat(16032, "park", san_francisco) #Outdoor Parks

In [13]:
# Free-text search; adds starbucks_dist / starbucks_lat / starbucks_lon to the same frame.
nearest_starbucks = foursquare_query("starbucks", nearest_park) #Starbucks

In [14]:
# Category search; adds airport_dist / airport_lat / airport_lon to the same frame.
nearest_airport = foursquare_cat(19040, "airport", nearest_starbucks) #International Airports

In [15]:
# Category search; adds club_dist / club_lat / club_lon to the same frame.
nearest_club = foursquare_cat(10035, "club", nearest_airport) #Dance, music and theatre

In [16]:
# Free-text search; adds vegan_dist / vegan_lat / vegan_lon to the same frame.
nearest_vegan = foursquare_query("vegan", nearest_club) #Vegan Restaurants

In [17]:
# Free-text search. The query string doubles as the column prefix, which is why
# score() reads a column named basketball_stadium_dist.
final_df = foursquare_query("basketball_stadium", nearest_vegan) #Basketball Courts

In [18]:
# Keep only the offices for which every lookup returned a value and renumber
# the rows 0..n-1 (score() indexes rows by position-like labels).
# A new frame is bound to final_df rather than using inplace=True, so the
# frame shared with san_francisco / nearest_* is not modified by this cell.
final_df = final_df.dropna(how='any').reset_index(drop=True)
final_df

Unnamed: 0,name,city,latitude,longitude,park_dist,park_lat,park_lon,starbucks_dist,starbucks_lat,starbucks_lon,...,airport_lon,club_dist,club_lat,club_lon,vegan_dist,vegan_lat,vegan_lon,basketball_stadium_dist,basketball_stadium_lat,basketball_stadium_lon
0,Digg,San Francisco,37.764726,-122.394523,1021.0,37.759393,-122.403820,289.0,37.767121,-122.393738,...,-122.395506,2701.0,37.776311,-122.421407,2192.0,37.761621,-122.419145,711.0,37.767935,-122.387335
1,Twitter,San Francisco,37.776805,-122.416924,1757.0,37.792234,-122.412280,1155.0,37.784144,-122.407429,...,-122.395506,408.0,37.776311,-122.421407,1713.0,37.761621,-122.419145,2772.0,37.767935,-122.387335
2,StumbleUpon,San Francisco,37.775196,-122.419204,1984.0,37.792234,-122.412280,1424.0,37.784144,-122.407429,...,-122.395506,241.0,37.776311,-122.421407,1523.0,37.761621,-122.419145,2904.0,37.767935,-122.387335
3,Scribd,San Francisco,37.789634,-122.404052,764.0,37.792234,-122.412280,927.0,37.788371,-122.393555,...,-122.395506,2130.0,37.776311,-122.421407,3397.0,37.761621,-122.419145,2824.0,37.767935,-122.387335
4,Powerset,San Francisco,37.778613,-122.395289,938.0,37.784655,-122.402441,1080.0,37.788371,-122.393555,...,-122.395506,2319.0,37.776311,-122.421407,2830.0,37.761621,-122.419145,1378.0,37.767935,-122.387335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,Calypso Technology,San Francisco,37.789318,-122.400655,320.0,37.789031,-122.396892,627.0,37.788371,-122.393555,...,-122.395506,1666.0,37.779181,-122.414663,3493.0,37.761621,-122.419145,2650.0,37.767935,-122.387335
589,Roc2Loc,San Francisco,37.782475,-122.407764,526.0,37.784655,-122.402441,1178.0,37.777119,-122.419416,...,-122.395506,710.0,37.779181,-122.414663,2537.0,37.761621,-122.419145,2411.0,37.767935,-122.387335
590,Danoo,San Francisco,37.788852,-122.400863,473.0,37.784655,-122.402441,637.0,37.788371,-122.393555,...,-122.395506,1618.0,37.779181,-122.414663,3439.0,37.761621,-122.419145,2612.0,37.767935,-122.387335
591,DJ Nitrogen,San Francisco,37.784174,-122.449408,1286.0,37.790161,-122.437677,1667.0,37.799192,-122.449395,...,-122.395506,1438.0,37.784039,-122.433157,3663.0,37.761621,-122.419145,2010.0,37.766929,-122.456078


In [117]:
def score(dataframe, weights=None):
    """Compute a weighted proximity score for every row.

    Each criterion contributes ``(1 / distance * weight) * 100``, so a smaller
    distance gives a larger contribution; the contributions are summed.
    A distance of zero is not guarded against.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``{criterion}_dist`` column for every key of
        ``weights``. Any index is accepted.
    weights : dict, optional
        Maps a criterion name to its weight. Defaults to airport 0.35,
        starbucks 0.20, park 0.15, club 0.15, vegan 0.10 and
        basketball_stadium 0.05.

    Returns
    -------
    list of float
        One score per row, in row order.
    """
    if weights is None:
        weights = {
            "airport": 0.35,
            "starbucks": 0.20,
            "park": 0.15,
            "club": 0.15,
            "vegan": 0.10,
            "basketball_stadium": 0.05,
        }

    # Nothing to score; also avoids column look-ups on a frame without rows.
    if len(dataframe) == 0:
        return []

    # Column-wise arithmetic works on whole columns at once and does not
    # depend on the index labels being 0..n-1.
    total = pd.Series(0.0, index=dataframe.index)
    for criterion, weight in weights.items():
        total = total + (1 / dataframe[f"{criterion}_dist"] * weight) * 100

    return total.tolist()
    

In [118]:
# Score every office and store the result as a new "score" column of final_df.
total_score = score(final_df)
final_df["score"] = total_score
final_df

Unnamed: 0,name,city,latitude,longitude,park_dist,park_lat,park_lon,starbucks_dist,starbucks_lat,starbucks_lon,airport_dist,airport_lat,airport_lon,club_dist,club_lat,club_lon,vegan_dist,vegan_lat,vegan_lon,basketball_stadium_dist,basketball_stadium_lat,basketball_stadium_lon,score
0,Digg,San Francisco,37.764726,-122.394523,1021.0,37.759393,-122.403820,289.0,37.767121,-122.393738,16455.0,37.624123,-122.395506,2701.0,37.776311,-122.421407,2192.0,37.761621,-122.419145,711.0,37.767935,-122.387335,0.103171
1,Twitter,San Francisco,37.776805,-122.416924,1757.0,37.792234,-122.412280,1155.0,37.784144,-122.407429,17977.0,37.624123,-122.395506,408.0,37.776311,-122.421407,1713.0,37.761621,-122.419145,2772.0,37.767935,-122.387335,0.072206
2,StumbleUpon,San Francisco,37.775196,-122.419204,1984.0,37.792234,-122.412280,1424.0,37.784144,-122.407429,17831.0,37.624123,-122.395506,241.0,37.776311,-122.421407,1523.0,37.761621,-122.419145,2904.0,37.767935,-122.387335,0.094097
3,Scribd,San Francisco,37.789634,-122.404052,764.0,37.792234,-122.412280,927.0,37.788371,-122.393555,19268.0,37.624123,-122.395506,2130.0,37.776311,-122.421407,3397.0,37.761621,-122.419145,2824.0,37.767935,-122.387335,0.054782
4,Powerset,San Francisco,37.778613,-122.395289,938.0,37.784655,-122.402441,1080.0,37.788371,-122.393555,18000.0,37.624123,-122.395506,2319.0,37.776311,-122.421407,2830.0,37.761621,-122.419145,1378.0,37.767935,-122.387335,0.050085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,Calypso Technology,San Francisco,37.789318,-122.400655,320.0,37.789031,-122.396892,627.0,37.788371,-122.393555,19212.0,37.624123,-122.395506,1666.0,37.779181,-122.414663,3493.0,37.761621,-122.419145,2650.0,37.767935,-122.387335,0.094348
589,Roc2Loc,San Francisco,37.782475,-122.407764,526.0,37.784655,-122.402441,1178.0,37.777119,-122.419416,18504.0,37.624123,-122.395506,710.0,37.779181,-122.414663,2537.0,37.761621,-122.419145,2411.0,37.767935,-122.387335,0.074529
590,Danoo,San Francisco,37.788852,-122.400863,473.0,37.784655,-122.402441,637.0,37.788371,-122.393555,19162.0,37.624123,-122.395506,1618.0,37.779181,-122.414663,3439.0,37.761621,-122.419145,2612.0,37.767935,-122.387335,0.079029
591,DJ Nitrogen,San Francisco,37.784174,-122.449408,1286.0,37.790161,-122.437677,1667.0,37.799192,-122.449395,19395.0,37.624123,-122.395506,1438.0,37.784039,-122.433157,3663.0,37.761621,-122.419145,2010.0,37.766929,-122.456078,0.041115


# VISUALIZATION

In [119]:
# Display every column of wide frames from here on, then keep the single
# office with the highest score.
pd.set_option('display.max_columns', None)
ranked_offices = final_df.sort_values("score", ascending=False)
final_location = ranked_offices.head(1)
final_location

Unnamed: 0,name,city,latitude,longitude,park_dist,park_lat,park_lon,starbucks_dist,starbucks_lat,starbucks_lon,airport_dist,airport_lat,airport_lon,club_dist,club_lat,club_lon,vegan_dist,vegan_lat,vegan_lon,basketball_stadium_dist,basketball_stadium_lat,basketball_stadium_lon,score
563,Five Prime Therapeutics,San Francisco,37.767629,-122.393799,2058.0,37.784655,-122.402441,39.0,37.767121,-122.393738,16775.0,37.624123,-122.395506,2622.0,37.776311,-122.421407,2328.0,37.761621,-122.419145,557.0,37.767935,-122.387335,0.541189


In [120]:
# Base map centred on the latitude/longitude of the top-scoring office
# (the values of the final_location row).
san_francisco_map = Map(location = [37.767629, -122.393799], zoom_start = 11)
san_francisco_map

In [121]:
# One entry per marker: (tooltip, [latitude, longitude], marker colour, Font Awesome icon).
# The coordinates are those of the final_location row: the office itself and
# the venue found for each criterion.
map_points = [
    ("NEW OFFICE!", [37.767629, -122.393799], "red", "fa-map-marker"),
    ("Yerba Buena Gardens (Public park & cultural space)", [37.784655, -122.402441], "darkgreen", "fa-tree"),
    ("Starbucks", [37.767121, -122.393738], "beige", "fa-coffee"),
    ("San Francisco International Airport", [37.624123, -122.395506], "blue", "fa-plane"),
    ("SFJAZZ (Club) & B-Side (Gastropub)", [37.776311, -122.421407], "pink", "fa-glass"),
    ("Gracias Madre (Organic & vegan Mexican cantina)", [37.761621, -122.419145], "green", "fa-cutlery"),
    ("Chase Center (Golden State Warriors stadium)", [37.767935, -122.387335], "orange", "fa-futbol-o"),
]

# Same icon styling for every marker; only colour and glyph vary.
for tooltip, location, color, icon_name in map_points:
    Marker(
        location=location,
        tooltip=tooltip,
        icon=Icon(color=color, prefix="fa", icon=icon_name, icon_color="black"),
    ).add_to(san_francisco_map)

san_francisco_map