In [1]:
from pymongo import MongoClient
import pandas as pd
import os
import requests
import json
from dotenv import load_dotenv
from bs4 import BeautifulSoup
import re
import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element

In [2]:
client = MongoClient("localhost:27017")
db = client["ironhack"]
c = db.get_collection("companies")

In [3]:
def mongo_extract (filter):
    projection = {"_id":0, "name":1, "offices.city":1, "offices.latitude": 1, "offices.longitude":1}
    
    list_ = list(c.find(filter, projection))

    df = pd.DataFrame(list_).explode("offices").reset_index(drop=True)
    df = pd.concat([df, df["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
    df = df.drop(["offices"], axis = 1)
    df = df[df["city"]!=""]
    return df

In [4]:
money_filter = {"total_money_raised" : {"$regex" : "[$€].*[MB]"}}
money_df = mongo_extract(money_filter)
money_df["city"].value_counts()[:3]

San Francisco    436
New York         335
London           173
Name: city, dtype: int64

In [5]:
design_filter = {"tag_list" : {"$regex" : "design"}}
design_df = mongo_extract(design_filter)
design_df["city"].value_counts()[:3]

London           24
New York         23
San Francisco    20
Name: city, dtype: int64

In [6]:
gaming_filter = {"category_code" : "games_video"}
gaming_df = mongo_extract(gaming_filter)
gaming_df["city"].value_counts()[:3]

New York         75
San Francisco    68
London           36
Name: city, dtype: int64

# SAN FRANCISCO COMPANIES

In [7]:
san_francisco_filter = {"offices.0.city" : "San Francisco"}
san_francisco = mongo_extract(san_francisco_filter)
san_francisco = san_francisco[san_francisco["city"]=="San Francisco"]
san_francisco.dropna(how='any', inplace=True)
san_francisco.reset_index(drop=True, inplace=True)
san_francisco

Unnamed: 0,name,city,latitude,longitude
0,Digg,San Francisco,37.764726,-122.394523
1,Twitter,San Francisco,37.776805,-122.416924
2,StumbleUpon,San Francisco,37.775196,-122.419204
3,Scribd,San Francisco,37.789634,-122.404052
4,Powerset,San Francisco,37.778613,-122.395289
...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655
607,Roc2Loc,San Francisco,37.782475,-122.407764
608,Danoo,San Francisco,37.788852,-122.400863
609,DJ Nitrogen,San Francisco,37.784174,-122.449408


# FOURSQUARE

In [8]:
load_dotenv()
token_fsq = os.getenv("token_foursquare")

In [9]:
def foursquare_cat (category, lat, lon):

    url = f"https://api.foursquare.com/v3/places/search?ll={lat}%2C{lon}&categories={category}&limit=1"

    headers = {"accept": "application/json", "Authorization": token_fsq}
    response = requests.get(url, headers=headers).json()

    list = []
    for i in response["results"]:

        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]

        list.append(lat)
        list.append(lon)
    
    return list

In [10]:
def foursquare_query (query, lat, lon):

    url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={lat}%2C{lon}&limit=1"

    headers = {"accept": "application/json", "Authorization": token_fsq}
    response = requests.get(url, headers=headers).json()

    list = []
    for i in response["results"]:

        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]

        list.append(lat)
        list.append(lon)
    
    return list

In [11]:
category = 16032 #Outdoor Parks
park=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    park.append(foursquare_cat(category,i[0],i[1]))
san_francisco["nearest_park"] = park
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]"
...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]"


In [12]:
query = "starbucks" #Starbucks
starbucks=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    starbucks.append(foursquare_query(query,i[0],i[1]))
san_francisco["nearest_starbucks"] = starbucks
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park,nearest_starbucks
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]","[37.767121, -122.393738]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]","[37.777119, -122.419416]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]","[37.784144, -122.407429]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]","[37.788371, -122.393555]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]","[37.788371, -122.393555]"
...,...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]","[37.788371, -122.393555]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]","[37.784144, -122.407429]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]","[37.788371, -122.393555]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]","[37.799192, -122.449395]"


In [13]:
category = 19040 #International Airports
airport=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    airport.append(foursquare_cat(category,i[0],i[1]))
san_francisco["nearest_airport"] = airport
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park,nearest_starbucks,nearest_airport
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]","[37.767121, -122.393738]","[37.624123, -122.395506]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]","[37.777119, -122.419416]","[37.624123, -122.395506]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]","[37.784144, -122.407429]","[37.624123, -122.395506]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]","[37.788371, -122.393555]","[37.624123, -122.395506]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]"
...,...,...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]","[37.788371, -122.393555]","[37.624123, -122.395506]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]","[37.784144, -122.407429]","[37.624123, -122.395506]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]","[37.799192, -122.449395]","[37.624123, -122.395506]"


In [14]:
category = 10032 #Night Clubs
night_clubs=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    night_clubs.append(foursquare_cat(category,i[0],i[1]))
san_francisco["nearest_night_club"] = night_clubs
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park,nearest_starbucks,nearest_airport,nearest_night_club
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]","[37.767121, -122.393738]","[37.624123, -122.395506]","[37.771782, -122.414442]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]","[37.777119, -122.419416]","[37.624123, -122.395506]","[37.771782, -122.414442]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]"
...,...,...,...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]","[37.799192, -122.449395]","[37.624123, -122.395506]","[37.771782, -122.414442]"


In [15]:
category = 13377 #Vegan Restaurants
vegan_restaurants=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    vegan_restaurants.append(foursquare_cat(category,i[0],i[1]))
san_francisco["nearest_vegan_restaurant"] = vegan_restaurants
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park,nearest_starbucks,nearest_airport,nearest_night_club,nearest_vegan_restaurant
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]","[37.767121, -122.393738]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]","[37.777119, -122.419416]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]"
...,...,...,...,...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]","[37.799192, -122.449395]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.763808, -122.467787]"


In [16]:
category = 18006 #Basketball Courts
basketball_courts=[]
for i in zip(san_francisco["latitude"],san_francisco["longitude"]):
    basketball_courts.append(foursquare_cat(category,i[0],i[1]))
san_francisco["nearest_basketball_court"] = basketball_courts
san_francisco

Unnamed: 0,name,city,latitude,longitude,nearest_park,nearest_starbucks,nearest_airport,nearest_night_club,nearest_vegan_restaurant,nearest_basketball_court
0,Digg,San Francisco,37.764726,-122.394523,"[37.759393, -122.40382]","[37.767121, -122.393738]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]","[37.772031, -122.397898]"
1,Twitter,San Francisco,37.776805,-122.416924,"[37.792234, -122.41228]","[37.777119, -122.419416]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]","[37.772031, -122.397898]"
2,StumbleUpon,San Francisco,37.775196,-122.419204,"[37.792234, -122.41228]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]","[37.772031, -122.397898]"
3,Scribd,San Francisco,37.789634,-122.404052,"[37.792234, -122.41228]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]","[37.772031, -122.397898]"
4,Powerset,San Francisco,37.778613,-122.395289,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]","[37.772031, -122.397898]"
...,...,...,...,...,...,...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655,"[37.789031, -122.396892]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]","[37.772031, -122.397898]"
607,Roc2Loc,San Francisco,37.782475,-122.407764,"[37.784655, -122.402441]","[37.784144, -122.407429]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.786329, -122.417353]","[37.772031, -122.397898]"
608,Danoo,San Francisco,37.788852,-122.400863,"[37.784655, -122.402441]","[37.788371, -122.393555]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.787182, -122.398432]","[37.772031, -122.397898]"
609,DJ Nitrogen,San Francisco,37.784174,-122.449408,"[37.790161, -122.437677]","[37.799192, -122.449395]","[37.624123, -122.395506]","[37.771782, -122.414442]","[37.763808, -122.467787]","[37.772031, -122.397898]"
