In [1]:
#Libraries

from pymongo import MongoClient
import pandas as pd
import os
from dotenv import load_dotenv
import requests
import json
from dotenv import load_dotenv

In [2]:
client = MongoClient("localhost:27017")
db = client["ironhack"]
c = db.get_collection("companies")

In [3]:
def mongo_extract (filter):
    projection = {"_id":0, "name":1, "offices.city":1, "offices.latitude": 1, "offices.longitude":1}
    
    list_ = list(c.find(filter, projection))

    df = pd.DataFrame(list_).explode("offices").reset_index(drop=True)
    df = pd.concat([df, df["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
    df = df.drop(["offices"], axis = 1)
    df = df[df["city"]!=""]
    return df

In [4]:
money_df = mongo_extract({"total_money_raised" : {"$regex" : "[$€].*[MB]"}})
money_df["city"].value_counts()[:3]

San Francisco    436
New York         335
London           173
Name: city, dtype: int64

In [5]:
design_df = mongo_extract({"tag_list" : {"$regex" : "design"}})
design_df["city"].value_counts()[:3]

London           24
New York         23
San Francisco    20
Name: city, dtype: int64

In [6]:
gaming_df = mongo_extract({"category_code" : "games_video"})
gaming_df["city"].value_counts()[:3]

New York         75
San Francisco    68
London           36
Name: city, dtype: int64

# SAN FRANCISCO COMPANIES

In [7]:
def san_francisco_clean ():

    san_francisco_filter = {"offices.0.city" : "San Francisco"}
    dataframe = mongo_extract(san_francisco_filter)
    dataframe = dataframe[dataframe["city"]=="San Francisco"]
    dataframe.dropna(how='any', inplace=True)
    dataframe.reset_index(drop=True, inplace=True)

    return dataframe

In [8]:
san_francisco = san_francisco_clean()
san_francisco

Unnamed: 0,name,city,latitude,longitude
0,Digg,San Francisco,37.764726,-122.394523
1,Twitter,San Francisco,37.776805,-122.416924
2,StumbleUpon,San Francisco,37.775196,-122.419204
3,Scribd,San Francisco,37.789634,-122.404052
4,Powerset,San Francisco,37.778613,-122.395289
...,...,...,...,...
606,Calypso Technology,San Francisco,37.789318,-122.400655
607,Roc2Loc,San Francisco,37.782475,-122.407764
608,Danoo,San Francisco,37.788852,-122.400863
609,DJ Nitrogen,San Francisco,37.784174,-122.449408


# FOURSQUARE

In [9]:
load_dotenv()
token_fsq = os.getenv("token_foursquare")

In [10]:
def foursquare_cat (category, name, df):
    
    distance = []
    lat = []
    lon = []
    
    for i in range(len(df)):
        url = f"https://api.foursquare.com/v3/places/search?ll={df['latitude'][i]}%2C{df['longitude'][i]}&categories={category}&limit=1"
        headers = {"accept": "application/json", "Authorization": token_fsq}
        response = requests.get(url, headers=headers).json()
        
        for x in response["results"]:
            distance.append(x["distance"])
            lat.append(x["geocodes"]["main"]["latitude"])
            lon.append(x["geocodes"]["main"]["longitude"])
        
    df[f"{name}_dist"] = pd.Series(distance)
    df[f"{name}_lat"] = pd.Series(lat)
    df[f"{name}_lon"] = pd.Series(lon)
    
    return df

In [11]:
def foursquare_query (query, df):
    
    distance = []
    lat = []
    lon = []
    
    for i in range(len(df)):
        url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={df['latitude'][i]}%2C{df['longitude'][i]}&limit=1"
        headers = {"accept": "application/json", "Authorization": token_fsq}
        response = requests.get(url, headers=headers).json()
        
        for x in response["results"]:
            distance.append(x["distance"])
            lat.append(x["geocodes"]["main"]["latitude"])
            lon.append(x["geocodes"]["main"]["longitude"])
        
    df[f"{query}_dist"] = pd.Series(distance)
    df[f"{query}_lat"] = pd.Series(lat)
    df[f"{query}_lon"] = pd.Series(lon)
    
    return df

In [12]:
nearest_park = foursquare_cat(16032, "park", san_francisco) #Outdoor Parks
nearest_starbucks = foursquare_query("starbucks", nearest_park) #Starbucks
nearest_airport = foursquare_cat(19040, "airport", nearest_starbucks) #International Airports
nearest_club = foursquare_cat(10035, "club", nearest_airport) #Dance, music and theatre
nearest_vegan = foursquare_cat(13377, "vegan", nearest_club) #Vegan Restaurants
nearest_basket_court = foursquare_cat(18006, "basket_court", nearest_vegan) #Basketball Courts

In [18]:
final_df = nearest_basket_court.dropna(how='any')
final_df

Unnamed: 0,name,city,latitude,longitude,park_dist,park_lat,park_lon,starbucks_dist,starbucks_lat,starbucks_lon,...,airport_lon,club_dist,club_lat,club_lon,vegan_dist,vegan_lat,vegan_lon,basket_court_dist,basket_court_lat,basket_court_lon
0,Digg,San Francisco,37.764726,-122.394523,1021.0,37.759393,-122.403820,289.0,37.767121,-122.393738,...,-122.395506,2701.0,37.776311,-122.421407,3119.0,37.786329,-122.417353,852.0,37.772031,-122.397898
1,Twitter,San Francisco,37.776805,-122.416924,1757.0,37.792234,-122.412280,223.0,37.777119,-122.419416,...,-122.395506,408.0,37.776311,-122.421407,1050.0,37.786329,-122.417353,1754.0,37.772031,-122.397898
2,StumbleUpon,San Francisco,37.775196,-122.419204,1984.0,37.792234,-122.412280,1424.0,37.784144,-122.407429,...,-122.395506,241.0,37.776311,-122.421407,1240.0,37.786329,-122.417353,1903.0,37.772031,-122.397898
3,Scribd,San Francisco,37.789634,-122.404052,764.0,37.792234,-122.412280,927.0,37.788371,-122.393555,...,-122.395506,2130.0,37.776311,-122.421407,1223.0,37.786329,-122.417353,2041.0,37.772031,-122.397898
4,Powerset,San Francisco,37.778613,-122.395289,938.0,37.784655,-122.402441,1080.0,37.788371,-122.393555,...,-122.395506,2319.0,37.776311,-122.421407,1005.0,37.787182,-122.398432,780.0,37.772031,-122.397898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588,Talent Basket,San Francisco,37.775196,-122.419204,259.0,37.784655,-122.402441,329.0,37.808309,-122.413086,...,-122.395506,3548.0,37.776311,-122.421407,2565.0,37.786329,-122.417353,1903.0,37.772031,-122.397898
589,Viator,San Francisco,37.786710,-122.400840,1984.0,37.792234,-122.412280,225.0,37.777119,-122.419416,...,-122.395506,3140.0,37.776311,-122.421407,1240.0,37.786329,-122.417353,1903.0,37.772031,-122.397898
590,Finest Expert,San Francisco,37.775196,-122.419204,258.0,37.802589,-122.405865,1263.0,37.766742,-122.409414,...,-122.395506,2459.0,37.776311,-122.421407,1240.0,37.786329,-122.417353,1257.0,37.772031,-122.397898
591,Recurrent Energy,San Francisco,37.805289,-122.404448,1229.0,37.792234,-122.412280,171.0,37.766742,-122.409414,...,-122.395506,2883.0,37.802812,-122.449036,2145.0,37.786329,-122.417353,3698.0,37.476549,-122.202839
