In [184]:
import os
import requests
import json
from dotenv import load_dotenv
import pandas as pd
from pandas import json_normalize
from pymongo import MongoClient
import geopandas as gpd
import numpy as np
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
from cartoframes.viz import Map as Map2, Layer, popup_element

In [185]:
import sys

sys.path.append('../src')

from geo_functions import *

## Filtering to choose City

In [186]:
# Connect to the local MongoDB server and take a handle on the company documents.
client = MongoClient(host="localhost:27017")
db = client["Ironhack"]
c = db["Companies"]

### Filter for companies that have raised more than 1 Million (USD or EUR)

In [187]:
# Match companies that have an `offices` field and whose `total_money_raised`
# string contains a "$" or "€" followed, somewhere later, by an "M" or a "B".
has_offices = {'offices': {'$exists': 1}}
raised_in_millions = {'total_money_raised': {'$regex': '[$€].*[MB]'}}
filter_ = {"$and": [has_offices, raised_in_millions]}

# Only the company name, the funding string and the office location fields are needed.
projection = {
    'name': 1,
    '_id': 0,
    'total_money_raised': 1,
    'offices.country_code': 1,
    "offices.state_code": 1,
    'offices.city': 1,
    'offices.latitude': 1,
    'offices.longitude': 1,
}

# NOTE(review): the first 20 documents of the sorted result are discarded; the
# reason is not visible in this notebook -- confirm this is intended.
sorted_companies = c.find(filter_, projection).sort('offices.country_code')
list_ = list(sorted_companies)[20:]

In [188]:
# One row per (company, office): explode the `offices` list, then spread each
# office dict into its own columns next to the company fields.
offices_long = pd.DataFrame(list_).explode("offices").reset_index(drop=True)
df = (
    pd.concat([offices_long, offices_long["offices"].apply(pd.Series)], axis=1)
    # Keep only offices that have both coordinates and a city.
    .dropna(subset=["latitude", "city"])
    # A stray column labelled 0 presumably appears only when some exploded
    # `offices` value is not a dict (e.g. NaN for a company with no offices);
    # errors="ignore" keeps the cell working when there is no such row.
    .drop(columns=["offices", 0], errors="ignore")
)
df[:5]


Unnamed: 0,name,total_money_raised,city,state_code,country_code,latitude,longitude
146,Rally Software,$68.9M,Boulder,CO,USA,40.010295,-105.242005
150,Rally Software,$68.9M,Amsterdam,,ANT,52.338098,4.86881
151,Rally Software,$68.9M,Buckinghamshire,,GBR,51.666619,-0.614003
155,GET Holding NV,€4.25M,Zaandijk,,NLD,52.475393,4.812833
159,Google,$555M,Mountain View,CA,USA,37.421972,-122.084143


In [189]:
df['city'].value_counts()[:5]

San Francisco    318
New York         221
Mountain View     90
Seattle           88
Palo Alto         84
Name: city, dtype: int64

In [190]:
df_SF = df[df['city'] == 'San Francisco']

### Filter for companies that focus on video games

In [191]:
# Match companies in the "games_video" category that have an `offices` field.
is_games_video = {"category_code": "games_video"}
lists_offices = {'offices': {'$exists': 1}}
filter_2 = {"$and": [is_games_video, lists_offices]}

# Company name, category and office location fields only.
projection_2 = {
    'name': 1,
    '_id': 0,
    'category_code': 1,
    'offices.country_code': 1,
    "offices.state_code": 1,
    'offices.city': 1,
    'offices.latitude': 1,
    'offices.longitude': 1,
}

# NOTE(review): the first 20 documents of the sorted result are discarded; the
# reason is not visible in this notebook -- confirm this is intended.
sorted_games = c.find(filter_2, projection_2).sort('offices.country_code')
list_2 = list(sorted_games)[20:]

In [192]:
# One row per (video-game company, office), with the office dict spread into columns.
games_long = pd.DataFrame(list_2).explode("offices").reset_index(drop=True)
df_2 = (
    pd.concat([games_long, games_long["offices"].apply(pd.Series)], axis=1)
    # A stray column labelled 0 presumably appears only when some exploded
    # `offices` value is not a dict (e.g. NaN for a company with no offices);
    # errors="ignore" keeps the cell working when there is no such row.
    .drop(columns=["offices", 0], errors="ignore")
    # Keep only offices that have a city and coordinates ...
    .dropna(subset=["city", "latitude"])
    # ... and whose city is not an empty string.
    .loc[lambda frame: frame["city"] != ""]
)
df_2[:5]

Unnamed: 0,name,category_code,city,state_code,country_code,latitude,longitude
226,Tweegee,games_video,Ramat Gan,,ISR,32.037363,34.80037
230,Tweegee,games_video,Ramat Gan,,ISR,32.037363,34.80037
234,CreationFlow,games_video,Ciudad de Buenos Aires,,ARG,-34.602858,-58.433954
239,2threads,games_video,sydney,,AUS,-33.884685,151.216427
240,Sporting Connections,games_video,Maroochydore,,AUS,-26.665908,153.088303


In [193]:
df_2['city'].value_counts()[:5]

New York         51
San Francisco    49
Los Angeles      25
London           20
Palo Alto        16
Name: city, dtype: int64

In [194]:
df_2_SF = df_2[df_2['city'] == 'San Francisco']

### Filter for companies that focus on design

In [195]:
# Match companies whose `tag_list` contains the substring "design"
# (case-sensitive regex) and that have an `offices` field.
tagged_design = {"tag_list": {'$regex': "design"}}
with_offices = {'offices': {'$exists': 1}}
filter_3 = {"$and": [tagged_design, with_offices]}

# Company name, tags and office location fields only.
projection_3 = {
    'name': 1,
    '_id': 0,
    'tag_list': 1,
    'offices.country_code': 1,
    "offices.state_code": 1,
    'offices.city': 1,
    'offices.latitude': 1,
    'offices.longitude': 1,
}

sorted_design = c.find(filter_3, projection_3).sort('offices.country_code')
list_3 = list(sorted_design)

In [196]:
# One row per (design company, office), with the office dict spread into columns.
design_long = pd.DataFrame(list_3).explode("offices").reset_index(drop=True)
df_3 = (
    pd.concat([design_long, design_long["offices"].apply(pd.Series)], axis=1)
    # A stray column labelled 0 presumably appears only when some exploded
    # `offices` value is not a dict (e.g. NaN for a company with no offices);
    # errors="ignore" keeps the cell working when there is no such row.
    .drop(columns=["offices", 0], errors="ignore")
    # Keep only offices that have a city and coordinates ...
    .dropna(subset=["city", "latitude"])
    # ... and whose city is not an empty string.
    .loc[lambda frame: frame["city"] != ""]
)
df_3[:5]

Unnamed: 0,name,tag_list,city,state_code,country_code,latitude,longitude
101,Making Sense,"custom-software-development, email-marketing, ...",San Antonio,TX,USA,29.516352,-98.43676
102,Making Sense,"custom-software-development, email-marketing, ...",Buenos Aires,,ARG,-34.589281,-58.43297
103,Making Sense,"custom-software-development, email-marketing, ...",Mar del Plata,,ARG,-38.006092,-57.558244
105,Popego,"techcrunch50, tc50, web-service, interests, se...",San Francisco,CA,USA,37.778687,-122.421242
108,Popego,"techcrunch50, tc50, web-service, interests, se...",San Francisco,CA,USA,37.778687,-122.421242


In [197]:
df_3['city'].value_counts()[:5]

New York         18
London           14
San Francisco    12
Los Angeles       9
San Diego         8
Name: city, dtype: int64

In [198]:
df_3_SF = df_3[df_3['city'] == 'San Francisco']

In [199]:
# I'm going to focus on San Francisco:
# - lots of tech start-ups that raised more than 1M (USD or EUR)
# - video game companies nearby
# - design companies nearby

Create an `Offices` collection with one document per office found in the `Companies` collection

In [200]:
o = db.get_collection('Offices')

In [201]:
# Rebuild `Offices` from scratch: without clearing it first, every re-run of
# this cell would append another full copy of all offices.
o.delete_many({})

# One document per office: unwind the `offices` array, keep only offices that
# have both coordinates, and drop `_id` so MongoDB assigns a fresh one on insert.
new_collection = c.aggregate([
    {"$unwind":"$offices"},
    {"$match":{"offices.latitude":{"$ne":None}, "offices.longitude":{"$ne":None}}},
    {"$project":{"_id":0}}])
o.insert_many(new_collection)

<pymongo.results.InsertManyResult at 0x25f29c1e7c0>

In [202]:
list_offices = o.find({},{"offices":1})

In [203]:
# Attach a GeoJSON point to every office document (GeoJSON order is
# [longitude, latitude]).
for company in list_offices:
    office = company["offices"]
    geojson = {
        "type": "Point",
        "coordinates": [office["longitude"], office["latitude"]],
    }
    # Match on `_id` only: using the whole fetched document as the filter also
    # requires an exact match on the embedded `offices` sub-document.
    o.update_one({"_id": company["_id"]}, {"$set": {"geojson": geojson}})

In [204]:
o.create_index([("geojson", "2dsphere")])

'geojson_2dsphere'

Look at the offices located in San Francisco and show them on a map to see where we could locate our own offices.

In [205]:
# Load every document of the `Offices` collection into pandas and keep only
# the three fields used below.
office_documents = list(o.find())
total_offices_worldwide = pd.DataFrame(office_documents).loc[:, ['name', 'offices', 'geojson']]

In [206]:
# Spread each `offices` dict into columns and put them next to name/geojson.
office_fields = total_offices_worldwide['offices'].apply(pd.Series)
offices_flat = pd.concat([total_offices_worldwide, office_fields], axis=1).reset_index(drop=True)

wanted_columns = ['name', 'country_code', 'city', 'state_code', 'longitude', 'latitude', 'geojson']
offices_flat = offices_flat[wanted_columns]

# NOTE(review): despite its name, from here on the frame holds USA offices only,
# and this cell cannot be re-run on its own because `offices` has been dropped.
total_offices_worldwide = offices_flat[offices_flat["country_code"] == "USA"]
total_offices_worldwide.sample()

Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
11380,MySQL,USA,Cupertino,CA,-122.030201,37.322806,"{'type': 'Point', 'coordinates': [-122.030201,..."


In [207]:
# Offices whose city is exactly 'San Francisco'.
in_san_francisco = total_offices_worldwide['city'] == 'San Francisco'
total_offices_SF = total_offices_worldwide[in_san_francisco]
total_offices_SF.sample()

Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
4121,Huddler,USA,San Francisco,CA,-122.401362,37.789321,"{'type': 'Point', 'coordinates': [-122.4013624..."


In [208]:
total_offices_SF.shape

(1977, 7)

In [209]:
# Base map; the centre coordinates and zoom level are hard-coded.
map_center = [37.76961, -122.4537724]
san_fran_map = Map(location=map_center, zoom_start=12)
san_fran_map

In [210]:
# Heat layer of all San Francisco offices, in its own named feature group.
# Re-running this cell adds the layer to the map a second time.
sf_group = folium.FeatureGroup(name='SanFran Offices')
sf_heat = HeatMap(data=total_offices_SF[['latitude', 'longitude']], radius=10)
sf_heat.add_to(sf_group)
sf_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x25f11aa9940>

In [211]:
# Heat layer of the San Francisco offices of companies matched by the funding filter.
# Re-running this cell adds the layer to the map a second time.
df_group = folium.FeatureGroup(name='SanFran Companies that raised +1M')
funding_gradient = {'0': 'Navy', '0.25': 'Blue', '0.5': 'Green', '0.75': 'Yellow', '1': 'Red'}
funding_heat = HeatMap(
    data=df_SF[['latitude', 'longitude']],
    radius=10,
    gradient=funding_gradient,
)
funding_heat.add_to(df_group)
df_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x25f07374880>

In [212]:
# Heat layer of the San Francisco offices of video-game companies.
# Re-running this cell adds the layer to the map a second time.
df_2_group = folium.FeatureGroup(name='SanFran Video Games Companies')
games_gradient = {0.4: 'yellow', 0.65: 'orange', 1: 'white'}
games_heat = HeatMap(
    data=df_2_SF[['latitude', 'longitude']],
    radius=10,
    gradient=games_gradient,
)
games_heat.add_to(df_2_group)
df_2_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x25f1d4bf820>

In [213]:
# Heat layer of the San Francisco offices of design companies (larger radius
# than the other layers).
# Re-running this cell adds the layer to the map a second time.
df_3_group = folium.FeatureGroup(name='SanFran Design Companies')
design_gradient = {0.4: 'gray', 0.65: 'black', 1: 'white'}
design_heat = HeatMap(
    data=df_3_SF[['latitude', 'longitude']],
    radius=15,
    gradient=design_gradient,
)
design_heat.add_to(df_3_group)
df_3_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x25f1d4bf7c0>

In [214]:
folium.LayerControl(collapsed=False, position="topleft").add_to(san_fran_map)

<folium.map.LayerControl at 0x25f1d4bf2e0>

In [215]:
# Candidate office location 1 (hand-picked coordinates), pinned on the map.
point1_lat, point1_long = 37.7767445077143, -122.41710623836829

pin_style = dict(color="red", opacity=0.1, prefix="fa", icon="map-pin", icon_color="white")
icon = Icon(**pin_style)
point1 = Marker(
    location=[point1_lat, point1_long],
    tooltip="Potential Location 1",
    icon=icon,
)
point1.add_to(san_fran_map)

<folium.map.Marker at 0x25f1d4bf760>

In [216]:
# Candidate office location 2 (hand-picked coordinates), pinned on the map.
point2_lat, point2_long = 37.781292586285396, -122.40804408884459

pin_style = dict(color="red", opacity=0.1, prefix="fa", icon="map-pin", icon_color="white")
icon = Icon(**pin_style)
point2 = Marker(
    location=[point2_lat, point2_long],
    tooltip="Potential Location 2",
    icon=icon,
)
point2.add_to(san_fran_map)

<folium.map.Marker at 0x25f1d4bf340>

In [217]:
# Candidate office location 3 (hand-picked coordinates), pinned on the map.
point3_lat, point3_long = 37.78984423851833, -122.40163489291831

pin_style = dict(color="red", opacity=0.1, prefix="fa", icon="map-pin", icon_color="white")
icon = Icon(**pin_style)
point3 = Marker(
    location=[point3_lat, point3_long],
    tooltip="Potential Location 3",
    icon=icon,
)
point3.add_to(san_fran_map)

<folium.map.Marker at 0x25f1d4bf610>

In [218]:
# Candidate office location 4 (hand-picked coordinates), pinned on the map.
point4_lat, point4_long = 37.78156089058974, -122.39269183945797

pin_style = dict(color="red", opacity=0.1, prefix="fa", icon="map-pin", icon_color="white")
icon = Icon(**pin_style)
point4 = Marker(
    location=[point4_lat, point4_long],
    tooltip="Potential Location 4",
    icon=icon,
)
point4.add_to(san_fran_map)

<folium.map.Marker at 0x25f3220b0d0>

In [219]:
san_fran_map

Create a dataframe with the main potential locations for our offices

In [295]:
# Candidate points as columns, with latitude/longitude as the two rows ...
coordinates_by_point = {
    'point1': [point1_lat, point1_long],
    'point2': [point2_lat, point2_long],
    'point3': [point3_lat, point3_long],
    'point4': [point4_lat, point4_long],
}
key_locations_ = pd.DataFrame(coordinates_by_point, index=['latitude', 'longitude'])

# ... then flipped so that each candidate point is one row.
key_locations = key_locations_.T
key_locations


Unnamed: 0,latitude,longitude
point1,37.776745,-122.417106
point2,37.781293,-122.408044
point3,37.789844,-122.401635
point4,37.781561,-122.392692


Download data from the Foursquare API

In [296]:
# Read the Foursquare API token from the environment (after loading a .env file);
# `token_fsq` is None when the variable is not set.
load_dotenv()
token_fsq = os.environ.get("token_foursquare")

In [297]:
# Numeric IDs (as strings) for the venue types compared below -- presumably
# Foursquare category IDs, since several of them match the values passed as
# `categories=` in later cells.
# NOTE(review): none of these constants is read in the visible cells, which
# hard-code the numeric IDs instead; `basketball`, `school`, `rail_station`,
# `port` and `vegan` are later rebound to lists of distances.
# Starbucks: no ID here; looked up by name query instead.
parks = '16032'
airports = '19031'
# Dog salon: no ID here; looked up by name query instead.
night_club = '10032'
basketball = '18006'
school = '12057'
rail_station = '19047'
port = '19023'
vegan = '13377'

In [298]:
def foursquare_query(query, lat, lon, limit=1, sort=None):
    """Search Foursquare places by free-text query around a point.

    Parameters
    ----------
    query : str
        Free-text search term (e.g. a venue name). It is passed through
        ``params=`` so that spaces and characters such as ``&`` or ``#`` are
        URL-encoded instead of corrupting the request URL.
    lat, lon : float
        Coordinates of the search centre, sent as ``ll=<lat>,<lon>``.
    limit : int, default 1
        Maximum number of results requested.
    sort : str, optional
        Sent as the ``sort`` request parameter when given; omitted by default,
        which leaves the ordering to the API.
        NOTE(review): with the default ordering the first result is not
        guaranteed to be the closest one -- confirm against the Foursquare
        Place Search documentation whether ``sort="DISTANCE"`` is wanted.

    Returns
    -------
    list
        The ``distance`` value of every returned place, in response order
        (empty when the search has no results).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    params = {"query": query, "ll": f"{lat},{lon}", "limit": limit}
    if sort is not None:
        params["sort"] = sort

    response = requests.get(
        "https://api.foursquare.com/v3/places/search",
        params=params,
        headers={"accept": "application/json", "Authorization": token_fsq},
        timeout=10,  # do not hang the notebook forever on a stalled connection
    )
    # Fail with the HTTP error instead of a confusing KeyError on "results".
    response.raise_for_status()

    return [place["distance"] for place in response.json()["results"]]

In [299]:
# For every candidate point, the distance list returned for a 'Starbucks' search.
query = 'Starbucks'
starbucks = [
    foursquare_query(query, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Starbucks (m)"] = starbucks
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m)
point1,37.776745,-122.417106,[1172]
point2,37.781293,-122.408044,[506]
point3,37.789844,-122.401635,[724]
point4,37.781561,-122.392692,[747]


In [300]:
def foursquare_cat(category, lat, lon, limit=1, sort=None):
    """Search Foursquare places of a given category around a point.

    Parameters
    ----------
    category : int or str
        Category ID, sent as the ``categories`` request parameter.
    lat, lon : float
        Coordinates of the search centre, sent as ``ll=<lat>,<lon>``.
    limit : int, default 1
        Maximum number of results requested.
    sort : str, optional
        Sent as the ``sort`` request parameter when given; omitted by default,
        which leaves the ordering to the API.
        NOTE(review): with the default ordering the first result is not
        guaranteed to be the closest one -- confirm against the Foursquare
        Place Search documentation whether ``sort="DISTANCE"`` is wanted.

    Returns
    -------
    list
        The ``distance`` value of every returned place, in response order
        (empty when the search has no results).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    params = {"ll": f"{lat},{lon}", "categories": category, "limit": limit}
    if sort is not None:
        params["sort"] = sort

    response = requests.get(
        "https://api.foursquare.com/v3/places/search",
        params=params,
        headers={"accept": "application/json", "Authorization": token_fsq},
        timeout=10,  # do not hang the notebook forever on a stalled connection
    )
    # Fail with the HTTP error instead of a confusing KeyError on "results".
    response.raise_for_status()

    # Only the distance is used; the place's own coordinates are deliberately
    # not read, so the `lat`/`lon` arguments are never overwritten.
    return [place["distance"] for place in response.json()["results"]]

In [301]:
# For every candidate point, the distance list returned for category 16032 (outdoor parks).
category = 16032
park = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest park (m)"] = park
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m)
point1,37.776745,-122.417106,[1172],[1767]
point2,37.781293,-122.408044,[506],[620]
point3,37.789844,-122.401635,[724],[959]
point4,37.781561,-122.392692,[747],[937]


In [302]:
# For every candidate point, the distance list returned for category 19031 (airports).
category = 19031
airport = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Airport (m)"] = airport
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m)
point1,37.776745,-122.417106,[1172],[1767],[17869]
point2,37.781293,-122.408044,[506],[620],[18275]
point3,37.789844,-122.401635,[724],[959],[19177]
point4,37.781561,-122.392692,[747],[937],[18224]


In [303]:
# Dog groomers. Each one is looked up by name, point by point, because the
# pet-grooming category ID gave wrong results.
# NOTE(review): the coordinates below are hard-coded, rounded copies of the four
# candidate points rather than values read from `key_locations`.
dog_salon_searches = [
    ('the dog barber', 37.776745, -122.417106),
    ('Doggie Day Spaw', 37.781293, -122.408044),
    ('Alpha Gromming Pet Salon', 37.789844, -122.401635),
    ('Doggie Day Spaw', 37.781561, -122.392692),
]
dog_salon = [
    foursquare_query(salon_name, point_lat, point_lon)
    for salon_name, point_lat, point_lon in dog_salon_searches
]

key_locations["Nearest Dog Salon (m)"] = dog_salon
key_locations


Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015]
point2,37.781293,-122.408044,[506],[620],[18275],[556]
point3,37.789844,-122.401635,[724],[959],[19177],[409]
point4,37.781561,-122.392692,[747],[937],[18224],[919]


In [304]:
# For every candidate point, the distance list returned for a 'night club' search.
query = 'night club'
night_clubs = [
    foursquare_query(query, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Night Clubs (m)"] = night_clubs
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974]


In [305]:
# For every candidate point, the distance list returned for category 18006 (basketball).
category = 18006
basketball = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Basketball Stadium (m)"] = basketball
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283],[1767]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541],[1369]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520],[2019]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974],[1166]


In [306]:
# For every candidate point, the distance list returned for a 'school' search.
query = 'school'
school = [
    foursquare_query(query, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest School (m)"] = school
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m),Nearest School (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283],[1767],[497]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541],[1369],[1019]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520],[2019],[2124]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974],[1166],[2056]


In [307]:
# For every candidate point, the distance list returned for category 19047 (rail stations).
category = 19047
rail_station = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Rail Station (m)"] = rail_station
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m),Nearest School (m),Nearest Rail Station (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283],[1767],[497],[242]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541],[1369],[1019],[311]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520],[2019],[2124],[550]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974],[1166],[2056],[619]


In [308]:
# For every candidate point, the distance list returned for category 19023 (ports).
category = 19023
port = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Port (m)"] = port
key_locations


Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m),Nearest School (m),Nearest Rail Station (m),Nearest Port (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283],[1767],[497],[242],[2945]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541],[1369],[1019],[311],[2057]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520],[2019],[2124],[550],[956]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974],[1166],[2056],[619],[1689]


In [309]:
# For every candidate point, the distance list returned for category 13377 (vegan restaurants).
category = 13377
vegan = [
    foursquare_cat(category, point_lat, point_lon)
    for point_lat, point_lon in zip(key_locations["latitude"], key_locations["longitude"])
]
key_locations["Nearest Vegan Restaurant (m)"] = vegan
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m),Nearest School (m),Nearest Rail Station (m),Nearest Port (m),Nearest Vegan Restaurant (m)
point1,37.776745,-122.417106,[1172],[1767],[17869],[1015],[1283],[1767],[497],[242],[2945],[1057]
point2,37.781293,-122.408044,[506],[620],[18275],[556],[1541],[1369],[1019],[311],[2057],[983]
point3,37.789844,-122.401635,[724],[959],[19177],[409],[2520],[2019],[2124],[550],[956],[389]
point4,37.781561,-122.392692,[747],[937],[18224],[919],[1974],[1166],[2056],[619],[1689],[820]


In [310]:
# Every "Nearest ..." column holds one-element lists (the search functions
# return a list of distances); replace each list by its first element.
# - Values that are already scalars are left alone, so the cell can be re-run.
# - An empty result list becomes NaN instead of raising IndexError.
nearest_columns = [col for col in key_locations.columns if col.startswith("Nearest ")]
for col in nearest_columns:
    key_locations[col] = key_locations[col].apply(
        lambda x: (x[0] if x else np.nan) if isinstance(x, list) else x
    )

In [311]:
key_locations

Unnamed: 0,latitude,longitude,Nearest Starbucks (m),Nearest park (m),Nearest Airport (m),Nearest Dog Salon (m),Nearest Night Clubs (m),Nearest Basketball Stadium (m),Nearest School (m),Nearest Rail Station (m),Nearest Port (m),Nearest Vegan Restaurant (m)
point1,37.776745,-122.417106,1172,1767,17869,1015,1283,1767,497,242,2945,1057
point2,37.781293,-122.408044,506,620,18275,556,1541,1369,1019,311,2057,983
point3,37.789844,-122.401635,724,959,19177,409,2520,2019,2124,550,956,389
point4,37.781561,-122.392692,747,937,18224,919,1974,1166,2056,619,1689,820


In [274]:
#gdf_starbucks = gpd.GeoDataFrame(df_starbucks, geometry=gpd.points_from_xy(df_starbucks["lon"], df_starbucks["lat"]))
#gdf_starbucks.shape

#Scraping
# BeautifulSoup is not imported in any visible cell; import it here so the cell
# does not depend on the `from geo_functions import *` star import providing it.
from bs4 import BeautifulSoup

# NOTE(review): this page lists design firms in Singapore, while the rest of
# the notebook analyses San Francisco -- confirm this is the intended page.
page_design = requests.get('https://www.dexigner.com/directory/loc/Singapore/Firms')
soup_design = BeautifulSoup(page_design.content, 'html.parser')
title2 = soup_design.title.text # Get page title 
title2

# Text of every <h3> heading on the page (presumably the firm names).
design_companies = [heading.getText().replace("h3", "").replace(">", "").strip() for heading in soup_design.find_all("h3")]
design_companies