In [136]:
import os
import requests
import json
from dotenv import load_dotenv
import pandas as pd
from pandas import json_normalize
from pymongo import MongoClient
import geopandas as gpd
import numpy as np
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
from cartoframes.viz import Map as Map2, Layer, popup_element

In [137]:
import sys

sys.path.append('../src')

from geo_functions import *

## Filtering to choose City

In [138]:
client = MongoClient("localhost:27017")
db = client['Ironhack']
c = db.get_collection('Companies')

### Filter for companies that have raised more than 1 Million (USD or EUR)

In [139]:
filter_ = {"$and":
             [{'offices': {'$exists': 1}},
             {'total_money_raised' : {'$regex' : '[$€].*[MB]'}}]}
projection = {'name':1, '_id':0, 'total_money_raised':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1,'offices.latitude':1,'offices.longitude':1}
list_ = list(c.find(filter_, projection).sort('offices.country_code'))[20:]

In [140]:
df = pd.DataFrame(list_).explode("offices").reset_index(drop=True)
df = pd.concat([df, df["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df.dropna(subset=["latitude"],inplace=True)
df.dropna(subset=["city"],inplace=True)
df.drop(columns= 'offices', inplace=True)
df.drop(columns= 0, inplace=True)
df[:5]


Unnamed: 0,name,total_money_raised,city,state_code,country_code,latitude,longitude
146,Rally Software,$68.9M,Boulder,CO,USA,40.010295,-105.242005
150,Rally Software,$68.9M,Amsterdam,,ANT,52.338098,4.86881
151,Rally Software,$68.9M,Buckinghamshire,,GBR,51.666619,-0.614003
155,GET Holding NV,€4.25M,Zaandijk,,NLD,52.475393,4.812833
159,Google,$555M,Mountain View,CA,USA,37.421972,-122.084143


In [141]:
df['city'].value_counts()[:5]

San Francisco    318
New York         221
Mountain View     90
Seattle           88
Palo Alto         84
Name: city, dtype: int64

In [142]:
df_SF = df[df['city'] == 'San Francisco']

### Filter for companies that focus on video games

In [143]:
filter_2 = {"$and": 
             [{"category_code":"games_video"},
             {'offices': {'$exists': 1}}]}
projection_2 = {'name':1, '_id':0, 'category_code':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1, 'offices.latitude':1,'offices.longitude':1}
list_2 = list(c.find(filter_2, projection_2).sort('offices.country_code'))[20:]

In [144]:
df_2 = pd.DataFrame(list_2).explode("offices").reset_index(drop=True)
df_2 = pd.concat([df_2, df_2["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df_2.drop(columns= 'offices', inplace=True)
df_2.dropna(subset=["city"],inplace=True)
df_2.dropna(subset=["latitude"],inplace=True)
df_2.drop(columns= 0, inplace=True)
df_2 = df_2[df_2['city']!='']
df_2[:5]

Unnamed: 0,name,category_code,city,state_code,country_code,latitude,longitude
226,Tweegee,games_video,Ramat Gan,,ISR,32.037363,34.80037
230,Tweegee,games_video,Ramat Gan,,ISR,32.037363,34.80037
234,CreationFlow,games_video,Ciudad de Buenos Aires,,ARG,-34.602858,-58.433954
239,2threads,games_video,sydney,,AUS,-33.884685,151.216427
240,Sporting Connections,games_video,Maroochydore,,AUS,-26.665908,153.088303


In [145]:
df_2['city'].value_counts()[:5]

New York         51
San Francisco    49
Los Angeles      25
London           20
Palo Alto        16
Name: city, dtype: int64

In [146]:
df_2_SF = df_2[df_2['city'] == 'San Francisco']

### Filter for companies that focus on design

In [147]:
filter_3 = {"$and": 
             [{"tag_list": {'$regex': "design"}},
             {'offices': {'$exists': 1}}]}
projection_3 = {'name':1, '_id':0, 'tag_list':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1, 'offices.latitude':1,'offices.longitude':1}
list_3 = list(c.find(filter_3, projection_3).sort('offices.country_code'))

In [148]:
df_3 = pd.DataFrame(list_3).explode("offices").reset_index(drop=True)
df_3 = pd.concat([df_3, df_3["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df_3.drop(columns= 'offices', inplace=True)
df_3.dropna(subset=["city"],inplace=True)
df_3.dropna(subset=["latitude"],inplace=True)
df_3.drop(columns= 0, inplace=True)
df_3 = df_3[df_3['city']!='']
df_3[:5]

Unnamed: 0,name,tag_list,city,state_code,country_code,latitude,longitude
101,Making Sense,"custom-software-development, email-marketing, ...",San Antonio,TX,USA,29.516352,-98.43676
102,Making Sense,"custom-software-development, email-marketing, ...",Buenos Aires,,ARG,-34.589281,-58.43297
103,Making Sense,"custom-software-development, email-marketing, ...",Mar del Plata,,ARG,-38.006092,-57.558244
105,Popego,"techcrunch50, tc50, web-service, interests, se...",San Francisco,CA,USA,37.778687,-122.421242
108,Popego,"techcrunch50, tc50, web-service, interests, se...",San Francisco,CA,USA,37.778687,-122.421242


In [149]:
df_3['city'].value_counts()[:5]

New York         18
London           14
San Francisco    12
Los Angeles       9
San Diego         8
Name: city, dtype: int64

In [150]:
df_3_SF = df_3[df_3['city'] == 'San Francisco']

In [151]:
# Im going to focus on San Francisco
# Lots of tech start ups with +1 M funding
# Video games companies nearby
# Design companies nearby

Create a collection with the offices in the Companies DB

In [152]:
o = db.get_collection('Offices')

In [153]:
new_collection = c.aggregate([
    {"$unwind":"$offices"},
    {"$match":{"offices.latitude":{"$ne":None}, "offices.longitude":{"$ne":None}}},
    {"$project":{"_id":0}}])
o.insert_many(new_collection)   

<pymongo.results.InsertManyResult at 0x28b26359140>

In [154]:
list_offices = o.find({},{"offices":1})

In [155]:
for company in list_offices:
    geojson = {
        "type":"Point",
        "coordinates":[company["offices"]["longitude"], company["offices"]["latitude"]]
    } 
    o.update_one(company, {"$set":{"geojson":geojson}})

In [156]:
o.create_index([("geojson", "2dsphere")])

'geojson_2dsphere'

Look at the offices located at San Francisco and show them in a map to see where we can locate our offices.

In [157]:
total_offices_worldwide = pd.DataFrame(list(o.find()))
total_offices_worldwide = total_offices_worldwide[['name', 'offices', 'geojson']]

In [158]:
total_offices_worldwide = pd.concat([total_offices_worldwide, total_offices_worldwide['offices'].apply(pd.Series)], axis=1).reset_index(drop=True)
total_offices_worldwide = total_offices_worldwide[['name', 'country_code', 'city', 'state_code', 'longitude', 'latitude', 'geojson']]
total_offices_worldwide = total_offices_worldwide[total_offices_worldwide["country_code"]=="USA"]
total_offices_worldwide.sample()

Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
1715,Joberator,USA,St Petersburg,FL,-82.634253,27.773404,"{'type': 'Point', 'coordinates': [-82.6342535,..."


In [159]:
total_offices_SF = total_offices_worldwide[total_offices_worldwide['city'] == 'San Francisco']
total_offices_SF.sample()

Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
364,Weebly,USA,San Francisco,CA,-122.400729,37.795555,"{'type': 'Point', 'coordinates': [-122.400729,..."


In [160]:
total_offices_SF.shape

(659, 7)

In [161]:
san_fran_map = Map(location= [37.76961,-122.4537724], zoom_start=12)
san_fran_map

In [162]:
sf_group = folium.FeatureGroup(name= 'SanFran Offices')
HeatMap(data = total_offices_SF[['latitude', 'longitude']], radius=10).add_to(sf_group)
sf_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x28b0b541940>

In [163]:
df_group = folium.FeatureGroup(name= 'SanFran Companies that raised +1M')
HeatMap(data = df_SF[['latitude', 'longitude']], radius=10, gradient={'0':'Navy', '0.25':'Blue','0.5':'Green', '0.75':'Yellow','1': 'Red'}).add_to(df_group)
df_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x28b25d3a760>

In [164]:
df_2_group = folium.FeatureGroup(name= 'SanFran Video Games Companies')
HeatMap(data = df_2_SF[['latitude', 'longitude']], radius=10, gradient = {0.4: 'yellow', 0.65: 'orange', 1: 'white'}).add_to(df_2_group)
df_2_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x28b1a0a31c0>

In [165]:
df_3_group = folium.FeatureGroup(name= 'SanFran Design Companies')
HeatMap(data = df_3_SF[['latitude', 'longitude']], radius=15, gradient = {0.4: 'gray', 0.65: 'black', 1: 'white'}).add_to(df_3_group)
df_3_group.add_to(san_fran_map)

<folium.map.FeatureGroup at 0x28b1a0a3ca0>

In [166]:
folium.LayerControl(collapsed=False, position="topleft").add_to(san_fran_map)

<folium.map.LayerControl at 0x28b1a0a3f70>

In [167]:
point1_lat = 37.775762764480874
point1_long = -122.41617630836654

In [179]:
icon = Icon(color = "red",
            opacity = 0.1,
            prefix = "fa",
            icon = "map-pin",
            icon_color = "white"
)
point1 = Marker(location = [point1_lat, point1_long], tooltip="Potential Location 1", icon=icon)
point1.add_to(san_fran_map)

<folium.map.Marker at 0x28b25d3abb0>

In [169]:
point2_lat = 37.78984423851833
point2_long = -122.40163489291831

In [184]:
icon = Icon(color = "red",
            opacity = 0.1,
            prefix = "fa",
            icon = "map-pin",
            icon_color = "white"
)
point2 = Marker(location = [point2_lat, point2_long], tooltip="Potential Location 2", icon=icon)
point2.add_to(san_fran_map)

<folium.map.Marker at 0x28b1a0a3580>

In [185]:
san_fran_map

Download data from foursquare API

In [172]:
load_dotenv()
token_fsq = os.getenv("token_foursquare")

In [173]:
#Starbucks 
airports = '19031'
barbershop = '11062'
night_club = '10032'
basketball = '18006'
school = '12057'
tram_station = '19050'
rail_station = '19047'
port = '19023'
vegan = '13377'
parks = '16032'


In [174]:
print(total_offices_SF['longitude'].mean())
print(total_offices_SF['latitude'].mean())

-119.9610752564492
37.95546490333839


In [175]:
def foursquare_query (query, lat, lon):

    url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={lat}%2C{lon}&limit=1"

    headers = {"accept": "application/json", "Authorization": token_fsq}
    response = requests.get(url, headers=headers).json()

    list = []
    for i in response["results"]:

        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]
        distance = i["distance"]

        list.append(distance)
    
    return list

In [176]:
query = 'Starbucks' #Starbucks
starbucks=[]
for i in zip(total_offices_SF["latitude"],total_offices_SF["longitude"]):
    starbucks.append(foursquare_query(query,i[0],i[1]))
total_offices_SF["Nearest Starbucks (m)"] = starbucks
total_offices_SF

ConnectionError: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v3/places/search?query=Starbucks&ll=37.762681%2C-122.400909&limit=1 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000028B1CABA760>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

In [None]:
total_offices_SF['nearest_Starbucks']= 

In [None]:
def foursquare_cat (category, lat, lon):

    url = f"https://api.foursquare.com/v3/places/search?ll={lat}%2C{lon}&categories={category}&limit=1"

    headers = {"accept": "application/json", "Authorization": token_fsq}
    response = requests.get(url, headers=headers).json()

    list = []
    for i in response["results"]:

        lat = i["geocodes"]["main"]["latitude"]
        lon = i["geocodes"]["main"]["longitude"]
        distance = i["distance"]

        list.append(distance)
    
    return list

In [None]:
category = 16032 #Outdoor Parks
park=[]
for i in zip(total_offices_SF["latitude"],total_offices_SF["longitude"]):
    park.append(foursquare_cat(category,i[0],i[1]))
total_offices_SF["nearest_park"] = park
total_offices_SF

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_offices_SF["nearest_park"] = park


Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson,nearest_park
4,Digg,USA,San Francisco,CA,-122.394523,37.764726,"{'type': 'Point', 'coordinates': [-122.394523,...","[37.759393, -122.40382]"
12,Twitter,USA,San Francisco,CA,-122.416924,37.776805,"{'type': 'Point', 'coordinates': [-122.4169244...","[37.792234, -122.41228]"
13,StumbleUpon,USA,San Francisco,CA,-122.419204,37.775196,"{'type': 'Point', 'coordinates': [-122.419204,...","[37.792234, -122.41228]"
15,Scribd,USA,San Francisco,CA,-122.404052,37.789634,"{'type': 'Point', 'coordinates': [-122.404052,...","[37.792234, -122.41228]"
28,Powerset,USA,San Francisco,CA,-122.395289,37.778613,"{'type': 'Point', 'coordinates': [-122.395289,...","[37.784655, -122.402441]"
...,...,...,...,...,...,...,...,...
10606,Danoo,USA,San Francisco,CA,-122.400863,37.788852,"{'type': 'Point', 'coordinates': [-122.4008634...","[37.784655, -122.402441]"
10611,UCWeb,USA,San Francisco,CA,-122.401322,37.781476,"{'type': 'Point', 'coordinates': [-122.4013217...","[37.784655, -122.402441]"
10665,Grassroots,USA,San Francisco,CA,-122.401850,37.790346,"{'type': 'Point', 'coordinates': [-122.40185, ...","[37.792234, -122.41228]"
10709,DJ Nitrogen,USA,San Francisco,CA,-122.449408,37.784174,"{'type': 'Point', 'coordinates': [-122.4494082...","[37.790161, -122.437677]"


In [None]:
#Starbucks_loc = foursquare_query('Starbucks', -122.4537724, 37.76961)

KeyError: 'results'

In [None]:
#gdf_starbucks = gpd.GeoDataFrame(df_starbucks, geometry=gpd.points_from_xy(df_starbucks["lon"], df_starbucks["lat"]))
#gdf_starbucks.shape

(50, 5)

#Scraping
page_design = requests.get('https://www.dexigner.com/directory/loc/Singapore/Firms')
soup_design = BeautifulSoup(page_design.content, 'html.parser')
title2 = soup_design.title.text # Get page title 
title2
​
design_companies = [c.getText().replace("h3", "").replace(">", "").strip() for c in soup_design.find_all("h3")]
design_companies