In [94]:
import os
import requests
import json
from dotenv import load_dotenv
import pandas as pd
from pandas import json_normalize
from pymongo import MongoClient
import geopandas as gpd
import numpy as np
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
from cartoframes.viz import Map as Map2, Layer, popup_element

In [95]:
import sys

sys.path.append('../src')

from geo_functions import *

## Filtering to choose City

In [96]:
client = MongoClient("localhost:27017")
db = client['Ironhack']
c = db.get_collection('Companies')

### Filter for companies that have raised more than 1 Million (USD or EUR)

In [97]:
filter_ = {"$and":
             [{'offices': {'$exists': 1}},
             {'total_money_raised' : {'$regex' : '[$€].*[MB]'}}]}
projection = {'name':1, '_id':0, 'total_money_raised':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1,'offices.latitude':1,'offices.longitude':1}
list_ = list(c.find(filter_, projection).sort('offices.country_code'))[20:]

In [98]:
df = pd.DataFrame(list_).explode("offices").reset_index(drop=True)
df = pd.concat([df, df["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df.dropna(subset=["latitude"],inplace=True)
df.dropna(subset=["city"],inplace=True)
df.drop(columns= 'offices', inplace=True)
df.drop(columns= 0, inplace=True)
df[:5]


Unnamed: 0,name,total_money_raised,city,state_code,country_code,latitude,longitude
146,Rally Software,$68.9M,Boulder,CO,USA,40.010295,-105.242005
150,Rally Software,$68.9M,Amsterdam,,ANT,52.338098,4.86881
151,Rally Software,$68.9M,Buckinghamshire,,GBR,51.666619,-0.614003
155,GET Holding NV,€4.25M,Zaandijk,,NLD,52.475393,4.812833
159,Google,$555M,Mountain View,CA,USA,37.421972,-122.084143


In [99]:
df['city'].value_counts()[:5]

San Francisco    318
New York         221
Mountain View     90
Seattle           88
Palo Alto         84
Name: city, dtype: int64

### Filter for companies that focus on video games

In [100]:
filter_2 = {"$and": 
             [{"category_code":"games_video"},
             {'offices': {'$exists': 1}}]}
projection_2 = {'name':1, '_id':0, 'category_code':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1}
list_2 = list(c.find(filter_2, projection_2).sort('offices.country_code'))[20:]

In [101]:
df_2 = pd.DataFrame(list_2).explode("offices").reset_index(drop=True)
df_2 = pd.concat([df_2, df_2["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df_2.drop(columns= 'offices', inplace=True)
df_2.dropna(subset=["city"],inplace=True)
df_2.drop(columns= 0, inplace=True)
df_2 = df_2[df_2['city']!='']
df_2[:5]

Unnamed: 0,name,category_code,city,state_code,country_code
224,mysoogal,games_video,none,,ARA
225,Vostu,games_video,Buenos Aires,,ARG
226,Tweegee,games_video,Ramat Gan,,ISR
227,Tweegee,games_video,Istanbul,,TUR
228,Tweegee,games_video,Moscow,,RUS


In [102]:
df_2['city'].value_counts()[:5]

New York         75
San Francisco    68
London           36
Los Angeles      31
Paris            17
Name: city, dtype: int64

### Filter for companies that focus on design

In [103]:
filter_3 = {"$and": 
             [{"tag_list": {'$regex': "design"}},
             {'offices': {'$exists': 1}}]}
projection_3 = {'name':1, '_id':0, 'tag_list':1, 'offices.country_code': 1, "offices.state_code":1,'offices.city':1}
list_3 = list(c.find(filter_3, projection_3).sort('offices.country_code'))

In [134]:
df_3 = pd.DataFrame(list_3).explode("offices").reset_index(drop=True)
df_3 = pd.concat([df_3, df_3["offices"].apply(pd.Series)], axis=1).reset_index(drop=True)
df_3.drop(columns= 'offices', inplace=True)
df_3.dropna(subset=["city"],inplace=True)
df_3.drop(columns= 0, inplace=True)
df_3 = df_3[df_3['city']!='']
df_3[:5]

Unnamed: 0,name,tag_list,city,state_code,country_code
101,Making Sense,"custom-software-development, email-marketing, ...",San Antonio,TX,USA
102,Making Sense,"custom-software-development, email-marketing, ...",Buenos Aires,,ARG
103,Making Sense,"custom-software-development, email-marketing, ...",Mar del Plata,,ARG
104,Popego,"techcrunch50, tc50, web-service, interests, se...",Buenos Aires,,ARG
105,Popego,"techcrunch50, tc50, web-service, interests, se...",San Francisco,CA,USA


In [105]:
df_3['city'].value_counts()[:5]

London           24
New York         23
San Francisco    20
San Diego        11
Chennai          10
Name: city, dtype: int64

In [106]:
# Im going to focus on San Francisco
# Lots of tech start ups with +1 M funding
# Video games companies nearby
# Design companies nearby

Create a collection with the offices in the Companies DB

In [115]:
o = db.get_collection('Offices')

In [116]:
new_collection = c.aggregate([
    {"$unwind":"$offices"},
    {"$match":{"offices.latitude":{"$ne":None}, "offices.longitude":{"$ne":None}}},
    {"$project":{"_id":0}}])
o.insert_many(new_collection)   

<pymongo.results.InsertManyResult at 0x26800876480>

In [167]:
list_offices = o.find({},{"offices":1})

In [168]:
for company in list_offices:
    geojson = {
        "type":"Point",
        "coordinates":[company["offices"]["longitude"], company["offices"]["latitude"]]
    } 
    o.update_one(company, {"$set":{"geojson":geojson}})

In [169]:
o.create_index([("geojson", "2dsphere")])

'geojson_2dsphere'

In [175]:
total_offices_worldwide = pd.DataFrame(list(o.find()))
total_offices_worldwide = total_offices_worldwide[['name', 'offices', 'geojson']]

In [176]:
total_offices_worldwide = pd.concat([total_offices_worldwide, total_offices_worldwide['offices'].apply(pd.Series)], axis=1).reset_index(drop=True)
total_offices_worldwide = total_offices_worldwide[['name', 'country_code', 'city', 'state_code', 'longitude', 'latitude', 'geojson']]
total_offices_worldwide[total_offices_worldwide["country_code"]=="USA"]


Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
0,Wetpaint,USA,Seattle,WA,-122.333253,47.603122,"{'type': 'Point', 'coordinates': [-122.333253,..."
1,Wetpaint,USA,New York,NY,-73.996431,40.723731,"{'type': 'Point', 'coordinates': [-73.9964312,..."
2,AdventNet,USA,Pleasanton,CA,-121.904945,37.692934,"{'type': 'Point', 'coordinates': [-121.904945,..."
3,Zoho,USA,Pleasanton,CA,-121.904945,37.692934,"{'type': 'Point', 'coordinates': [-121.904945,..."
4,Digg,USA,San Francisco,CA,-122.394523,37.764726,"{'type': 'Point', 'coordinates': [-122.394523,..."
...,...,...,...,...,...,...,...
10828,Contemporary Computer Services,USA,Bohemia,NY,-73.088140,40.775055,"{'type': 'Point', 'coordinates': [-73.08814, 4..."
10829,QSGI,USA,Palm Beach,FL,-80.041395,26.705331,"{'type': 'Point', 'coordinates': [-80.041395, ..."
10831,AfterLogic,USA,Livingston,NJ,-74.323554,40.793024,"{'type': 'Point', 'coordinates': [-74.3235539,..."
10832,EnteGreat Solutions,USA,Birmingham,AL,-86.816068,33.518885,"{'type': 'Point', 'coordinates': [-86.816068, ..."


In [178]:
total_offices_CA = total_offices_worldwide[total_offices_worldwide['state_code'] == 'CA'].reset_index(drop=True)
total_offices_CA

Unnamed: 0,name,country_code,city,state_code,longitude,latitude,geojson
0,AdventNet,USA,Pleasanton,CA,-121.904945,37.692934,"{'type': 'Point', 'coordinates': [-121.904945,..."
1,Zoho,USA,Pleasanton,CA,-121.904945,37.692934,"{'type': 'Point', 'coordinates': [-121.904945,..."
2,Digg,USA,San Francisco,CA,-122.394523,37.764726,"{'type': 'Point', 'coordinates': [-122.394523,..."
3,Facebook,USA,Menlo Park,CA,-122.151801,37.416050,"{'type': 'Point', 'coordinates': [-122.151801,..."
4,Postini,USA,San Carlos,CA,-122.247573,37.506885,"{'type': 'Point', 'coordinates': [-122.247573,..."
...,...,...,...,...,...,...,...
2968,Purfresh,USA,Menlo Park,CA,-122.151599,37.477008,"{'type': 'Point', 'coordinates': [-122.151599,..."
2969,MyChances,USA,Menlo Park,CA,-122.180281,37.448491,"{'type': 'Point', 'coordinates': [-122.1802812..."
2970,InSound Medical,USA,Newark,CA,-121.998513,37.512038,"{'type': 'Point', 'coordinates': [-121.998513,..."
2971,Intergy,USA,Dublin,CA,-121.936698,37.701940,"{'type': 'Point', 'coordinates': [-121.9366984..."


In [None]:
#df=df[df["country_code"]=="USA"]
#df=df[df['state_code'] == 'CA'].reset_index(drop=True)

In [108]:
load_dotenv()
token_fsq = os.getenv("token_foursquare")

In [109]:
query = 'club'

In [110]:
url = 'https://api.foursquare.com/v3/places/search?query=%22club&ll=41.39798122862634%2C2.1909169650468487'

headers = {"accept": "application/json", "Authorization": token_fsq, "limit":"10"}

response = requests.get(url, headers=headers).json()

response

{'results': [{'fsq_id': '4adcda4ff964a520f74021e3',
   'categories': [{'id': 10032,
     'name': 'Night Club',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/nightclub_',
      'suffix': '.png'}}],
   'chains': [],
   'distance': 3375,
   'geocodes': {'main': {'latitude': 41.384641, 'longitude': 2.154478},
    'roof': {'latitude': 41.384641, 'longitude': 2.154478}},
   'link': '/v3/places/4adcda4ff964a520f74021e3',
   'location': {'address': 'Aragó, 141',
    'admin_region': 'Cataluña',
    'country': 'ES',
    'cross_street': '',
    'formatted_address': 'Aragó, 141, 08015 Barcelona Catalonia',
    'locality': 'Barcelona',
    'neighborhood': ["La Nova Esquerra de l'Eixample"],
    'postcode': '08015',
    'region': 'Catalonia'},
   'name': 'Antilla BCN Latin Club',
   'related_places': {},
   'timezone': 'Europe/Madrid'},
  {'fsq_id': '4da11675d686b60cb1899e28',
   'categories': [{'id': 10032,
     'name': 'Night Club',
     'icon': {'prefix': 'https://ss3.

In [111]:
new_list = []
for i in response["results"]:
    
    name = i["name"]
    address =  i["location"]["formatted_address"]
    lat = i["geocodes"]["main"]["latitude"]
    lon = i["geocodes"]["main"]["longitude"]
        
    type_ = {"typepoint": 
                          {"type": "Point", 
                           "coordinates": [lat, lon]}}
    
    new_list.append({"name":name, "lat":lat, "lon":lon, "type":type_})
    
new_list[:10]

[{'name': 'Antilla BCN Latin Club',
  'lat': 41.384641,
  'lon': 2.154478,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.384641, 2.154478]}}},
 {'name': 'Club Catwalk',
  'lat': 41.385878,
  'lon': 2.196682,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.385878, 2.196682]}}},
 {'name': 'Razzmatazz',
  'lat': 41.397599,
  'lon': 2.191102,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.397599, 2.191102]}}},
 {'name': 'Cdlc',
  'lat': 41.384976,
  'lon': 2.1963,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.384976, 2.1963]}}},
 {'name': 'Opium',
  'lat': 41.385104,
  'lon': 2.196826,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.385104, 2.196826]}}},
 {'name': 'Input',
  'lat': 41.368636,
  'lon': 2.14877,
  'type': {'typepoint': {'type': 'Point',
    'coordinates': [41.368636, 2.14877]}}},
 {'name': 'La Terrrazza',
  'lat': 41.368845,
  'lon': 2.149262,
  'type': {'typepoint': {'type': 'Point',
 

In [112]:
df = pd.DataFrame(new_list)

In [113]:
gdf_1 = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df["lon"], df["lat"]))
gdf_1.head()

Unnamed: 0,name,lat,lon,type,geometry
0,Antilla BCN Latin Club,41.384641,2.154478,"{'typepoint': {'type': 'Point', 'coordinates':...",POINT (2.15448 41.38464)
1,Club Catwalk,41.385878,2.196682,"{'typepoint': {'type': 'Point', 'coordinates':...",POINT (2.19668 41.38588)
2,Razzmatazz,41.397599,2.191102,"{'typepoint': {'type': 'Point', 'coordinates':...",POINT (2.19110 41.39760)
3,Cdlc,41.384976,2.1963,"{'typepoint': {'type': 'Point', 'coordinates':...",POINT (2.19630 41.38498)
4,Opium,41.385104,2.196826,"{'typepoint': {'type': 'Point', 'coordinates':...",POINT (2.19683 41.38510)


In [114]:
Map(Layer(gdf_1, "color:purple", popup_hover=[popup_element("name", "Libraries in Madrid")]))

TypeError: Location should be a sized variable, for example a list or a tuple, instead got <cartoframes.viz.layer.Layer object at 0x0000026864DA5BE0> of type <class 'cartoframes.viz.layer.Layer'>.