In [1]:
#Libraries
from collections import Counter
# from cartoframes.viz import Map, Layer, popup_element
from dotenv import load_dotenv
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
# import geopandas as gpd
import json
import os
import pandas as pd
from pymongo import MongoClient, GEOSPHERE
import requests

In [39]:
#Functions from .py
from src.clean import get_results_from_foursquare as foursquare

In [2]:
client = MongoClient("localhost:27017")

In [3]:
db = client["Ironhack"]

In [4]:
#geting the dataset
c=db.get_collection("companies")

In [5]:
# filter for the design companies
#only 4
filter_design = {"category_code":"design"}
projection_design = {"_id":0, "name":1, "offices.city":1}
list(c.find(filter_design, projection_design))

[{'name': '99designs',
  'offices': [{'city': 'San Francisco'},
   {'city': 'Collingwood'},
   {'city': 'Berlin'}]},
 {'name': 'Graticle', 'offices': [{'city': 'Ellensburg'}]},
 {'name': 'Kickstarter', 'offices': [{'city': 'Brooklyn'}]},
 {'name': 'Moonfruit', 'offices': [{'city': 'London'}]}]

In [6]:
#checking for the industry to define tech industry
list(c.distinct("category_code"))[:5]

[None, 'advertising', 'analytics', 'automotive', 'biotech']

In [7]:
#list with tech companies
tech = ['biotech','cleantech','ecommerce','games_video','nanotech','software','web']

In [8]:
#check list of latest founded year
list(c.distinct("founded_year"))[-4:]

[2010, 2011, 2012, 2013]

In [9]:
c.find_one()["funding_rounds"][0]['raised_amount']

5250000

In [10]:
# filter tech companies for startup criteria
filter_tech = {"category_code":{"$in":tech}}
filter_year = {"founded_year":{"$gte":2000}}
filter_funding = {"funding_rounds.raised_amount":{"$gte":1000000}}
filter_employees={"number_of_employees":{"$lte":100}}

In [11]:
#list of filters with all filters
all_filters=[ filter_tech, filter_year, filter_funding, filter_employees ]

In [71]:
#list with all filters applied to 'c'
projection_3 = {"_id":0,"offices":1}
x=list(c.find({"$and": all_filters}, projection_3))
x[:10]

[{'offices': [{'description': '',
    'address1': '710 - 2nd Avenue',
    'address2': 'Suite 1100',
    'zip_code': '98104',
    'city': 'Seattle',
    'state_code': 'WA',
    'country_code': 'USA',
    'latitude': 47.603122,
    'longitude': -122.333253},
   {'description': '',
    'address1': '270 Lafayette Street',
    'address2': 'Suite 505',
    'zip_code': '10012',
    'city': 'New York',
    'state_code': 'NY',
    'country_code': 'USA',
    'latitude': 40.7237306,
    'longitude': -73.9964312}]},
 {'offices': [{'description': 'Headquarters',
    'address1': '9229 W. Sunset Blvd.',
    'address2': '',
    'zip_code': '90069',
    'city': 'West Hollywood',
    'state_code': 'CA',
    'country_code': 'USA',
    'latitude': 34.090368,
    'longitude': -118.393064}]},
 {'offices': [{'description': '',
    'address1': '100 5th Ave Fl 6',
    'address2': '',
    'zip_code': '10011-6903',
    'city': 'New York',
    'state_code': 'NY',
    'country_code': 'USA',
    'latitude': 40.7464

In [72]:
# Checking an individual element to design the for loop
x[0]['offices'][0]['city']

'Seattle'

In [73]:
# List of cities from offices from companies
cities=[j['city'] for i in x for j in i['offices'] ]
cities[:10]

['Seattle',
 'New York',
 'West Hollywood',
 'New York',
 'Sunnyvale',
 'Culver City',
 'San Francisco',
 'New York',
 'Pleasanton',
 'Luxembourg City']

In [74]:
#Top 10 count of city mentions.
counts = pd.Series(cities).value_counts()
print(counts[:10])

San Francisco    61
New York         57
London           31
Seattle          23
San Mateo        20
Palo Alto        16
Paris            15
Los Angeles      12
Cambridge        12
Sunnyvale        10
dtype: int64


In [16]:
# Best cities to work as a Designer
# https://www.designrush.com/agency/graphic-design/trends/top-countries-and-cities-for-design-jobs#:~:text=New%20York%20City%20is%20an,of%20the%20best%20design%20talent
'''
Berlin
Toronto
Montreal
    New York
Boston
    San Francisco
    London
'''

'\nBerlin\nToronto\nMontreal\n    New York\nBoston\n    San Francisco\n    London\n'

In [17]:
# Cross reference both and we got our 3 cities: San Francisco, New York & London.

In [18]:
# London Tech startup cluster
![East London]()

zsh: parse error near `()'


In [19]:
# New York Tech startup cluster
![Lower & Mid Manhattan]()

zsh: parse error near `()'


In [20]:
# New York json from https://raw.githubusercontent.com/mongodb/docs-assets/geospatial/restaurants.json
nyneigh = db.get_collection("nyneigh")

In [21]:
db.nyneigh.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [22]:
nyneigh.find_one()

{'_id': ObjectId('55cb9c666c522cafdb053a1a'),
 'geometry': {'coordinates': [[[-73.94193078816193, 40.70072523469547],
    [-73.9443878859649, 40.70042452378256],
    [-73.94424286147482, 40.69969927964773],
    [-73.94409591260093, 40.69897295461309],
    [-73.94394947271304, 40.69822127983908],
    [-73.94391750192877, 40.69805620211356],
    [-73.94380383211836, 40.697469265449826],
    [-73.94378455587042, 40.6973697290538],
    [-73.94374306706803, 40.69715549995503],
    [-73.9437245356891, 40.697059812179496],
    [-73.94368427322361, 40.696851909818065],
    [-73.9436842703752, 40.69685189440415],
    [-73.94363806934868, 40.69661331854307],
    [-73.94362121369004, 40.696526279661654],
    [-73.9435563415296, 40.69619128295102],
    [-73.94354024149403, 40.6961081421151],
    [-73.94352527471477, 40.69603085523812],
    [-73.94338802084431, 40.69528899051899],
    [-73.943242490861, 40.694557485733355],
    [-73.94312826743185, 40.693967038330925],
    [-73.94311427813774, 40.6

In [23]:
list(nyneigh.distinct('name')) [:10]

['Airport',
 'Allerton-Pelham Gardens',
 "Annadale-Huguenot-Prince's Bay-Eltingville",
 'Arden Heights',
 'Astoria',
 'Auburndale',
 'Baisley Park',
 'Bath Beach',
 'Battery Park City-Lower Manhattan',
 'Bay Ridge']

In [24]:
#San Francisco json from 
with open ("jsons-input/SanFrancisco.Neighborhoods.json") as f:
    districts_sf = json.load(f)
with open ("jsons-input/sfneighborhoods.json", "w") as file:
    json.dump(districts_sf['features'], file)

In [25]:
!pwd

/Users/nicostan/Downloads/Ironhack/labs/Project-III


In [26]:
db.sf.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [27]:
sfneigh=db.get_collection('sf')

In [28]:
sfneigh.find_one()

{'_id': ObjectId('62e4035a2529aa10e9859819'),
 'geometry': {'geometries': [{'type': 'Polygon',
    'coordinates': [[[-122.391701, 37.794113],
      [-122.39198, 37.793906],
      [-122.391614, 37.793571],
      [-122.391714, 37.793459],
      [-122.388816, 37.791005],
      [-122.388932, 37.790919],
      [-122.388616, 37.790348],
      [-122.388076, 37.790518],
      [-122.388375, 37.790334],
      [-122.388225, 37.790032],
      [-122.385852, 37.790951],
      [-122.385496, 37.790559],
      [-122.387589, 37.789838],
      [-122.387289, 37.789347],
      [-122.385303, 37.789838],
      [-122.38512, 37.789313],
      [-122.387174, 37.788807],
      [-122.387032, 37.788255],
      [-122.385261, 37.788537],
      [-122.385136, 37.788156],
      [-122.38739, 37.787736],
      [-122.387415, 37.787269],
      [-122.3845, 37.787437],
      [-122.384342, 37.785728],
      [-122.387577, 37.785485],
      [-122.38765, 37.784929],
      [-122.385546, 37.785009],
      [-122.385455, 37.784711],


In [29]:
[i['neighborhood'] for i in list(sfneigh.distinct('properties'))]

['Tenderloin',
 'SoMa',
 'Rincon Hill',
 'Nob Hill',
 'Mission',
 'Financial District',
 'Ingleside',
 'Lower Pacific Heights',
 'Marina',
 'Portola',
 'Twin Peaks',
 'Lake Merced',
 'Bayshore',
 'South Beach',
 'Chinatown',
 'Castro',
 'Taraval',
 'Panhandle',
 'Inner Richmond',
 'Outer Richmond',
 'Sunset',
 'Bayview',
 'North Beach']

In [30]:
# London json from https://skgrange.github.io/data.html
with open ("jsons-input/london_sport.json") as l:
    districts_lon = json.load(l)
with open ("jsons-input/londonneighborhoods.json", "w") as file:
    json.dump(districts_lon['features'], file)

In [31]:
db.london.create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [32]:
loneigh=db.get_collection('london')

In [33]:
loneigh.find_one()

{'_id': ObjectId('62e50edb2529aa10e9859835'),
 'type': 'Feature',
 'id': 0,
 'properties': {'ons_label': '00AF',
  'name': 'Bromley',
  'partic_per': 21.7,
  'pop_2001': 295535},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[0.031639088712238, 51.4428843227612],
    [0.041526082766736, 51.4404649255641],
    [0.063332800480531, 51.4232110154574],
    [0.076945878365856, 51.4315080025602],
    [0.109226234597471, 51.4135979152486],
    [0.131190923878993, 51.4143730338874],
    [0.150436957527071, 51.4079837004008],
    [0.163993389021342, 51.3919619878559],
    [0.149140558177895, 51.3916280396824],
    [0.154842725154884, 51.377506045826],
    [0.144220785387893, 51.3502024646528],
    [0.138560438666583, 51.3436406209607],
    [0.120056777631048, 51.343613812122],
    [0.123284075525613, 51.3331015205917],
    [0.119503275315481, 51.3291280130177],
    [0.106492317442378, 51.3269074230516],
    [0.086624084129068, 51.3154869820709],
    [0.08447573259809, 51.3051185300379],
  

In [34]:
loneigh.find_one()

{'_id': ObjectId('62e50edb2529aa10e9859835'),
 'type': 'Feature',
 'id': 0,
 'properties': {'ons_label': '00AF',
  'name': 'Bromley',
  'partic_per': 21.7,
  'pop_2001': 295535},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[0.031639088712238, 51.4428843227612],
    [0.041526082766736, 51.4404649255641],
    [0.063332800480531, 51.4232110154574],
    [0.076945878365856, 51.4315080025602],
    [0.109226234597471, 51.4135979152486],
    [0.131190923878993, 51.4143730338874],
    [0.150436957527071, 51.4079837004008],
    [0.163993389021342, 51.3919619878559],
    [0.149140558177895, 51.3916280396824],
    [0.154842725154884, 51.377506045826],
    [0.144220785387893, 51.3502024646528],
    [0.138560438666583, 51.3436406209607],
    [0.120056777631048, 51.343613812122],
    [0.123284075525613, 51.3331015205917],
    [0.119503275315481, 51.3291280130177],
    [0.106492317442378, 51.3269074230516],
    [0.086624084129068, 51.3154869820709],
    [0.08447573259809, 51.3051185300379],
  

In [35]:
[i['name'] for i in list(loneigh.distinct('properties'))][:10]

['City of London',
 'Barking and Dagenham',
 'Barnet',
 'Bexley',
 'Brent',
 'Bromley',
 'Camden',
 'Croydon',
 'Ealing',
 'Enfield']

In [37]:
# Our online research shows the best quality of life is in San Francisco, therefore we will pick a centered location

In [95]:
new_list = [{"name" : 'Startup',
             "lat"  : j["latitude"],
             "long" : j["longitude"], 
             "type" : {"typepoint": {"type": "Point"}}} for i in x for j in i['offices'] if j['city']=='San Francisco']
df = pd.DataFrame(new_list).dropna()
df.sample()

Unnamed: 0,name,lat,long,type
18,Startup,37.786906,-122.397672,{'typepoint': {'type': 'Point'}}


In [97]:
#Credentials
load_dotenv()
key = os.getenv("key")

In [40]:
location = [37.7615584,-122.4155738]
radius=3000
limit=10

In [98]:
#creating the DFs
df1=foursquare (key,"Starbucks", location, radius, limit)
df1.sample()

Unnamed: 0,name,lat,long,type
9,Starbucks,37.751353,-122.431652,{'typepoint': {'type': 'Point'}}


In [99]:
df2=foursquare (key,"Children School", location, radius, limit)
df2.sample()

Unnamed: 0,name,lat,long,type
0,Children School,37.764164,-122.418794,{'typepoint': {'type': 'Point'}}
1,Children School,37.761834,-122.422919,{'typepoint': {'type': 'Point'}}
2,Children School,37.767468,-122.411033,{'typepoint': {'type': 'Point'}}
3,Children School,37.752043,-122.414925,{'typepoint': {'type': 'Point'}}
4,Children School,37.771227,-122.419139,{'typepoint': {'type': 'Point'}}


In [100]:
sf_map = Map(location, zoom_start = 14)

In [101]:
company = Marker(location, tooltip="Company", icon=Icon(color="black",
           opacity=0.1,
           prefix="fa", #It comes from Font Awesome
           icon="database",
           icon_color="green"))

In [102]:
company.add_to(sf_map)

<folium.map.Marker at 0x7f8c2c382b80>

In [103]:
df= pd.concat([df, df1, df2], ignore_index=True) #,df3, df4, df5, df6 ...

In [104]:
for index, row in df.iterrows():
    
     # 1. Location (and some other things like the tooltip)
    place = {"location": [row["lat"], row["long"]], "tooltip": row["name"]}
    
    # 2. Icon based on the value
    if row["name"] == "Startup":
        icon = Icon (
            color="red",
            prefix="fa",
            icon="lightbulb-o",
            icon_color = "yellow") 
    
    
    elif row["name"] == "Starbucks":
        icon = Icon (
            color="green",
            prefix="fa",
            icon="coffee",
            icon_color = "white")
    
    elif row["name"] == "Children School":
        icon = Icon (
            color="lightblue",
            prefix="fa",
            icon="book",
            icon_color = "brown")
    
    new_marker = Marker(**place, icon=icon)
    new_marker.add_to(sf_map)

In [105]:
sf_map

In [None]:
type_point(location)
query = {"location": {"$near": { "$geometry": type_point_ul, "$maxDistance": 6500}}}