### Overview

For this project I have chosen three cities (San Francisco, London and Paris) that are known to be good places for gaming companies. The objective of the analysis is to check which of the three is best and decide where to locate the new offices. 

In [76]:
import math
import operator
import os
from functools import reduce

import pandas as pd
from pymongo import MongoClient

In [18]:
# For working with API requests

import json, requests

In [None]:
# For visualization in maps

import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster

In [None]:
#selfmade functions 

import src.frame as fr
import src.maping as mp
import src.api as ap
import src.distance as dt

#### The project is divided in three sections: Mongodb, API and Visualization, and Final Analysis. In each section, the same processes are done with the three selected cities.

### Mongodb

In [9]:
conn = MongoClient("localhost:27017")

In [10]:
db = conn.get_database("ironhack")

In [11]:
companies = db.get_collection("companies")

In [12]:
companies.find_one().keys()

dict_keys(['_id', 'name', 'permalink', 'crunchbase_url', 'homepage_url', 'blog_url', 'blog_feed_url', 'twitter_username', 'category_code', 'number_of_employees', 'founded_year', 'founded_month', 'founded_day', 'deadpooled_year', 'tag_list', 'alias_list', 'email_address', 'phone_number', 'description', 'created_at', 'updated_at', 'overview', 'image', 'products', 'relationships', 'competitions', 'providerships', 'total_money_raised', 'funding_rounds', 'investments', 'acquisition', 'acquisitions', 'offices', 'milestones', 'video_embeds', 'screenshots', 'external_links', 'partners'])

#### San Francisco 

In [14]:
# We take 30 companies from each city to check whether their offices are
# clustered close together or more dispersed.
# NOTE(review): `collection` is not defined or imported in any visible cell;
# presumably it belongs to one of the src helpers — confirm and qualify it.
sf = collection("San Francisco", 30)

# Build the San Francisco frame and keep only rows with at least 4 non-null
# values. `how` is no longer passed: pandas does not allow combining it with
# `thresh` (recent releases raise TypeError), and `axis` is given by keyword
# because recent pandas no longer accepts it positionally.
sf_df = fr.city_df(sf, "San Francisco").dropna(axis=0, thresh=4)

sf_df.head()

Unnamed: 0,name,category_code,city,latitude,longitude
0,StumbleUpon,web,San Francisco,37.775196,-122.419204
1,Wesabe,web,San Francisco,37.793148,-122.402567
2,Pownce,web,San Francisco,37.762541,-122.397224
3,Zooomr,web,San Francisco,37.775196,-122.419204
4,Pageflakes,web,San Francisco,37.758113,-122.414689


#### London

In [16]:
# Same steps as for the other cities: fetch 30 London companies.
# NOTE(review): `collection` is not defined or imported in any visible cell;
# presumably it belongs to one of the src helpers — confirm and qualify it.
ln = collection("London", 30)

# Build the London frame and keep only rows with at least 4 non-null values.
# `how` is no longer passed: pandas does not allow combining it with `thresh`
# (recent releases raise TypeError), and `axis` is given by keyword because
# recent pandas no longer accepts it positionally.
# NOTE(review): the displayed output contains rows at 37.09024, -95.712891,
# which are not London coordinates — check the source data for these offices.
ln_df = fr.city_df(ln, "London").dropna(axis=0, thresh=4)

ln_df.head()

Unnamed: 0,name,category_code,city,latitude,longitude
4,Lastminute,web,London,37.09024,-95.712891
5,Wonga,web,London,51.519204,-0.16261
6,ITV,web,London,37.09024,-95.712891
8,Dopplr,web,London,51.523789,-0.087432
9,Jiglu,software,London,51.54236,-0.149522


#### Paris

In [17]:
# Same steps as for the other cities: fetch 30 Paris companies.
# NOTE(review): `collection` is not defined or imported in any visible cell;
# presumably it belongs to one of the src helpers — confirm and qualify it.
pr = collection("Paris", 30)

# Build the Paris frame and keep only rows with at least 4 non-null values.
# `how` is no longer passed: pandas does not allow combining it with `thresh`
# (recent releases raise TypeError), and `axis` is given by keyword because
# recent pandas no longer accepts it positionally.
pr_df = fr.city_df(pr, "Paris").dropna(axis=0, thresh=4)

pr_df.head()

Unnamed: 0,name,category_code,city,latitude,longitude
0,Netvibes,web,Paris,48.870806,2.34668
1,TVtrip,web,Paris,48.856667,2.350987
2,WatZatSong,web,Paris,48.866323,2.381514
3,Jooce,web,Paris,48.856667,2.350987
4,Trivop,web,Paris,48.855853,2.336575


### API and Visualization

### San Francisco

In [20]:
initial_lat = 37.762541

initial_lon = -122.397224

In [21]:
map_sf = mp.mapa(initial_lat, initial_lon, 15)

#### Starbucks

In [22]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '37.775196, -122.419204'
RAD = 100
QUERY = 'StarBucks'

In [23]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&query={QUERY}&limit={LIMIT}'

In [24]:
starbucks = ap.extract(url)

In [25]:
len(starbucks)

15

In [26]:
starbucks_sf = fr.places_df(starbucks, "starbucks", 15)

#### Airports

In [27]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 5
LL = '37.775196, -122.419204'
RAD = 30000
CATEGORY_ID = '4bf58dd8d48988d1ed931735'

In [28]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [29]:
airports = ap.extract(url)

In [30]:
airports_sf = fr.places_df(airports, "airport", 5)

In [31]:
airports_sf

Unnamed: 0,name,latitud,longitud,category
0,San Francisco International Airport (SFO) (San...,37.616713,-122.387094,airport
1,Oakland International Airport (OAK) (Oakland I...,37.711731,-122.212495,airport
2,OAK Runway,37.71048,-122.213753,airport
3,Hayward Executive Airport (HWD),37.662295,-122.120114,airport
4,The Centurion Lounge,37.617609,-122.387915,airport


#### Nightlife

In [32]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '37.775196, -122.419204'
RAD = 30000
CATEGORY_ID = '4bf58dd8d48988d116941735'

In [33]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [34]:
clubs = ap.extract(url)

In [35]:
clubs_sf = fr.places_df(clubs, "club", 15)

In [36]:
frames = [starbucks_sf, airports_sf, clubs_sf]

san_francisco = pd.concat(frames)

In [37]:
mp.offices(sf_df, map_sf)

In [38]:
mp.full_map(san_francisco, map_sf)

### London

In [39]:
initial_lat = 51.523789
initial_lon = -0.087432

In [40]:
map_ln = mapa(initial_lat, initial_lon ,15)

#### Starbucks London

In [41]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '51.500152, -0.087432'
RAD = 200
QUERY = 'StarBucks'

In [42]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&query={QUERY}&limit={LIMIT}'

In [43]:
starbucks = ap.extract(url)

In [44]:
starbucks_ln = fr.places_df(starbucks, "starbucks", 15)

In [45]:
starbucks_ln.head()

Unnamed: 0,name,latitud,longitud,category
0,Starbucks,51.504796,-0.086873,starbucks
1,Starbucks,51.500896,-0.093399,starbucks
2,Starbucks,51.504041,-0.075553,starbucks
3,Starbucks,51.505487,-0.085133,starbucks
4,Starbucks,51.508436,-0.07916,starbucks


#### Airports London

In [46]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 5
LL = '51.500152, -0.126236'
RAD = 30000
CATEGORY_ID = '4bf58dd8d48988d1ed931735'

In [47]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [48]:
airports = ap.extract(url)

In [49]:
airports_ln = fr.places_df(airports, "airport", 5)

In [50]:
airports_ln

Unnamed: 0,name,latitud,longitud,category
0,London Heathrow Airport (LHR) (London Heathrow...,51.469408,-0.462348,airport
1,London City Airport (LCY) (London City Airport),51.503736,0.049638,airport
2,Biggin Hill Airport (BQH) (Biggin Hill Airport),51.331794,0.028845,airport
3,Elstree Aerodrome,51.65457,-0.323946,airport
4,Virgin Atlantic Clubhouse,51.470186,-0.46054,airport


#### Nightlife 

In [51]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '51.500152, -0.126236'
RAD = 300
CATEGORY_ID = '4bf58dd8d48988d116941735'

In [52]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [53]:
clubs = ap.extract(url)

In [54]:
clubs_ln = fr.places_df(clubs, "club", 7)

In [55]:
frames = [airports_ln, starbucks_ln, clubs_ln]

london = pd.concat(frames)

In [56]:
mp.offices(ln_df, map_ln)

In [57]:
mp.full_map(london, map_ln)

### Paris

In [58]:
initial_lat = 48.866323
initial_long = 2.381514

In [59]:
map_pr = mapa(initial_lat, initial_long, 15)

#### Starbucks

In [60]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '48.866323, 2.381514'
RAD = 200
QUERY = 'StarBucks'

In [61]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&query={QUERY}&limit={LIMIT}'

In [62]:
starbucks = ap.extract(url)

In [63]:
starbucks_pr = fr.places_df(starbucks, "starbucks", 15)

#### Airports

In [64]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 5
LL = '48.855853, 2.336575'
RAD = 30000
CATEGORY_ID = '4bf58dd8d48988d1ed931735'

In [65]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [66]:
airports = ap.extract(url)

In [67]:
airports_pr = fr.places_df(airports, "airport", 5)

#### Nightlife


In [68]:
CLIENT_ID = 'KP3LZWOOOVPRQ3SW3RGNNW3OTBF42DQSNG2TTI3AZBVKBYN4'
CLIENT_SECRET = 'TOLDQWSBZHJAWHTT51INCWB3TQG4MTKOKQQBT1HXVVDADR35'
VERSION = '20180323'
LIMIT = 15
LL = '48.855853, 2.336575'
RAD = 30000
CATEGORY_ID = '4bf58dd8d48988d116941735'

In [69]:
url = f'https://api.foursquare.com/v2/venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={LL}&radius={RAD}&limit={LIMIT}&categoryId={CATEGORY_ID}'

In [70]:
clubs = ap.extract(url)

In [71]:
clubs_pr = fr.places_df(clubs, "club", 15)

In [72]:
frames = [airports_pr, starbucks_pr, clubs_pr]

paris = pd.concat(frames)

In [73]:
mp.offices(pr_df, map_pr)

In [74]:
mp.full_map(paris, map_pr)

### Final Analysis

The first conclusion we draw from the visual analysis is that London can be excluded, as the companies that belong to the categories we are interested in are far too dispersed and too far away from the clubs and Starbucks, which means our employees would not be able to enjoy themselves, something we absolutely cannot accept. Therefore, London is ruled out, and we have to choose between Paris and San Francisco.  

#### Paris vs San Francisco

For this section, I'm going to select one company of each city and obtain the distance between the company and the closest starbucks, nightclub and airport. The companies selected are Devolia in Paris, and FeVote in San Francisco.

#### The companies have been chosen based on the visualization, checking that they are close to other companies, a Starbucks and a club, and reasonably centered on the map

In [75]:
# Reference point for San Francisco: the coordinates used for FeVote.
FeVote_coord = (37.779392, -122.417928)

# Points to measure against, keyed by the label printed next to each distance.
# NOTE(review): 'club' and 'company' carry identical coordinates, so they
# print the same distance — confirm one of them is not a copy-paste slip.
# NOTE(review): the 'airport_OAK' and 'airport_INT' coordinates match the
# "OAK Runway" and "Oakland International Airport" rows of the airports table
# above; San Francisco International (first row of that table) is not
# measured — confirm this is intended.
places = dict(
    starbucks=(37.782738, -122.431519),
    airport_OAK=(37.710480, -122.213753),
    airport_INT=(37.711731, -122.212495),
    club=(37.779386, -122.423422),
    company=(37.779386, -122.423422),
)

# Print each label followed by the dt.haversine result for that point.
for place in places:
    coord = places[place]
    distance = dt.haversine(FeVote_coord, coord)
    print(place, distance)

starbucks 1251.3872542402507
airport_OAK 19524.889155682464
airport_INT 19572.636338473243
club 482.9811308800945
company 482.9811308800945


In [120]:
# Reference point for Paris: the coordinates used for Devolia.
Devolia_coord = (48.859648, 2.348855)

# Points to measure against, keyed by the label printed next to each distance.
# The 'company' coordinates equal those of the TVtrip and Jooce rows in the
# Paris companies table above.
places = dict(
    starbucks=(48.858902, 2.348151),
    airport_leBourget=(48.955101, 2.441819),
    airport_ORY=(48.728313, 2.365880),
    club=(48.857971, 2.346152),
    company=(48.856667, 2.350987),
)

# Print each label followed by the dt.haversine result for that point.
for place in places:
    coord = places[place]
    distance = dt.haversine(Devolia_coord, coord)
    print(place, distance)

starbucks 97.66671371576949
airport_leBourget 12605.8626383528
airport_ORY 14661.07923641235
club 271.8762885395118
company 366.43838324786697


#### Conclusion 

The first thing we should point out is that the results from San Francisco do not make much sense, at least not all of them. For example, the Starbucks is supposed to be located 1.25 km from the selected company, something that should not be possible because the search radius was set at 100 m. The same would go for the club and the nearest company.  

Having said this, the results from Paris are very promising. The selected company has a Starbucks less than 100 m away, and the closest club is just 271 m away. As we can see in the map, there are many companies nearby, the closest one being 366 m away, and there are two airports located 12 km and 14 km from the selected spot. 

San Francisco has always been known as the birthplace of entrepreneurship and startups, but there are other locations that are starting to take their place in this field, and Paris seems like a perfect place to start our own company, with many companies nearby from which our developers can learn and exchange experiences, and plenty of clubs and Starbucks for our employees to enjoy themselves after a hard day of work. And of course, we can't forget that Paris is the city of love. 