In [334]:
from dotenv import load_dotenv
import src.query as query
import src.folium as fl
import src.data as dt
from pymongo import GEOSPHERE
import pandas as pd
import os
from src.variables import Variables as Var
from pymongo import MongoClient
import src.stakeholders as sh

from datetime import datetime



In [127]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
load_dotenv('.env')

True

# Connection to MongoDB database and Collection creation

In [4]:
conn = MongoClient("localhost:27017")
db = conn.get_database("ironhack")

* `offices` is a collection created for coworking spaces, i.e. location candidates.

* `helsinki_places` is a collection to save all the places queried.

In [7]:
offices = db['offices']

In [21]:
helsinki_places = db['helsinki_places']

# Data collection

### Load token for foursqueare and Google APIs

In [11]:
client_id = os.getenv("FOURSQUARE_CLIENT_ID")
client_secret = os.getenv("FOURSQUARE_CLIENT_SECRET")

In [12]:
google_api = os.getenv("GOOGLE_PLACES")

### Establish candidates

These are the coordinates for our selected city: **Helsinki**

In [10]:
city_lat = Var.CITY_LAT
city_lon =  Var.CITY_LON
radius = Var.RADIUS

All coworking spaces as list of initial candidates

In [13]:
data_office = query.foursquare_query(city_lat, city_lon, 'coworking_space', radius, client_id, client_secret)



In [14]:
candidates = dt.read_foursquare_response(data_office, 'coworking_space')

Let's insert them to our brand new collection in MongoDb

In [15]:
x = offices.insert_many(candidates)

In [16]:
offices.find_one()

{'_id': ObjectId('5fafe7f65143de9ede25a204'),
 'type': 'coworking_space',
 'name': 'Tiedekulma / Think Corner',
 'location': {'coordinates': [24.948465, 60.169746], 'type': 'Point'}}

Let's create the Point for each office

In [17]:
offices.create_index([('location', GEOSPHERE)])

'location_2dsphere'

Now, let's plot the candidates to get a picture of their locations

In [18]:
map_candidates = fl.create_map(city_lat, city_lon)

In [19]:
for candidate in candidates:
    lat = candidate['location']['coordinates'][1]
    lon = candidate['location']['coordinates'][0]
    
    fl.add_marker_icon(map_candidates, lat, lon, 'red','building','white',candidate['name'])
    
    

In [20]:
map_candidates

### Venus Search

#### Start-up Tech Companies

Search via Foursquare API

In [22]:
data_tech = query.foursquare_query(city_lat, city_lon, 'tech_startup', radius, client_id, client_secret)

Get a list of companies

In [23]:
tech_companies_list = dt.read_foursquare_response(data_tech, 'tech_startup')

Insert those documents in our collection `helsinki_places`

In [24]:
x = helsinki_places.insert_many(tech_companies_list)

And create index for GeoJson

In [25]:
helsinki_places.create_index([('location', GEOSPHERE)])

'location_2dsphere'

Now, let's find matches for each candidate and add to `offices` number of matches.

In [26]:
dt.find_matches_points(offices, helsinki_places, Var.TECH_COMP, 'tech_startup')

#### Starbucks

Search via Google Places API, using 'Find Place' which returns only one match.

for all candidates, we perform a query to find nearest Starbucks and get unique matches in a list.

In [27]:
starbucks_list = dt.find_all_google_places(offices, 'Starbucks', 50, google_api)


List of places is inserted to collection

In [None]:
x = helsinki_places.insert_many(starbucks_list)
helsinki_places.create_index([('location', GEOSPHERE)])

And now perform matches based on distance defined in variables.py

In [28]:
dt.find_matches_points(offices, helsinki_places, Var.STARBUCKS_DISTANCE, 'Starbucks')

#### Preschool places

Search via Foursquare API. Similar approach as for finding tech companies.

In [29]:
preschool_data = query.foursquare_query(city_lat, city_lon, 'preschool', radius, client_id, client_secret)


In [30]:
preschools_list = dt.read_foursquare_response(preschool_data, 'preschools')
x = helsinki_places.insert_many(preschools_list)
dt.find_matches_points(offices, helsinki_places, Var.PRESCHOOL_DISTANCE, 'preschools')

#### Night-clubs

Search via Foursquare API. Similar approach as for finding tech companies.

In [31]:
disco_data = query.foursquare_query(city_lat, city_lon, 'night_club', radius, client_id, client_secret)


In [32]:
disco_list = dt.read_foursquare_response(disco_data, 'night_club')
x = helsinki_places.insert_many(disco_list)
dt.find_matches_points(offices, helsinki_places, Var.DISCO_DISTANCE, 'night_club')

#### Design Studios


Search via Google Places API. Similar approach as for finding Starbucks.

In [259]:
design_com_list = dt.find_all_google_places(offices, 'Design company', 300, google_api)

In [261]:
x = helsinki_places.insert_many(design_com_list)
helsinki_places.create_index([('location', GEOSPHERE)])

'location_2dsphere'

In [262]:
dt.find_matches_points(offices, helsinki_places, Var.STARBUCKS_DISTANCE, 'Design company')

#### Vegan Restaurants

Search via Foursquare API. Similar approach as for finding tech companies.

In [35]:
vegan_data = query.foursquare_query(city_lat, city_lon, 'vegan_rest', radius, client_id, client_secret)


In [36]:
vegan_list = dt.read_foursquare_response(vegan_data, 'vegan_rest')
x = helsinki_places.insert_many(vegan_list)
dt.find_matches_points(offices, helsinki_places, Var.VEGAN_DISTANCE, 'vegan_rest')

#### Pet Groomers

Search via Foursquare API. Similar approach as for finding tech companies.

In [37]:
pet_groomer_list = dt.find_all_google_places(offices, 'pet groomer', 50, google_api)


In [38]:
x = helsinki_places.insert_many(pet_groomer_list)
helsinki_places.create_index([('location', GEOSPHERE)])
dt.find_matches_points(offices, helsinki_places, Var.PET_GROOMER_DISTANCE, 'pet groomer')

#### Basketball stadiums

Search via Foursquare API. Similar approach as for finding tech companies.

In [42]:
basket_data = query.foursquare_query(city_lat, city_lon, 'basket_stadium', radius, client_id, client_secret)

In [43]:
basket_list = dt.read_foursquare_response(basket_data, 'basket_stadium')
x = helsinki_places.insert_many(basket_list)
dt.find_matches_points(offices, helsinki_places, Var.BASKET_STADIUM_DISTANCE, 'basket_stadium')

#### Travel to Airport

Search via Google Maps API

For each candidate, it is queried time travel from the coworking space location to Helsinki-Vantaa Airport, both by driving and by public transport.

For uniform time travels, travel route is queried for a regular weekday with arrival time at 12:00. As per API request, arrival time is needed to be input as seconds since 1st Jan 1970.

In [100]:
time = query.get_time_for_google(Var.TIME_CALC_YEAR, Var.TIME_CALC_MONTH, Var.TIME_CALC_HOUR, Var.TIME_CALC_DAY)

dt.get_travel_time(offices, Var.AIRPORT_LAT, Var.AIRPORT_LON, google_api, time)


## Summary

In [267]:
df = pd.DataFrame(offices.find({}))

In [268]:
df

Unnamed: 0,_id,type,name,location,tech_startup,Starbucks,preschools,night_club,vegan_rest,pet groomer,basket_stadium,driving,transit,Design company
0,5fafe7f65143de9ede25a204,coworking_space,Tiedekulma / Think Corner,"{'coordinates': [24.948465, 60.169746], 'type'...",26,0,1,11,2,0,6,26.3,35.0,0
1,5fafe7f65143de9ede25a205,coworking_space,Spalt PR Helsinki,"{'coordinates': [24.95028953736944, 60.1678398...",26,0,0,7,4,0,5,27.2,39.0,0
2,5fafe7f65143de9ede25a206,coworking_space,Svensk Ungdom,"{'coordinates': [24.936771228051832, 60.169065...",33,1,0,28,6,0,6,28.3,33.8,0
3,5fafe7f65143de9ede25a207,coworking_space,p2s Media,"{'coordinates': [24.93465212805861, 60.1656326...",36,0,0,13,4,1,5,28.8,42.5,1
4,5fafe7f65143de9ede25a208,coworking_space,E28 Community,"{'coordinates': [24.931665377499726, 60.165782...",33,0,1,12,1,1,5,28.1,47.4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,5fafe7f65143de9ede25a244,coworking_space,Talent Base,"{'coordinates': [24.804944, 60.189072], 'type'...",2,0,0,0,0,0,5,22.3,65.0,0
65,5fafe7f65143de9ede25a245,coworking_space,Hippostalo,"{'coordinates': [24.811269988660655, 60.224374...",1,0,0,1,0,1,4,17.7,44.1,0
66,5fafe7f65143de9ede25a246,coworking_space,Nuortenosasto Pointti,"{'coordinates': [24.80919823050499, 60.2178988...",1,0,0,1,0,0,4,20.1,42.1,0
67,5fafe7f65143de9ede25a247,coworking_space,Ahti Business Park,"{'coordinates': [24.774798229336735, 60.166870...",0,0,0,0,0,0,3,26.2,72.6,0


In [273]:
df_places = pd.DataFrame(helsinki_places.find({}))

In [274]:
df_places

Unnamed: 0,_id,type,name,location,place_id
0,5fafe8175143de9ede25a249,tech_startup,Meetin.gs,"{'coordinates': [24.932639703932836, 60.168104...",
1,5fafe8175143de9ede25a24a,tech_startup,Osuma.fi HQ,"{'coordinates': [24.936691031806237, 60.169992...",
2,5fafe8175143de9ede25a24b,tech_startup,Online Revenue,"{'coordinates': [24.9320125579834, 60.16793430...",
3,5fafe8175143de9ede25a24c,tech_startup,Helsinki Open Device Lab,"{'coordinates': [24.939072132110592, 60.167037...",
4,5fafe8175143de9ede25a24d,tech_startup,Kompozure,"{'coordinates': [24.940059185028073, 60.168532...",
...,...,...,...,...,...
221,5fb021bd5143de9ede25a37e,Design company,Alfons Helsinki Design Studio,"{'coordinates': [24.9473763, 60.1841634], 'typ...",ChIJn2ShT7YLkkYRGscRy-aCaEQ
222,5fb021bd5143de9ede25a37f,Design company,Suomi Design Oy,"{'coordinates': [24.9645638, 60.16785729999999...",ChIJV0G-l44LkkYRuS0fOrIo6mE
223,5fb021bd5143de9ede25a380,Design company,Friday Digital Design Oy,"{'coordinates': [24.9683598, 60.1888519], 'typ...",ChIJN-_Z7XsJkkYRtS1qz1upOrw
224,5fb021bd5143de9ede25a381,Design company,Studio Ville Kokkonen Oy,"{'coordinates': [24.927354, 60.19810159999999]...",ChIJkX-0NYwJkkYRSrdrZii3iPc


Backup saving

In [276]:
df.to_csv('data/backup.csv', index=False)

# Scoring and Sorting

Now, let's find the best suitable coworking space.

First, let's create our collection of stakeholders, i.e. the employees plus the office dog.

In [252]:
stakeholders = []

In [253]:
number_stakeholders ={'Ceo': 1, 'Executive': 10, 'Account': 20, 'Developer': 15, 'Engineer': 20, 'Designer': 20, 'Bluecollar': 1, 'Dog': 1}

In [254]:
for key, value in number_stakeholders.items():
    for _ in range(value):
        
        if key == 'Ceo':
            tmp = sh.Ceo()
        elif key == 'Executive':
            tmp = sh.Executive()
        elif key == 'Account':
            tmp = sh.Account()
        elif key == 'Developer':
            tmp = sh.Developer()
        elif key == 'Engineer':
            tmp = sh.Engineer()
        elif key == 'Designer':
            tmp = sh.Designer()
        elif key == 'Bluecollar':
            tmp = sh.Bluecollar()
        elif key == 'Dog':
            tmp = sh.Dog()
  
        stakeholders.append(tmp)


Now let's assign a child to 30% of employees, i.e. 26 persons approx.

In [255]:
stakeholders = sh.assign_child(stakeholders, Var.PARENTS_CHILD)

<src.stakeholders.Ceo at 0x7fb8481bcd90>

In [357]:
def get_score(stakeholders, param):
    score = 0
    for stakeholder in stakeholders:
        
        if param == 'design':
            cond = stakeholder.like_design
        elif param == 'school':
            cond = stakeholder.has_child
        elif param == 'tech':
            cond = stakeholder.like_tech
        elif param == 'starbucks':
            cond = stakeholder.like_starbucks
        elif param == 'airport':
            cond = stakeholder.need_travel
        elif param == 'club':
            cond = stakeholder.like_party
        elif param == 'vegan':
            cond = stakeholder.is_vegan
        elif param == 'basket':
            cond = stakeholder.like_basket
        elif param == 'dog':
            cond = stakeholder.need_groomer
            
        else:
            pass

        if cond:
            score += stakeholder.score
        else:
            pass
        
    return score
    
    
    
    

In [343]:
def score_travel(row, ideal_car, ideal_transport, red, stakeholders):
    
    car_time = row['driving']
    transport_time = row['transit']
    
    score_total = get_score(stakeholders, 'airport')
    
    penalty = 0
    
    if car_time > ideal_car:
        penalty += (car_time - ideal_car) * red
   
    else:
        pass
    
    if transport_time > ideal_transport:
        penalty += (transport_time - ideal_transport) * red
 
    else:
        pass

    

    score = score_total * (1- penalty)
    
    
    
    return score
    
    
                 
                 

In [408]:
df_score = df[['_id','name', 'location']].copy()

In [409]:
df_score['design_score'] = df['Design company'].apply(lambda x: get_score(stakeholders,'design') if x > 0 else 0)

In [410]:
df_score['school_score'] = df['preschools'].apply(lambda x: get_score(stakeholders,'school') if x > 0 else 0)

In [411]:
threshold = df.tech_startup.quantile(0.75)

In [412]:
df_score['tech_score'] = df['tech_startup'].apply(lambda x: get_score(stakeholders,'tech') if x >= threshold else 0)

In [413]:
df_score['starbucks_score'] = df['Starbucks'].apply(lambda x: get_score(stakeholders,'starbucks') if x > 0 else 0)

In [414]:
df_score['airport_score'] = df.apply(lambda x: score_travel(x,Var.IDEAL_TIME_CAR, Var.IDEAL_TIME_TRANSPORT,Var.PRC_PEN_MIN,stakeholders), axis = 1)

In [415]:
df_score['party_score'] = df['night_club'].apply(lambda x: get_score(stakeholders,'club') if x > 0 else 0)

In [416]:
df_score['vegan_score'] = df['vegan_rest'].apply(lambda x: get_score(stakeholders,'vegan') if x > 0 else 0)

In [417]:
df_score['basket_score'] = df['basket_stadium'].apply(lambda x: get_score(stakeholders,'basket') if x > 0 else 0)

In [418]:
df_score['dog_score'] = df['pet groomer'].apply(lambda x: get_score(stakeholders,'dog') if x > 0 else 0)

In [421]:
df_score['total'] = df_score['design_score']+df_score['school_score']+df_score['tech_score']+df_score['starbucks_score']+df_score['airport_score']+df_score['party_score']+df_score['vegan_score']+df_score['basket_score']+df_score['dog_score']

In [422]:
df_score = df_score.sort_values('total', ascending = False)

In [423]:
df_score

Unnamed: 0,_id,name,location,design_score,school_score,tech_score,starbucks_score,airport_score,party_score,vegan_score,basket_score,dog_score,total
10,5fafe7f65143de9ede25a20e,Epicenter,"{'coordinates': [24.94577562322455, 60.1698553...",0,630,225,500,433.0,1935,100,10,0,3833.0
27,5fafe7f65143de9ede25a21f,Terveystalo Piazza,"{'coordinates': [24.930631, 60.169916], 'type'...",0,630,225,500,401.0,1935,100,10,0,3801.0
28,5fafe7f65143de9ede25a220,Four Reasons,"{'coordinates': [24.929534196853634, 60.168788...",0,630,225,500,392.0,1935,100,10,5,3797.0
4,5fafe7f65143de9ede25a208,E28 Community,"{'coordinates': [24.931665377499726, 60.165782...",300,630,225,0,345.0,1935,100,10,5,3550.0
7,5fafe7f65143de9ede25a20b,Boksi,"{'coordinates': [24.93095672646299, 60.1650659...",300,630,225,0,311.0,1935,100,10,5,3516.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,5fafe7f65143de9ede25a244,Talent Base,"{'coordinates': [24.804944, 60.189072], 'type'...",0,0,0,0,227.0,0,0,10,0,237.0
39,5fafe7f65143de9ede25a22b,Portti C / Gate C,"{'coordinates': [24.92115325198633, 60.1545962...",0,0,0,0,226.0,0,0,10,0,236.0
41,5fafe7f65143de9ede25a22d,Eckerö Line,"{'coordinates': [24.920848013578677, 60.154510...",0,0,0,0,224.0,0,0,10,0,234.0
58,5fafe7f65143de9ede25a23e,Moonsoft Oy,"{'coordinates': [24.820141111789763, 60.179604...",0,0,0,0,191.0,0,0,10,0,201.0


In [405]:
winner_id

'5fafe7f65143de9ede25a20e'

In [390]:
winner_id = str(df_score['_id'].head(1).iloc[0])

In [407]:
offices.find_one({'_id':ObjectId(winner_id)})

NameError: name 'ObjectId' is not defined

In [406]:
offices.find_one({})

{'_id': ObjectId('5fafe7f65143de9ede25a204'),
 'type': 'coworking_space',
 'name': 'Tiedekulma / Think Corner',
 'location': {'coordinates': [24.948465, 60.169746], 'type': 'Point'},
 'tech_startup': 26,
 'Starbucks': 0,
 'preschools': 1,
 'night_club': 11,
 'vegan_rest': 2,
 'pet groomer': 0,
 'basket_stadium': 6,
 'driving': 26.3,
 'transit': 35.0,
 'Design company': 0}

In [323]:
df.describe()

Unnamed: 0,tech_startup,Starbucks,preschools,night_club,vegan_rest,pet groomer,basket_stadium,driving,transit,Design company
count,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0,69.0
mean,12.173913,0.072464,0.550725,4.217391,0.826087,0.405797,5.231884,24.315942,47.289855,0.130435
std,12.074471,0.261154,0.697395,6.803696,1.28288,0.810208,0.710114,4.336291,10.007485,0.339248
min,0.0,0.0,0.0,0.0,0.0,0.0,3.0,14.0,25.8,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,5.0,21.2,40.8,0.0
50%,6.0,0.0,0.0,0.0,0.0,0.0,5.0,25.5,47.4,0.0
75%,24.0,0.0,1.0,7.0,1.0,1.0,6.0,27.2,54.5,0.0
max,36.0,1.0,2.0,28.0,6.0,3.0,6.0,30.8,72.6,1.0


In [403]:
df['_id']

RecursionError: maximum recursion depth exceeded while calling a Python object

In [271]:
helsinki_places.remove({'type':'design_studio'})

  helsinki_places.remove({'type':'design_studio'})


{'n': 88, 'ok': 1.0}