In [21]:
from pymongo import MongoClient
import pandas as pd
import numpy as np
import re
import requests as req


# Connect to the MongoDB server on this machine and select the "companies" database.
client = MongoClient(host='mongodb://localhost:27017/')
db = client["companies"]


#### First of all, let's query the "young" (valid) companies from our new database:

In [23]:
# Fetch every company flagged as young; the projection (0 = omit) drops the
# Mongo id, the stored index and the flag fields from the returned documents.
young = db.offices_ok.find(
    {"is_young": 1},
    {"_id": 0, "index": 0, 'is_young': 0, 'is_design': 0, 'is_similar': 0, 'is_succesful': 0},
)
mydf = pd.DataFrame(young)

In [27]:
# Keep only these columns, in this order, then show the resulting dimensions.
columns_to_keep = [
    'name', 'category_code', 'description', 'tag_list', 'founded_year',
    'number_of_employees', 'raised_money_dollars', 'lat', 'lng', 'geo_point',
]
mydf = mydf.loc[:, columns_to_keep]
mydf.shape

(3504, 10)

#### Now we want to make a geo query for each of our desired companies. We would like to know for each company:
#### 1. How many nearby design, similar and/or successful companies there are.
#### 2. How many nearby "old" companies there are.

In [30]:
def find_old(df1, radio_max_meters=2000):
    """Count the "old" companies (``is_young == 0``) near each point.

    Args:
        df1: iterable of GeoJSON geometries; the notebook passes the
            ``geo_point`` column.
        radio_max_meters: value sent as ``$maxDistance`` in the ``$near`` query.

    Returns:
        list[int]: one count per input point, in input order, of
        ``offices_ok`` documents matching the flag within that distance.
    """
    counts = []
    # Iterate over the values themselves: ``df1[i]`` is a label lookup on a
    # Series and raises KeyError as soon as the index is not 0..n-1
    # (for example after filtering rows).
    for point in df1:
        nearby = db.offices_ok.find(
            {'$and': [
                {"geo_point": {"$near": {"$geometry": point,
                                         "$maxDistance": radio_max_meters}}},
                {'is_young': 0},
            ]},
            {"_id": 1},  # only the number of hits matters, so fetch ids only
        )
        # Count straight off the cursor instead of building a DataFrame.
        counts.append(sum(1 for _ in nearby))
    return counts
       


In [31]:
def find_young(df1, radio_max_meters=2000):
    """Count the "young" companies (``is_young == 1``) near each point.

    Args:
        df1: iterable of GeoJSON geometries; the notebook passes the
            ``geo_point`` column.
        radio_max_meters: value sent as ``$maxDistance`` in the ``$near`` query.

    Returns:
        list[int]: one count per input point, in input order, of
        ``offices_ok`` documents matching the flag within that distance.
    """
    counts = []
    # Iterate over the values themselves: ``df1[i]`` is a label lookup on a
    # Series and raises KeyError as soon as the index is not 0..n-1
    # (for example after filtering rows).
    for point in df1:
        nearby = db.offices_ok.find(
            {'$and': [
                {"geo_point": {"$near": {"$geometry": point,
                                         "$maxDistance": radio_max_meters}}},
                {'is_young': 1},
            ]},
            {"_id": 1},  # only the number of hits matters, so fetch ids only
        )
        # Count straight off the cursor instead of building a DataFrame.
        counts.append(sum(1 for _ in nearby))
    return counts

In [32]:
def find_design(df1, radio_max_meters=2000):
    """Count the design companies (``is_design == 1``) near each point.

    Args:
        df1: iterable of GeoJSON geometries; the notebook passes the
            ``geo_point`` column.
        radio_max_meters: value sent as ``$maxDistance`` in the ``$near`` query.

    Returns:
        list[int]: one count per input point, in input order, of
        ``offices_ok`` documents matching the flag within that distance.
    """
    counts = []
    # Iterate over the values themselves: ``df1[i]`` is a label lookup on a
    # Series and raises KeyError as soon as the index is not 0..n-1
    # (for example after filtering rows).
    for point in df1:
        nearby = db.offices_ok.find(
            {'$and': [
                {"geo_point": {"$near": {"$geometry": point,
                                         "$maxDistance": radio_max_meters}}},
                {'is_design': 1},
            ]},
            {"_id": 1},  # only the number of hits matters, so fetch ids only
        )
        # Count straight off the cursor instead of building a DataFrame.
        counts.append(sum(1 for _ in nearby))
    return counts

In [33]:
def find_similar(df1, radio_max_meters=2000):
    """Count the similar companies (``is_similar == 1``) near each point.

    Args:
        df1: iterable of GeoJSON geometries; the notebook passes the
            ``geo_point`` column.
        radio_max_meters: value sent as ``$maxDistance`` in the ``$near`` query.

    Returns:
        list[int]: one count per input point, in input order, of
        ``offices_ok`` documents matching the flag within that distance.
    """
    counts = []
    # Iterate over the values themselves: ``df1[i]`` is a label lookup on a
    # Series and raises KeyError as soon as the index is not 0..n-1
    # (for example after filtering rows).
    for point in df1:
        nearby = db.offices_ok.find(
            {'$and': [
                {"geo_point": {"$near": {"$geometry": point,
                                         "$maxDistance": radio_max_meters}}},
                {'is_similar': 1},
            ]},
            {"_id": 1},  # only the number of hits matters, so fetch ids only
        )
        # Count straight off the cursor instead of building a DataFrame.
        counts.append(sum(1 for _ in nearby))
    return counts

In [34]:
def find_succesful(df1, radio_max_meters=2000):
    """Count the successful companies (``is_succesful == 1``) near each point.

    Args:
        df1: iterable of GeoJSON geometries; the notebook passes the
            ``geo_point`` column.
        radio_max_meters: value sent as ``$maxDistance`` in the ``$near`` query.

    Returns:
        list[int]: one count per input point, in input order, of
        ``offices_ok`` documents matching the flag within that distance.
    """
    counts = []
    # Iterate over the values themselves: ``df1[i]`` is a label lookup on a
    # Series and raises KeyError as soon as the index is not 0..n-1
    # (for example after filtering rows).
    for point in df1:
        nearby = db.offices_ok.find(
            {'$and': [
                {"geo_point": {"$near": {"$geometry": point,
                                         "$maxDistance": radio_max_meters}}},
                # The misspelled key is the stored field name; keep it as is.
                {'is_succesful': 1},
            ]},
            {"_id": 1},  # only the number of hits matters, so fetch ids only
        )
        # Count straight off the cursor instead of building a DataFrame.
        counts.append(sum(1 for _ in nearby))
    return counts

In [36]:
# Add one neighbour-count column per company category, all computed from the
# same geo_point column.
geo_points = mydf["geo_point"]
for column, finder in (
    ("old_near", find_old),
    ("young_near", find_young),
    ("design_near", find_design),
    ("similar_near", find_similar),
    ("succesful_near", find_succesful),
):
    mydf[column] = finder(geo_points)

In [38]:
# Preview the first three rows, including the new *_near columns.
mydf.head(n=3)

Unnamed: 0,name,category_code,description,tag_list,founded_year,number_of_employees,raised_money_dollars,lat,lng,geo_point,old_near,young_near,design_near,similar_near,succesful_near
0,Geni,web,Geneology social network site,"geni, geneology, social, family, genealogy",2006,18,16500000,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,...",0,8,1,8,2
1,MeetMoi,social,Mobile Dating,"mobile, dating, location, realtime, phone",2007,15,5580000,40.757929,-73.985506,"{'type': 'Point', 'coordinates': [-73.985506, ...",33,104,3,81,37
2,Twitter,social,Real time communication platform,"text, messaging, social, community, twitter, t...",2006,1300,1160000000,37.776805,-122.416924,"{'type': 'Point', 'coordinates': [-122.4169244...",19,109,3,83,53


#### Now let's keep only the companies that have no "old" companies around them and create a "score" field for them. The score will basically be the sum of "interesting" companies nearby (design, similar and/or successful):

In [72]:
# Keep the companies with no "old" company nearby. The explicit copy makes
# `young` an independent frame, so adding columns to it later neither touches
# `mydf` nor raises SettingWithCopyWarning.
young = mydf[mydf["old_near"] == 0].copy()

In [73]:
# Score = number of "interesting" neighbours (design + similar + successful).
# assign() returns a new frame instead of writing into what may be a slice of
# another DataFrame, so no SettingWithCopyWarning is raised.
young = young.assign(
    Score=young["design_near"] + young["similar_near"] + young["succesful_near"]
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


#### Now let's rank the companies by their score and keep the top 100:

In [74]:
# Highest score first. reset_index() renumbers the rows from 0 and keeps the
# previous row labels in an "index" column.
young = young.sort_values(by='Score', ascending=False).reset_index()


In [156]:
# Independent copy of the first 100 rows of the ranked frame, then a preview.
top100 = young.iloc[:100].copy()
top100.head(n=3)

Unnamed: 0,index,name,category_code,description,tag_list,founded_year,number_of_employees,raised_money_dollars,lat,lng,geo_point,old_near,young_near,design_near,similar_near,succesful_near,Score
0,129,SayNow,mobile,Reinventing the phone call,,2005,20,7500000,37.42746,-122.143915,"{'type': 'Point', 'coordinates': [-122.143915,...",0,16,0,15,8,23
1,107,Doostang,web,Exclusive Career Community,"job-search, career, jobs, social-networking, e...",2005,10,5750000,37.427235,-122.145783,"{'type': 'Point', 'coordinates': [-122.145783,...",0,16,0,15,8,23
2,2192,Silver Tail Systems,analytics,Business Logic Abuse Fraud Protection,"business-logic-abuse, security, fraud, online-...",2008,90,22100000,37.428088,-122.143368,"{'type': 'Point', 'coordinates': [-122.143368,...",0,16,0,15,8,23


#### Now we want to know how well these locations match the preferences of our employees. We will use the Google Maps Places API to figure out:
#### 1. The number of schools within a radius of 1000 m.
#### 2. The number of Starbucks within a radius of 200 m.
#### 3. The number of airports within a radius of 10000 m.
#### 4. The number of night clubs within a radius of 500 m.
#### 5. The number of vegan restaurants within a radius of 200 m.


In [151]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Stop early, pointing at the setup guide, when no Places key is configured.
if "PLACES_KEY" not in os.environ:
    raise ValueError("You should pass a PLACES_KEY, see: https://developers.google.com/places/web-service/get-api-key")

PLACES_KEY = os.environ["PLACES_KEY"]

def get_near(df, tipo, keyword, radius, timeout=10):
    """Count Google Places "nearby search" results around every row of ``df``.

    Args:
        df: table with ``lat`` and ``lng`` columns, one row per location.
        tipo: value sent as the ``type`` query parameter.
        keyword: value sent as the ``keyword`` query parameter. It is inserted
            into the URL verbatim, so callers must pass it already URL-encoded
            (e.g. ``'international%20airport'``).
        radius: value sent as the ``radius`` query parameter.
        timeout: seconds to wait for each HTTP response; without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        list[int]: ``len(results)`` of the single response fetched per row, in
        row order; 0 whenever the API status is not ``OK``.

    NOTE(review): only one response page is read per location, so counts may
    be capped by the API page size -- confirm against the Places docs.
    """
    import warnings  # local import: this cell stays runnable on its own

    url_template = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={},{}&radius={}&type={}&keyword={}&key={}"
    counts = []
    # Pair the coordinates positionally: df["lat"][i] is a label lookup and
    # fails when the frame's index is not 0..n-1.
    for lat, lng in zip(df["lat"], df["lng"]):
        res = req.get(
            url_template.format(lat, lng, radius, tipo, keyword, PLACES_KEY),
            timeout=timeout,
        )
        content = res.json()
        status = content['status']
        if status == 'OK':
            counts.append(len(content["results"]))
            continue
        if status != 'ZERO_RESULTS':
            # Quota, auth or request errors still count as 0 (as before), but
            # are surfaced so they are not mistaken for "nothing nearby".
            warnings.warn(
                "Places API returned status {!r} for location {},{}: {}".format(
                    status, lat, lng, content.get("error_message", "")
                )
            )
        counts.append(0)
    return counts


In [161]:
# Schools around each company (radius 1000).
top100["schools"] = get_near(top100, tipo='school', keyword='school', radius=1000)

In [163]:
# Cafes matching "starbucks" around each company (radius 200).
top100["starbucks"] = get_near(top100, tipo='cafe', keyword='starbucks', radius=200)

In [166]:
# Night clubs around each company (radius 500).
top100["night_clubs"] = get_near(top100, tipo='night_club', keyword='night_club', radius=500)

In [168]:
# Restaurants matching "vegan" around each company (radius 200).
top100["vegan_rests"] = get_near(top100, tipo='restaurant', keyword='vegan', radius=200)

In [172]:
# Airports around each company (radius 10000); the keyword is passed already
# URL-encoded because get_near places it in the URL verbatim.
top100["airports"] = get_near(top100, tipo='airport', keyword='international%20airport', radius=10000)

In [173]:
# Show a preview rather than dumping the whole frame into the notebook output.
top100.head(10)

Unnamed: 0,index,name,category_code,description,tag_list,founded_year,number_of_employees,raised_money_dollars,lat,lng,...,young_near,design_near,similar_near,succesful_near,Score,schools,starbucks,night_clubs,vegan_rests,airports
0,129,SayNow,mobile,Reinventing the phone call,,2005,20,7500000,37.427460,-122.143915,...,16,0,15,8,23,17,1,1,8,2
1,107,Doostang,web,Exclusive Career Community,"job-search, career, jobs, social-networking, e...",2005,10,5750000,37.427235,-122.145783,...,16,0,15,8,23,13,2,1,8,2
2,2192,Silver Tail Systems,analytics,Business Logic Abuse Fraud Protection,"business-logic-abuse, security, fraud, online-...",2008,90,22100000,37.428088,-122.143368,...,16,0,15,8,23,15,1,1,7,2
3,844,SocialVibe,advertising,Engagement Advertising,"social-media, social-network, media-platform, ...",2007,80,43900000,34.081524,-118.382674,...,16,2,14,2,18,10,1,14,2,3
4,47,Box,network_hosting,Cloud Content Management,"box-net, file-sharing, backup, storage, share,...",2005,950,409000000,37.425801,-122.143701,...,12,0,11,6,17,12,1,1,7,2
5,1446,vufind,mobile,Interest Graph Personalization/Analytics,"interest-graph, personalized-commerce, ecommer...",2010,7,930000,37.424614,-122.145153,...,12,0,11,6,17,9,2,1,5,2
6,634,Boxbe,web,Email Plugin,"email, anti-spam, social, screening, yahoo-mai...",2005,3,1500000,37.800209,-122.442592,...,15,1,12,3,16,20,1,2,1,0
7,505,Coordinatr,software,,"coordinatr, microevent, event, sms, mobile, fr...",2007,2,0,37.804477,-122.427021,...,12,0,11,4,15,19,0,1,0,0
8,1506,plaYce,games_video,Dead Pool,"techcrunch50, tc50, gaming, virtual-world, mir...",2008,7,0,37.802040,-122.438231,...,13,1,11,3,15,20,3,1,2,0
9,784,Catzilla,advertising,,"search-engine, marketing, local, advertising, ...",2003,2,0,37.798069,-122.433050,...,13,0,11,3,14,20,1,0,4,0


#### Here's a heat map of our top 100 companies:

In [77]:
import folium
from folium import plugins 

# Map with a heat layer built from the first 100 (lat, lng) pairs of top100.
m = folium.Map(zoom_start=5)
stationArr = top100.loc[:, ['lat', 'lng']].iloc[:100].to_numpy()
plugins.HeatMap(stationArr, radius=15).add_to(m)
m

#### And here we can see how well each company scored by the size of their radius:

In [132]:
m = folium.Map(zoom_start=5)

# Walk top100's own rows. The previous loop took its length from `young` but
# read the values from `top100` by label, which only lines up while both
# frames share the same 0..n-1 index.
for name, lat, lng, score in zip(top100["name"], top100["lat"], top100["lng"], top100["Score"]):
    tooltip = name
    loc = [lat, lng]
    folium.Marker(loc, popup=tooltip, tooltip=tooltip).add_to(m)
    # The circle grows with the score: 20000 radius units per score point.
    folium.Circle(location=loc, radius=int(score) * 20000, color='blue', fill=True, fill_color='blue').add_to(m)
m
