In [1]:
import pandas as pd
from pymongo import MongoClient
import pprint
import re
import numpy as np
import folium
import os
import requests
from dotenv import load_dotenv
load_dotenv()

# The API key is read from the environment (load_dotenv() has already run
# above); stop right away when it is not set.
KEY = os.environ.get("KEY")
if KEY is None:
    raise ValueError("You should pass a KEY")

In [2]:
# Connect to the MongoDB server on localhost and select the "companies" database.
client = MongoClient('mongodb://localhost:27017/')
db = client["companies"]

In [3]:
# First filter: keep only companies whose total_money_raised is at least 1,000,000.
companies = db.data_companies_clean.find(
    {"total_money_raised": {"$gte": 1000000}}
)

In [4]:
# Materialise the query cursor into a DataFrame (one row per company document).
# The cursor is consumed by this step, so re-run the query cell before
# re-running this one.
data_companies = pd.DataFrame(list(companies))

In [5]:
# As a first approximation, look at which company has raised the most money.
money_raised = data_companies['total_money_raised']
data_companies[money_raised == money_raised.max()]

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised
68,5d2737547bf280155f216109,social,Vancouver,CAN,2008,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,HootSuite,370.0,190000000


In [6]:
# List with the geoDescription of every company, in row order.
list_geo = data_companies['geoDescription'].tolist()

In [7]:
# Helper used to find out how many companies sit around each company.

def findNear(list_geo, radio_meters):
    """Return the stored companies located near a point.

    Despite its name, ``list_geo`` is a single geometry value: it is passed
    unchanged as the ``$geometry`` of a MongoDB ``$near`` query on the
    ``geoDescription`` field of ``db.data_companies_clean``, with
    ``radio_meters`` as ``$maxDistance``.

    Returns the matching documents as a list.
    """
    near_filter = {
        "geoDescription": {
            "$near": {
                "$geometry": list_geo,
                "$maxDistance": radio_meters,
            }
        }
    }
    matches = db.data_companies_clean.find(near_filter)
    return list(matches)

# For every company, look up the companies within the search radius and keep
# both the matching documents and how many there are.
radio_meters = 5000
list_offices = [
    findNear(list_geo[position], radio_meters)
    for position in range(len(data_companies))
]
list_number_offices = [len(nearby) for nearby in list_offices]
data_companies['number of offices near'] = list_number_offices

In [8]:
data_companies.head()

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised,number of offices near
0,5d2737547bf280155f216096,finance,London,GBR,2007,"{'type': 'Point', 'coordinates': [-0.1418973, ...",51.51088,-0.141897,Seedcamp,4.0,5000000,18
1,5d2737547bf280155f216097,tech,Tel-Aviv,ISR,2007,"{'type': 'Point', 'coordinates': [34.7595, 32....",32.0554,34.7595,Flixwagon,10.0,2500000,5
2,5d2737547bf280155f216098,tech,Toronto,CAN,2007,"{'type': 'Point', 'coordinates': [-79.388279, ...",43.666953,-79.388279,ModiFace,35.0,4250000,5
3,5d2737547bf280155f216099,tech,Helsinki,FIN,2007,"{'type': 'Point', 'coordinates': [24.9410012, ...",60.168149,24.941001,XIHA,8.0,1000000,2
4,5d2737547bf280155f21609a,tech,Amsterdam,NLD,2007,"{'type': 'Point', 'coordinates': [4.8948623, 5...",52.374523,4.894862,Wakoopa,8.0,1000000,6


In [9]:
# New column: money raised divided by the number of offices found nearby.
data_companies['money by offices'] = data_companies['total_money_raised'].div(
    data_companies['number of offices near']
)

In [10]:
def sumEmployees(list_offices):
    """Sum the employees of the companies found near each company.

    ``list_offices`` holds one entry per company; each entry is an iterable
    of nearby company documents (dicts with a ``number_of_employees`` key).

    Returns a list with one total per entry, in the same order. An entry
    with no nearby companies contributes 0.
    """
    # One sum per entry. No running total is kept across entries: it was
    # never used, and reading the inner loop variable after the loop failed
    # with UnboundLocalError whenever the first entry was empty.
    return [
        sum(office['number_of_employees'] for office in company)
        for company in list_offices
    ]

# Function that computes the total number of employees of the offices near each company.

In [11]:
# New column: total employees of the nearby offices divided by how many
# offices were found nearby.
employees_near = sumEmployees(list_offices)
data_companies['employee by offices'] = employees_near / data_companies['number of offices near']

In [12]:
# Keep only companies with more than one hit in the radius search, i.e. with
# at least one office nearby.
# NOTE(review): presumably the search also returns the company itself, which
# is why the threshold is > 1 — confirm.
# .copy() makes the result an independent frame, so the columns added in the
# following cells do not trigger pandas' SettingWithCopyWarning.
data_companies = data_companies[data_companies['number of offices near'] > 1].copy()

In [13]:
# Put the three metrics on a common scale (their rank) so they can be
# combined into a single ranking.
rank_sources = {
    'Rank_employee': 'employee by offices',
    'Rank_money': 'money by offices',
    'Rank_offices': 'number of offices near',
}
for rank_column, metric_column in rank_sources.items():
    data_companies[rank_column] = data_companies[metric_column].rank()

data_companies.head()

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised,number of offices near,money by offices,employee by offices,Rank_employee,Rank_money,Rank_offices
0,5d2737547bf280155f216096,finance,London,GBR,2007,"{'type': 'Point', 'coordinates': [-0.1418973, ...",51.51088,-0.141897,Seedcamp,4.0,5000000,18,277777.777778,24.888889,35.0,14.0,59.5
1,5d2737547bf280155f216097,tech,Tel-Aviv,ISR,2007,"{'type': 'Point', 'coordinates': [34.7595, 32....",32.0554,34.7595,Flixwagon,10.0,2500000,5,500000.0,23.8,20.5,25.5,23.5
2,5d2737547bf280155f216098,tech,Toronto,CAN,2007,"{'type': 'Point', 'coordinates': [-79.388279, ...",43.666953,-79.388279,ModiFace,35.0,4250000,5,850000.0,28.0,45.5,40.0,23.5
3,5d2737547bf280155f216099,tech,Helsinki,FIN,2007,"{'type': 'Point', 'coordinates': [24.9410012, ...",60.168149,24.941001,XIHA,8.0,1000000,2,500000.0,7.0,3.0,25.5,5.5
4,5d2737547bf280155f21609a,tech,Amsterdam,NLD,2007,"{'type': 'Point', 'coordinates': [4.8948623, 5...",52.374523,4.894862,Wakoopa,8.0,1000000,6,166666.666667,12.0,7.0,6.0,32.5


In [48]:
# Overall ranking: weighted sum of the three partial ranks
# (money 0.8, employees 0.6, nearby offices 0.4).
rank_weights = {'Rank_money': 0.8, 'Rank_employee': 0.6, 'Rank_offices': 0.4}
data_companies['ranking_total'] = sum(
    data_companies[rank_column] * weight
    for rank_column, weight in rank_weights.items()
)

In [49]:
# Order the companies by overall ranking, highest score first.
data_companies = data_companies.sort_values(by='ranking_total', ascending=False)

In [50]:
# Base endpoint of the Google Maps Places "nearbysearch" service; the request
# cells append the output format ("/json") and the query parameters.
BASE_URL = "https://maps.googleapis.com/maps/api/place/nearbysearch"

In [51]:
# Nearby search for schools within a 1500 radius of HootSuite's coordinates
# in Vancouver (49.285173, -123.095238).
# The query goes through `params` so that every value, including the API key,
# is URL-encoded; the timeout keeps the cell from hanging on a stalled connection.
response = requests.get(
    "{}/json".format(BASE_URL),
    params={
        "location": "49.285173,-123.095238",
        "radius": 1500,
        "type": "school",
        "key": KEY,
    },
    timeout=10,
)
# Fail here on an HTTP error instead of with an obscure KeyError further down.
response.raise_for_status()
res = response.json()

In [52]:
# From the Google API response, collect the schools near the Vancouver
# company, which is the first one in the ranking.

school_places = res['results']
coordinates_schools_latitude = [place['geometry']['location']["lat"] for place in school_places]
coordinates_schools_longitude = [place['geometry']['location']["lng"] for place in school_places]
name_schools = [place['name'] for place in school_places]
print(coordinates_schools_latitude, coordinates_schools_longitude, name_schools)

[49.28071019999999, 49.2724496, 49.2847903, 49.2803269, 49.2819276, 49.2811966, 49.283943, 49.2842096, 49.2786576, 49.2777529, 49.285232, 49.28302299999999, 49.2787275, 49.28409800000001, 49.2813601, 49.27962349999999, 49.28395920000001, 49.28030099999999, 49.2843832, 49.28005579999999] [-123.1115084, -123.0957245, -123.1136753, -123.1060349, -123.1077491, -123.1083781, -123.1114593, -123.1094381, -123.0802777, -123.0983909, -123.1153523, -123.1127501, -123.093274, -123.0939572, -123.0924816, -123.1006497, -123.1054599, -123.1025561, -123.1070491, -123.0867231] ['Vancouver Community College', 'Eton College Canada', 'ILAC - International Language Academy of Canada', 'International House Vancouver - Modern Languages', 'London School', 'Kalev Fitness Solution', 'Cambridge Western Academy', 'GEOS Language Plus', 'Admiral Seymour Elementary School', 'Brandywine Bartending School', 'Sprott Shaw College Downtown Vancouver - Pender', 'Fine Art Bartending School Vancouver', 'Benedict Marsh: Com

In [53]:
# Column name -> values mapping used to build the schools table.
dicc = dict(
    latitude_school=coordinates_schools_latitude,
    longitude_school=coordinates_schools_longitude,
    name_school=name_schools,
)

In [54]:
# One row per school: latitude, longitude and name.
dataframe_schools = pd.DataFrame.from_dict(dicc)

In [55]:
# Render the schools table.
display(dataframe_schools)

Unnamed: 0,latitude_school,longitude_school,name_school
0,49.28071,-123.111508,Vancouver Community College
1,49.27245,-123.095725,Eton College Canada
2,49.28479,-123.113675,ILAC - International Language Academy of Canada
3,49.280327,-123.106035,International House Vancouver - Modern Languages
4,49.281928,-123.107749,London School
5,49.281197,-123.108378,Kalev Fitness Solution
6,49.283943,-123.111459,Cambridge Western Academy
7,49.28421,-123.109438,GEOS Language Plus
8,49.278658,-123.080278,Admiral Seymour Elementary School
9,49.277753,-123.098391,Brandywine Bartending School


In [70]:
# Preview the first rows of the ranked companies.
data_companies.head()

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised,number of offices near,money by offices,employee by offices,Rank_employee,Rank_money,Rank_offices,ranking_total
0,5d2737547bf280155f216109,social,Vancouver,CAN,2008,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,HootSuite,370.0,190000000,6,31666670.0,78.5,60.5,63.0,32.5,99.7
1,5d2737547bf280155f2160d3,social,Berlin,DEU,2008,"{'type': 'Point', 'coordinates': [13.3842605, ...",52.530852,13.38426,ResearchGate,74.0,35000000,7,5000000.0,62.857143,55.5,59.0,40.0,96.5
2,5d2737547bf280155f216128,tech,Vancouver,CAN,2009,"{'type': 'Point', 'coordinates': [-123.109217,...",49.282455,-123.109217,Tiny Speck,45.0,17200000,6,2866667.0,78.5,60.5,56.0,32.5,94.1
3,5d2737547bf280155f21609c,education,Berlin,DEU,2007,"{'type': 'Point', 'coordinates': [13.38861, 52...",52.4897,13.38861,babbel,205.0,10000000,6,1666667.0,72.5,57.5,52.0,32.5,89.1
4,5d2737547bf280155f21611d,tech,Sao Paulo,BRA,2009,"{'type': 'Point', 'coordinates': [-46.6388182,...",-23.548943,-46.638818,Brandsclub,140.0,17000000,2,8500000.0,95.0,63.0,61.0,5.5,88.8


In [57]:
# Renumber the rows from 0 in their current order, discarding the old index.
data_companies = data_companies.reset_index(drop=True)

In [58]:
# Save the selected companies to a CSV file in the working directory.
data_companies.to_csv('./data_companies_select.csv')

In [59]:
# Nearby search for cafes named "Starbucks" within a 1500 radius of
# 49.285173, -123.095238.
# The query goes through `params` so that every value, including the API key,
# is URL-encoded; the timeout keeps the cell from hanging on a stalled connection.
res = requests.get(
    "{}/json".format(BASE_URL),
    params={
        "location": "49.285173,-123.095238",
        "radius": 1500,
        "type": "cafe",
        "key": KEY,
        "name": "Starbucks",
    },
    timeout=10,
)
# Fail here on an HTTP error instead of with an obscure KeyError further down.
res.raise_for_status()

In [60]:
# Decode the JSON body of the response.
# The guard makes this cell safe to re-run: after the first run `res` already
# holds the decoded data and no longer has a .json() method.
if isinstance(res, requests.Response):
    res = res.json()

In [61]:
# Pull the coordinates of every Starbucks out of the API response.
starbucks_places = res['results']
coordinates_starbucks_latitude = [place['geometry']['location']["lat"] for place in starbucks_places]
coordinates_starbucks_longitude = [place['geometry']['location']["lng"] for place in starbucks_places]
print(coordinates_starbucks_latitude, coordinates_starbucks_longitude)

[49.2856442, 49.2859207, 49.2844427, 49.2767968, 49.2827536, 49.2730065, 49.27997, 49.2877342, 49.2800011, 49.2810692, 49.2844893, 49.28460980000001, 49.282976, 49.2876188, 49.2792904, 49.271067, 49.2885143, 49.2798197] [-123.111855, -123.1151827, -123.108513, -123.1148211, -123.0856851, -123.0996974, -123.1072445, -123.1132988, -123.1177892, -123.0739561, -123.1134266, -123.1119558, -123.11564, -123.1155677, -123.1169454, -123.0877751, -123.1176877, -123.1179367]


In [62]:
# Build the Starbucks coordinates table and render it.
dicc_starbucks = dict(
    latitude_starbucks=coordinates_starbucks_latitude,
    longitude_starbucks=coordinates_starbucks_longitude,
)
dataframe_starbucks = pd.DataFrame.from_dict(dicc_starbucks)
display(dataframe_starbucks)

Unnamed: 0,latitude_starbucks,longitude_starbucks
0,49.285644,-123.111855
1,49.285921,-123.115183
2,49.284443,-123.108513
3,49.276797,-123.114821
4,49.282754,-123.085685
5,49.273007,-123.099697
6,49.27997,-123.107244
7,49.287734,-123.113299
8,49.280001,-123.117789
9,49.281069,-123.073956


In [29]:
# Map with all the companies

In [63]:
# World map with one small circle per company; the popup shows its country,
# money raised and name. The map is saved to HTML and shown inline.
map_companies = folium.Map(location=[51.510880, -0.141897], width=750, height=500, zoom_start=2)
for _, company in data_companies.iterrows():
    popup_text = "Country: {}. Money: {}. Name:{}".format(
        company['country'], company['total_money_raised'], company['name']
    )
    company_marker = folium.CircleMarker(
        [company['latitude'], company['longitude']],
        radius=3,
        popup=popup_text,
        fill_color="#F35C50",
    )
    company_marker.add_to(map_companies)

map_companies.save('map_companies.html')
map_companies

In [31]:
# Map of the area with the companies (red), the nearby schools (cloud icon), the Starbucks (dark green)
# and the street where it would be interesting to locate the company (pink, with a circle around it)

In [69]:
# Street-level map centred on HootSuite's coordinates in Vancouver, with:
#   - every company in data_companies (red markers),
#   - the schools returned by the API (cloud icon),
#   - the Starbucks returned by the API (dark green markers),
#   - the proposed location (pink marker) and a circle of radius 300 around it.
# folium.Marker is an icon marker: `radius` and `fill_color` are vector-layer
# options (Circle / CircleMarker) and had no effect on it, so they are not passed.
our_company_location = [49.282223, -123.109113]

map_vancouver = folium.Map(location=[49.285173, -123.095238], width=750, height=500, zoom_start=15)

for _, company in data_companies.iterrows():
    folium.Marker(
        [company['latitude'], company['longitude']],
        # ": " added after "Employees" so the number no longer runs into the label.
        popup="Name company: {}. Money: {}. Employees: {}".format(
            company['name'], company['total_money_raised'], company['number_of_employees']
        ),
        icon=folium.Icon(color='red', icon='info-sign'),
    ).add_to(map_vancouver)

for _, school in dataframe_schools.iterrows():
    folium.Marker(
        [school['latitude_school'], school['longitude_school']],
        popup="School: {}".format(school['name_school']),
        icon=folium.Icon(icon='cloud'),
    ).add_to(map_vancouver)

for _, starbucks in dataframe_starbucks.iterrows():
    folium.Marker(
        [starbucks['latitude_starbucks'], starbucks['longitude_starbucks']],
        icon=folium.Icon(color='darkgreen', icon='info-sign'),
    ).add_to(map_vancouver)

folium.Marker(
    our_company_location,
    icon=folium.Icon(color='pink', icon='info-sign'),
    popup="Our company",
).add_to(map_vancouver)
folium.Circle(our_company_location, radius=300).add_to(map_vancouver)

map_vancouver.save('map_vancouver.html')
map_vancouver