In [1]:
import pandas as pd
from pymongo import MongoClient
import pprint
import re
import numpy as np
import folium
import os
import requests
from dotenv import load_dotenv
load_dotenv()

if not "KEY" in os.environ:
    raise ValueError("You should pass a KEY")
KEY = os.environ["KEY"]

In [2]:
client = MongoClient('mongodb://localhost:27017/')
db = client.companies

In [3]:
companies = db.data_companies_clean.find({ 
    "total_money_raised":{
        "$gte": 1000000
    }
}
)  

#Primero filtro en las ganancias. Queremos empresas con al menos 1000000 de ingresos

In [4]:
data_companies = pd.DataFrame(companies)

In [5]:
data_companies[data_companies['total_money_raised']==data_companies['total_money_raised'].max()]

#Para hacer una primera aproximación veo qué empresa tiene más dinero.

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised
68,5d2737547bf280155f216109,social,Vancouver,CAN,2008,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,HootSuite,370.0,190000000


In [6]:
list_geo = []
for e in range(len(data_companies)):
    list_geo.append(data_companies.geoDescription[e]) #Lista con todas las geoDescription

In [7]:
#Función para saber el número de empresas que hay alrededor de cada una de ellas.

def findNear(list_geo, radio_meters):
        geopoint = list_geo
        return list(db.data_companies_clean.find({
        "geoDescription": {
         "$near": {
             "$geometry": geopoint,
             "$maxDistance": radio_meters
         }
       }
    }
    )
    ) 

radio_meters = 5000
list_number_offices = []
list_offices=[]
for i in range(len(data_companies)):
    num_offices =  findNear(list_geo[i], radio_meters)
    list_offices.append(num_offices)
    list_number_offices.append(len(num_offices))
data_companies['number of offices near'] = list_number_offices

In [8]:
data_companies.head()

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised,number of offices near
0,5d2737547bf280155f216096,finance,London,GBR,2007,"{'type': 'Point', 'coordinates': [-0.1418973, ...",51.51088,-0.141897,Seedcamp,4.0,5000000,18
1,5d2737547bf280155f216097,tech,Tel-Aviv,ISR,2007,"{'type': 'Point', 'coordinates': [34.7595, 32....",32.0554,34.7595,Flixwagon,10.0,2500000,5
2,5d2737547bf280155f216098,tech,Toronto,CAN,2007,"{'type': 'Point', 'coordinates': [-79.388279, ...",43.666953,-79.388279,ModiFace,35.0,4250000,5
3,5d2737547bf280155f216099,tech,Helsinki,FIN,2007,"{'type': 'Point', 'coordinates': [24.9410012, ...",60.168149,24.941001,XIHA,8.0,1000000,2
4,5d2737547bf280155f21609a,tech,Amsterdam,NLD,2007,"{'type': 'Point', 'coordinates': [4.8948623, 5...",52.374523,4.894862,Wakoopa,8.0,1000000,6


In [9]:
data_companies['money by offices'] = data_companies['total_money_raised']/data_companies['number of offices near']

#Media de dinero por oficinas y creo nueva columna

In [10]:
def sumEmployees(list_offices):
    list_employee = []
    res=0
    for company in list_offices:
        lst=[]
        for i in company:
            lst.append(i['number_of_employees'])
        res+=i['number_of_employees']
        list_employee.append(lst)
    result = [sum(b) for b in list_employee]
    return result

#función para calcular la suma de empleados por oficina

In [11]:
data_companies['employee by offices'] = sumEmployees(list_offices)/data_companies['number of offices near']

In [12]:
data_companies = data_companies[data_companies['number of offices near'] > 1]

#Quiero que al menos haya una oficina cerca

In [31]:
data_companies['ranking'] = data_companies['number of offices near']*0.8 + (data_companies['money by offices']/1000000)*0.6 + data_companies['employee by offices']*0.4

#Establezco un pequeño ranking para darle más importancia a las empresas cerca, seguid de otros valores

In [33]:
data_companies = data_companies.sort_values(['ranking'], ascending=False)

#Ordeno por ranking final

In [15]:
BASE_URL = "https://maps.googleapis.com/maps/api/place/nearbysearch"

In [16]:
res = requests.get("{}/json?location=49.285173,-123.095238&radius=1500&type=school&key={}".format(BASE_URL, KEY)).json()

In [17]:
#Con la Api de google busco las escuelas cerca sobre la empresa localizada en Vancouver que es la primera según ranking

coordinates_schools_latitude = []
coordinates_schools_longitude = []
name_schools = []
for i in range(len(res)):
    coordinates_schools_latitude.append(res['results'][i]['geometry']['location']["lat"])
    coordinates_schools_longitude.append(res['results'][i]['geometry']['location']["lng"])
    name_schools.append(res['results'][i]['name'])
print(coordinates_schools_latitude, coordinates_schools_longitude, name_schools)

[49.28071019999999, 49.2724496, 49.2847903, 49.2803269] [-123.1115084, -123.0957245, -123.1136753, -123.1060349] ['Vancouver Community College', 'Eton College Canada', 'ILAC - International Language Academy of Canada', 'International House Vancouver - Modern Languages']


In [18]:
dicc = {'latitude_school':coordinates_schools_latitude,
       'longitude_school': coordinates_schools_longitude,
       'name_school': name_schools}

In [19]:
dataframe_schools = pd.DataFrame(dicc)

In [20]:
display(dataframe_schools)

Unnamed: 0,latitude_school,longitude_school,name_school
0,49.28071,-123.111508,Vancouver Community College
1,49.27245,-123.095725,Eton College Canada
2,49.28479,-123.113675,ILAC - International Language Academy of Canada
3,49.280327,-123.106035,International House Vancouver - Modern Languages


In [53]:
display(data_companies)

Unnamed: 0,_id,category_code,city,country,founded_year,geoDescription,latitude,longitude,name,number_of_employees,total_money_raised,number of offices near,money by offices,employee by offices,ranking
0,5d2737547bf280155f216109,social,Vancouver,CAN,2008,"{'type': 'Point', 'coordinates': [-123.0952381...",49.285173,-123.095238,HootSuite,370.0,190000000,6,3.166667e+07,78.500000,55.200000
2,5d2737547bf280155f21611d,tech,Sao Paulo,BRA,2009,"{'type': 'Point', 'coordinates': [-46.6388182,...",-23.548943,-46.638818,Brandsclub,140.0,17000000,2,8.500000e+06,95.000000,44.700000
7,5d2737547bf280155f216128,tech,Vancouver,CAN,2009,"{'type': 'Point', 'coordinates': [-123.109217,...",49.282455,-123.109217,Tiny Speck,45.0,17200000,6,2.866667e+06,78.500000,37.920000
29,5d2737547bf280155f2160b7,tech,"Vancouver, BC",CAN,2007,"{'type': 'Point', 'coordinates': [-123.120893,...",49.275332,-123.120893,Lat49,24.0,3800000,6,6.333333e+05,78.500000,36.580000
50,5d2737547bf280155f2160a4,tech,Vancouver,CAN,2007,"{'type': 'Point', 'coordinates': [-123.1078045...",49.282108,-123.107805,Strutta,12.0,1600000,6,2.666667e+05,78.500000,36.360000
11,5d2737547bf280155f21609c,education,Berlin,DEU,2007,"{'type': 'Point', 'coordinates': [13.38861, 52...",52.489700,13.388610,babbel,205.0,10000000,6,1.666667e+06,72.500000,34.800000
15,5d2737547bf280155f216118,tech,Berlin,DEU,2007,"{'type': 'Point', 'coordinates': [13.398504, 5...",52.494727,13.398504,Moviepilot,43.0,7000000,6,1.166667e+06,72.500000,34.500000
4,5d2737547bf280155f2160d3,social,Berlin,DEU,2008,"{'type': 'Point', 'coordinates': [13.3842605, ...",52.530852,13.384260,ResearchGate,74.0,35000000,7,5.000000e+06,62.857143,33.742857
35,5d2737547bf280155f2160c0,tech,Berlin,DEU,2008,"{'type': 'Point', 'coordinates': [13.4124563, ...",52.528361,13.412456,plista,100.0,3800000,7,5.428571e+05,62.857143,31.068571
1,5d2737547bf280155f2160d9,tech,Copenhagen V,DNK,2007,"{'type': 'Point', 'coordinates': [12.5605372, ...",55.673962,12.560537,Trustpilot,100.0,18400000,2,9.200000e+06,54.000000,28.720000


In [22]:
data_companies = data_companies.reset_index(drop=True)

In [23]:
data_companies.to_csv('./data_companies_select.csv')

In [38]:
res = requests.get("{}/json?location=49.285173,-123.095238&radius=1500&type=cafe&key={}&name=Starbucks".format(BASE_URL, KEY))

In [41]:
res = res.json()

In [43]:
coordinates_starbucks_latitude = []
coordinates_starbucks_longitude = []
for i in range(len(res)):
    coordinates_starbucks_latitude.append(res['results'][i]['geometry']['location']["lat"])
    coordinates_starbucks_longitude.append(res['results'][i]['geometry']['location']["lng"])
print(coordinates_starbucks_latitude, coordinates_starbucks_longitude)

[49.2856442, 49.2859207, 49.2844427] [-123.111855, -123.1151827, -123.108513]


In [45]:
dicc_starbucks = {'latitude_starbucks':coordinates_starbucks_latitude,
       'longitude_starbucks': coordinates_starbucks_longitude}
dataframe_starbucks = pd.DataFrame(dicc_starbucks)
display(dataframe_starbucks)

Unnamed: 0,latitude_starbucks,longitude_starbucks
0,49.285644,-123.111855
1,49.285921,-123.115183
2,49.284443,-123.108513


In [76]:
#Mapa con toda las compañías

In [78]:
map_companies = folium.Map(location=[51.510880, -0.141897], width=750, height=500, zoom_start=2)
for index, row in data_companies.iterrows():
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=3,
                        popup="Country: {}. Money: {}. Name:{}".format(row['country'], row['total_money_raised'], row['name']),
                        fill_color="#F35C50", # divvy color
                       ).add_to(map_companies)

map_companies.save('map_companies.html')
map_companies

In [72]:
#Mapa con las empresas del radio en la zona, escuelas cercanas (en rojo), Starbucks (verde) y la calle donde 
#sería interesante localizar la empresa

In [74]:
map_vancouver = folium.Map(location=[49.285173, -123.095238], width=750, height=500, zoom_start=15)
for index, row in data_companies.iterrows():
    folium.Marker([row['latitude'], row['longitude']],
                        radius=7,
                        popup="Name company: {}. Money: {}. Employees{}".format(row['name'], row['total_money_raised'], row['number_of_employees']),
                        icon=folium.Icon(color='red', icon='info-sign'),
                        fill_color="#F45649", 
                       ).add_to(map_vancouver)
for index, row in dataframe_schools.iterrows():
    folium.Marker([row['latitude_school'], row['longitude_school']],
                        radius=4,
                        popup="School: {}".format(row['name_school']),
                        icon=folium.Icon(icon='cloud'),
                        fill_color="#F35C50", # divvy color
                       ).add_to(map_vancouver)
for index, row in dataframe_starbucks.iterrows():
    folium.Marker([row['latitude_starbucks'], row['longitude_starbucks']],
                        radius=4,
                        icon=folium.Icon(color='darkgreen',icon='info-sign'),
                        fill_color="#F35C50", # divvy color
                       ).add_to(map_vancouver)
folium.Marker([49.282223,-123.109113],
                        radius=4,
                        icon=folium.Icon(color='pink',icon='info-sign'),
                        popup="Our company",
                        fill_color="#F35C50", # divvy color
                       ).add_to(map_vancouver)
map_vancouver.save('map_vancouver.html')
map_vancouver