In [1]:
import requests
import json
import pandas as pd
import numpy as np
from pymongo import MongoClient
import math
from bs4 import BeautifulSoup
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
#import src.functions as fn

In [2]:
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

## Indice calidad de vida

En primer lugar, voy a basarme en el índice de calidad de vida para seleccionar una zona. 

El índice de calidad de vida se calcula en base otros índices: índice de poder adquisitivo, índice de seguridad, indice de sanidad, de costo de visa de tiempo de desplazamiento den tráfico, contaminación clima y relación precio/ingresos para propiedades.

Para obtener esta información he hecho scrapping de la web https://es.numbeo.com/calidad-de-vida/clasificaciones-por-pa%C3%ADs.

In [3]:
data=requests.get("https://es.numbeo.com/calidad-de-vida/clasificaciones-por-pa%C3%ADs").text
soup= BeautifulSoup(data, 'html.parser')

def procesaIndices(fila):
    m = fila.find_all("td")
    #print(m[2])
    return {
        "country":m[1].text.strip(),
        "calidad_vida":float((m[2].text).replace(',','.')),
        "poder_adquisitivo":float((m[3].text).replace(',','.')),
        "seguridad":float((m[4].text).replace(',','.')),
        "sanidad":float((m[5].text).replace(',','.')),
        "costo_vida":float((m[6].text).replace(',','.')),
        "relacion_precio_vs_ingresos":float((m[7].text).replace(',','.')),
        "tiempo_desplazamiento":float((m[8].text).replace(',','.')),
        "contaminación":float((m[9].text).replace(',','.')),
        "clima":float((m[10].text).replace(',','.'))
    }
    
indice = soup.find_all('table')[2]
indice_dict = [procesaIndices(fila) for fila in indice.find_all("tr")[1:]]


In [4]:
#Genero df:
df_indices=pd.DataFrame(indice_dict)

#Exporto en formato json y luego importo en MongoDB Compass:
df_indices.to_json("output/indices_calidad.json", orient="records")

El país que elegiré para poner la nueva sede, será aquel con mejor indice de calidad y además con un indice de clima superior a 90. Como se ve a continuación será Australia.

In [5]:
df_indices[(df_indices.clima>90)].head()

Unnamed: 0,country,calidad_vida,poder_adquisitivo,seguridad,sanidad,costo_vida,relacion_precio_vs_ingresos,tiempo_desplazamiento,contaminación,clima
3,Australia,186.21,107.31,58.64,77.38,73.54,7.52,34.73,23.46,92.7
7,Nueva Zelanda,181.02,92.66,59.07,73.81,72.53,8.52,31.1,23.4,95.46
14,España,169.82,72.03,68.04,78.88,53.77,9.37,29.1,39.99,94.19
18,Portugal,162.91,49.43,70.37,71.88,49.52,12.7,30.0,30.89,97.31
25,Francia,153.95,80.36,53.21,79.99,74.14,13.04,34.76,43.56,90.25


## Importo de MongoDB Compass:
Con una query me traeré de la colección 'companies' todas las empresas que tenga una ganancia distinta de 0$.

In [6]:
q1={"total_money_raised": {"$ne": "$0"}}
companies=list(db["companies"].find(q1,{"name":1,"founded_year":1,"total_money_raised":1, "offices":1,"category_code":1}))

In [7]:
df = pd.DataFrame(companies)

Voy a tratar el df para desagregar la columna offices, dejando cada oficina en una línea distina y luego desdoblando las columnas que contienen la información de cada oficina.

In [8]:
#Desagrego las oficinas, para que por compañía cada oficina aparezca en una línea independiente:
df = df.explode('offices')
#display(df.head())

#Desagrego la columna office para obtener los datos en columnas de cada oficina:
df_offices = df[["offices"]].apply(lambda x: x.offices, result_type="expand", axis=1)
#display(df_offices.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_data = pd.concat([df,df_offices], axis=1)
display(clean_data.head())

Unnamed: 0,_id,name,category_code,founded_year,total_money_raised,offices,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,52cdef7c4bab8bd675297d8f,Omnidrive,network_hosting,2005.0,$800k,"{'description': '', 'address1': 'Suite 200', '...",,Suite 200,654 High Street,94301.0,Palo Alto,CA,ISR,,
1,52cdef7c4bab8bd675297d91,Geni,web,2006.0,$16.5M,"{'description': 'Headquarters', 'address1': '9...",Headquarters,9229 W. Sunset Blvd.,,90069.0,West Hollywood,CA,USA,34.090368,-118.393064
2,52cdef7c4bab8bd675297d8a,Wetpaint,web,2005.0,$39.8M,"{'description': '', 'address1': '710 - 2nd Ave...",,710 - 2nd Avenue,Suite 1100,98104.0,Seattle,WA,USA,47.603122,-122.333253
2,52cdef7c4bab8bd675297d8a,Wetpaint,web,2005.0,$39.8M,"{'description': '', 'address1': '270 Lafayette...",,270 Lafayette Street,Suite 505,10012.0,New York,NY,USA,40.723731,-73.996431
3,52cdef7c4bab8bd675297d96,Gizmoz,web,2003.0,$18.1M,"{'description': None, 'address1': None, 'addre...",,,,,Menlo Park,CA,USA,37.48413,-122.169472


Elimino la columna _id, ya que se generará un nuevo id cuando cargue la nueva colección, y también elimino office que ya está toda la información desagregada y no nos sirve más.

In [9]:
clean_data = clean_data.drop(columns=["_id","offices"])
clean_data.head()

Unnamed: 0,name,category_code,founded_year,total_money_raised,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Omnidrive,network_hosting,2005.0,$800k,,Suite 200,654 High Street,94301.0,Palo Alto,CA,ISR,,
1,Geni,web,2006.0,$16.5M,Headquarters,9229 W. Sunset Blvd.,,90069.0,West Hollywood,CA,USA,34.090368,-118.393064
2,Wetpaint,web,2005.0,$39.8M,,710 - 2nd Avenue,Suite 1100,98104.0,Seattle,WA,USA,47.603122,-122.333253
2,Wetpaint,web,2005.0,$39.8M,,270 Lafayette Street,Suite 505,10012.0,New York,NY,USA,40.723731,-73.996431
3,Gizmoz,web,2003.0,$18.1M,,,,,Menlo Park,CA,USA,37.48413,-122.169472


Con la siguiente función, daré el formato GeoJson, necesario para poder generar los indexes en MongoDB Compass. En aquellos casos en los que latitud o longitud sean valores NaN, se indicará None, para evitar posibles conflicos en la carga de la colección.

In [10]:
def asGeoJSON(lat,lng):
    try:
        lat = float(lat)
        lng = float(lng)
        if not math.isnan(lat) and not math.isnan(lng):
            return {
                "type":"Point",
                "coordinates":[lng,lat]
            }
    except Exception:
        print("Invalid data")
        return None
        

clean_data["location"] = clean_data[["latitude","longitude"]].apply(lambda x:asGeoJSON(x.latitude,x.longitude), axis=1)
clean_data[["latitude","longitude","location"]].head()

Unnamed: 0,latitude,longitude,location
0,,,
1,34.090368,-118.393064,"{'type': 'Point', 'coordinates': [-118.393064,..."
2,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,..."
2,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,..."
3,37.48413,-122.169472,"{'type': 'Point', 'coordinates': [-122.169472,..."


Añado una columna que tipifique si la empresa tiene 10 o menos años('young') o más de 10 años ('old').

In [11]:
def oldYoung(row):
    if row['founded_year']>=2007:
        return 'young'
    else:
        return 'old'

clean_data['old_young']= clean_data.apply(lambda row: oldYoung(row), axis=1)

clean_data[['name', 'founded_year','old_young']].head(10)

Unnamed: 0,name,founded_year,old_young
0,Omnidrive,2005.0,old
1,Geni,2006.0,old
2,Wetpaint,2005.0,old
2,Wetpaint,2005.0,old
3,Gizmoz,2003.0,old
4,Lala,,old
5,StumbleUpon,2002.0,old
5,StumbleUpon,2002.0,old
6,Slacker,2006.0,old
7,Scribd,2007.0,young


Exporto a json y luego lo importo a MongoDB Compass.

In [12]:
clean_data.to_json("output/cleaned_offices.json", orient="records")

Lo importo en Mongo DB Compass: $ mongoimport --db companies --collection offices --jsonArray --drop cleaned_offices.json

Una vez importado, creo un geospartial index: Indexes > Create Index > (write any indexname) > Select fieldname:"location" and 2dsphere

## Exporto de MongoDB:
Ahora exporto las oficinas que están en Australia para ver en un mapa su distribución

In [13]:
q2={"$and": [{"country_code": {"$eq": "AUS"}}, {"location": {"$ne":None}}]}
aus = list(db["offices"].find(q2, {'_id':0,'name':1, 'total_money_raised':1,'category_code':1, 'founded_year':1, 'city':1, 'latitude':1, 'longitude':1, 'old_young':1}))
df_aus = pd.DataFrame(aus)
df_aus

Unnamed: 0,name,category_code,founded_year,total_money_raised,city,latitude,longitude,old_young
0,Google,search,1998.0,$555M,Melbourne,-37.879234,145.073608,old
1,Google,search,1998.0,$555M,Sydney,-34.822723,138.612396,old
2,Hitwise,web,,$4.2M,Melbourne,-37.814251,144.963169,old
3,2threads,games_video,2004.0,$300k,sydney,-33.884685,151.216427,old
4,99designs,design,2008.0,$35M,Collingwood,-37.802659,144.986855,young
5,Austhink Software,software,2004.0,$3.77M,"Melbourne, Victoria",-37.813603,144.96616,old
6,Artabase,network_hosting,2004.0,$150k,"Southbank, VIC",-37.803294,144.99974,old
7,Exinda,software,2002.0,$19M,Abbotsford,-37.804233,144.998935,old
8,Dexterra,mobile,2000.0,$123M,Sydney,-33.872864,151.207543,old
9,Coremetrics,advertising,1999.0,$91M,Melbourne,-37.814541,144.9705,old


## Mapa en folium:

Antes consultaré las coordenadas de Australia en geoCode con la siguiente función:

In [91]:
def geocode(address):
    data = requests.get(f"https://geocode.xyz/{address}?json=1").json()
    print(data)
    return {
        "type":"Point",
        "coordinates":[float(data["longt"]),float(data["latt"])]
    }

aus_geo = geocode("Melbourne")

{'standard': {'addresst': {}, 'city': 'Melbourne', 'prov': 'US', 'countryname': 'United States of America', 'postal': {}, 'confidence': '0.90'}, 'longt': '-80.66773', 'alt': {}, 'elevation': {}, 'latt': '28.14645'}


In [15]:
start_lat = -25.2743988
start_lon = 133.7751312
aus_map = folium.Map(location=[start_lat, start_lon],tiles='cartodbpositron', zoom_start=4)
aus_map

In [16]:
df_aus.apply(lambda row:folium.Marker(location=[row["latitude"], 
                                                row["longitude"]], popup=row["old_young"]).add_to(aus_map),axis=1)
aus_map

Genero query para traeme los aeropuertos que hay en Australia

In [17]:
q3={"$and": [{"country": {"$eq":"Australia"}}, {"$or":[{"city":{"$eq":"Sydney"}},{"city":{"$eq":"Melbourne"}}]}]}
aus_airport = list(db['airports'].find(q3,{"_id":0, "name":1,"city":1,"lat":1,"lon":1}))

df_airports = pd.DataFrame (aus_airport)
df_airports.head()

Unnamed: 0,lat,lon,name,city
0,-37.6759,144.844,Melbourne International Airport,Melbourne
1,-33.9344,151.168,Kingsford Smith Airport,Sydney
2,-33.9344,151.168,Kingsford Smith Airport,Sydney


In [18]:
df_airports.apply(lambda row:folium.Marker(location=[row["lat"], 
                                                row["lon"]], popup=row["name"],icon=folium.Icon(color='black', icon_color='white', icon='plane')).add_to(aus_map),axis=1)
aus_map

## Consultas a GooglePlace: starbucks, pubs, parques...

Request a la API de google consultando los starbucks en sydney.

In [None]:
import os
from dotenv import load_dotenv
load_dotenv('src/.env')

def requestGooglePlace(place):
    token = os.getenv("API_GOOGLE_KEY")
    #print(token)
    if not token:
        raise ValueError("Necesitas un API_GOOGLE_KEY")
    
    baseUrl = "https://maps.googleapis.com/"
    endpoint="maps/api/place/textsearch/json"
    url = baseUrl+endpoint
    print(f"Requesting data from {url}")
    params = {
        "query":f"{place}",
        "key": f"{token}"
    }
    res = requests.get(url,params=params)
    if res.status_code != 200:
        print(res.text)
        raise ValueError("Bad Response")
    return res.json()

#Prueba: data_Starbucks=requestGooglePlace("Starbucks+in+Sydney")

## Request de Starbucks en Melbourne

Elijo Melbourne como ciudad para establecer la oficina, ya que hay más empresas jóvenes que en Sydney.

In [None]:
#data_Starbucks_Melbourn=requestGooglePlace("Starbucks+in+Melbourn+Australia")

Extraigo la key "results" y convierto a dataframe para trabajar con la columna "geometry"

In [None]:
results_mel=data_Starbucks_Melbourn['results']
df_starbucks_melbourn = pd.DataFrame(results_mel)

#Guardo el response de la request lanzada:
json_starbucks_melbourn=json.dumps(results_mel)
writeFile =open('starbucks_melbourn.json', 'w')
writeFile.write(json_starbucks_melbourn)
writeFile.close()

In [None]:
#Desagrego la columna geometry.location para obtener la latitud y longitud en columnas de cada Starbucks:
df_loc_mel_cafe = df_starbucks_melbourn[["geometry"]].apply(lambda x: x.geometry, result_type="expand", axis=1)

df_loc_mel_cafe2 = df_loc_mel_cafe[["location"]].apply(lambda x: x.location, result_type="expand", axis=1)
#display(df_loc_syd_cafe2.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_starbucks_melbourn = pd.concat([df_starbucks_melbourn,df_loc_mel_cafe2], axis=1)
display(clean_starbucks_melbourn.head())

Añado columna con formato geoquery:

In [None]:
clean_starbucks_melbourn["location"] = clean_starbucks_melbourn[["lat","lng"]].apply(lambda x:asGeoJSON(x.lat,x.lng), axis=1)
new_clean_starbucks_melbourn = clean_starbucks_melbourn[["name","lat","lng","location"]]
new_clean_starbucks_melbourn.head()

In [None]:
new_clean_starbucks_melbourn.to_json("output/new_clean_starbucks_melbourn.json", orient="records")

In [None]:
new_clean_starbucks_melbourn.apply(lambda row:folium.Marker(location=[row["lat"], 
                                                row["lng"]], popup=row["name"],icon=folium.Icon(color='darkgreen', icon_color='white',icon='S')).add_to(aus_map),axis=1)
aus_map

## Request Pubs en Melbourne

In [None]:
#data_pubs=requestGooglePlace("pubs+in+Melbourne+Australia")

In [None]:
results_pubs_melbourn=data_pubs['results']
df_pubs_melbourn = pd.DataFrame(results_pubs_melbourn)
json_pubs_melbourn=json.dumps(results_pubs_melbourn)

writeFile =open('output/pubs_melbourn.json', 'w')
writeFile.write(json_pubs_melbourn)
writeFile.close()

In [None]:
#Desagrego la columna geometry.location para obtener la latitud y longitud en columnas de cada Starbucks:
df_loc_mel_pubs = df_pubs_melbourn[["geometry"]].apply(lambda x: x.geometry, result_type="expand", axis=1)

df_loc_mel_pubs2 = df_loc_mel_pubs[["location"]].apply(lambda x: x.location, result_type="expand", axis=1)
#display(df_loc_mel_pubs2.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_pubs_melbourn = pd.concat([df_pubs_melbourn,df_loc_mel_pubs2], axis=1)
display(clean_pubs_melbourn.head())

In [None]:
clean_pubs_melbourn["location"] = clean_pubs_melbourn[["lat","lng"]].apply(lambda x:asGeoJSON(x.lat,x.lng), axis=1)
new_clean_pubs_melbourn = clean_pubs_melbourn[["name","lat","lng","location"]]
new_clean_pubs_melbourn.head()

In [None]:
new_clean_pubs_melbourn.to_json("output/new_clean_pubs_melbourn.json", orient="records")

In [None]:
new_clean_pubs_melbourn.apply(lambda row:folium.Marker(location=[row["lat"], 
                                                row["lng"]], popup=row["name"],icon=folium.Icon(color='pink', icon_color='white', icon='glass')).add_to(aus_map),axis=1)
aus_map

## Request Schools en Melbourne

In [None]:
#data_schools = requestGooglePlace("schools+in+Melbourne+Australia")

In [None]:
results_schools_melbourne = data_schools['results']
df_schools_melbourne = pd.DataFrame(results_schools_melbourne)
json_schools_melbourne = json.dumps(results_schools_melbourne)

writeFile =open('output/schools_melbourne.json', 'w')
writeFile.write(json_schools_melbourne)
writeFile.close()

In [None]:
#Desagrego la columna geometry.location para obtener la latitud y longitud en columnas de cada Starbucks:
df_loc_mel_schools = df_schools_melbourne[["geometry"]].apply(lambda x: x.geometry, result_type="expand", axis=1)

df_loc_mel_schools2 = df_loc_mel_schools[["location"]].apply(lambda x: x.location, result_type="expand", axis=1)
#display(df_loc_mel_schools2.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_schools_melbourne = pd.concat([df_schools_melbourne,df_loc_mel_schools2], axis=1)
display(clean_schools_melbourne.head())

In [None]:
clean_schools_melbourne["location"] = clean_schools_melbourne[["lat","lng"]].apply(lambda x:asGeoJSON(x.lat,x.lng), axis=1)
new_clean_schools_melbourne = clean_schools_melbourne[["name","lat","lng","location"]]
new_clean_schools_melbourne.head()

In [None]:
new_clean_schools_melbourne.to_json("output/new_clean_schools_melbourn.json", orient="records")

## Request Parks in Melbourne

In [None]:
#data_parks = requestGooglePlace("schools+in+Melbourne+Australia")

In [None]:
results_parks_melbourne =data_parks['results']
df_parks_melbourne = pd.DataFrame(results_parks_melbourne)
json_parks_melbourne = json.dumps(results_parks_melbourne)

writeFile =open('output/parks_melbourne.json', 'w')
writeFile.write(json_parks_melbourne)
writeFile.close()

In [None]:
#Desagrego la columna geometry.location para obtener la latitud y longitud en columnas de cada Starbucks:
df_loc_mel_parks = df_parks_melbourne[["geometry"]].apply(lambda x: x.geometry, result_type="expand", axis=1)

df_loc_mel_parks2 = df_loc_mel_parks[["location"]].apply(lambda x: x.location, result_type="expand", axis=1)
#display(df_loc_mel_schools2.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_parks_melbourne = pd.concat([df_parks_melbourne,df_loc_mel_parks2], axis=1)
display(clean_parks_melbourne.head())

In [None]:
clean_parks_melbourne["location"] = clean_parks_melbourne[["lat","lng"]].apply(lambda x:asGeoJSON(x.lat,x.lng), axis=1)
new_clean_parks_melbourne = clean_parks_melbourne[["name","lat","lng","location"]]
new_clean_parks_melbourne.head()

In [None]:
new_clean_parks_melbourne.to_json("output/new_clean_parks_melbourn.json", orient="records")

## Request Veggie Restaurants in Melbourne

In [None]:
#data_veggies = requestGooglePlace("veggie+restaurants+in+Melbourne+Australia")

In [None]:
results_veggies_melbourne=data_veggies['results']
df_veggies_melbourne = pd.DataFrame(results_veggies_melbourne)
json_veggies_melbourne = json.dumps(results_veggies_melbourne)

writeFile =open('output/veggies_melbourne.json', 'w')
writeFile.write(json_veggies_melbourne)
writeFile.close()

In [None]:
#Desagrego la columna geometry.location para obtener la latitud y longitud en columnas de cada Starbucks:
df_loc_mel_veggies = df_veggies_melbourne[["geometry"]].apply(lambda x: x.geometry, result_type="expand", axis=1)

df_loc_mel_veggies2 = df_loc_mel_veggies[["location"]].apply(lambda x: x.location, result_type="expand", axis=1)
#display(df_loc_mel_schools2.head())

#Unifico los dos df anteriores para tener un único df completo con toda la información
clean_veggies_melbourne = pd.concat([df_veggies_melbourne,df_loc_mel_veggies2], axis=1)
display(clean_veggies_melbourne.head())

In [None]:
clean_veggies_melbourne["location"] = clean_veggies_melbourne[["lat","lng"]].apply(lambda x:asGeoJSON(x.lat,x.lng), axis=1)
new_clean_veggies_melbourne = clean_veggies_melbourne[["name","lat","lng","location"]]
new_clean_veggies_melbourne.head()

In [None]:
new_clean_veggies_melbourne.to_json("output/new_clean_veggies_melbourn.json", orient="records")

## Prueba para seleccionar la mejor ubicación

In [19]:
query_young= {"$and":[{"location": {"$ne":None}}, {"city":{"$eq": "Melbourne"}}]}
melbourne_office=list(db["offices"].find(query_young,{"_id":0,"name":1,"old_young":1,"total_money_raised":1, "offices":1,"category_code":1, "location":1}))
df_melbourne_office=pd.DataFrame(melbourne_office)

df_melbourne_office.to_json("output/melbourne_offices.json", orient="records")

df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young


Lista de localizaciones de las oficinas de Melbourne para cruzar con las colecciones creadas: starbucks, parques, colegios, pubs y restaurantes veggies y ver cuántos de cada uno hay a menos de 5km.

In [20]:
list_melbourne_offices=[]
for e in range(len(df_melbourne_office)):
    list_melbourne_offices.append(df_melbourne_office["location"][e])


Defino la función que contará el número de starbucks, parques, colegios, pubs y restaurantes veggies que están a menos de 5 km de cada oficina.

In [83]:
def countNearStarbucks(list_location , maxDistance):
    
    return list(db.starbucks_melbourne.find({
       "location": {
         "$near": {
           "$geometry": list_location,
           "$maxDistance": maxDistance
         }
       }
    }
    )
    )

maxDistance=5000
lista_starbucks=[]
num_starbucks=[]

for i in range(len(list_melbourne_offices)):
    coincidencias= countNearStarbucks(list_melbourne_offices[i], maxDistance)
    lista_starbucks.append(coincidencias)
    num_starbucks.append(len(coincidencias))

df_melbourne_office['num_starbucks']=num_starbucks


In [84]:
df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young,num_starbucks,num_pubs,num_parks,num_schools
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old,2,0,0,0
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old,7,20,20,20
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old,0,0,0,0
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old,7,20,20,20
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old,7,20,20,20
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old,2,0,0,0
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young,7,20,20,20


In [81]:
def countNearPubs(list_location, maxDistance):
    
    return list(db.pubs_melbourne.find({
       "location": {
         "$near": {
           "$geometry": list_location,
           "$maxDistance": maxDistance
         }
       }
    }
    )
    )

maxDistance=5000
lista_pubs=[]
num_pubs=[]

for i in range(len(list_melbourne_offices)):
    coincidencias= countNearPubs(list_melbourne_offices[i], maxDistance)
    lista_pubs.append(coincidencias)
    num_pubs.append(len(coincidencias))

df_melbourne_office['num_pubs']=num_pubs

In [82]:
df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young,num_starbucks,num_pubs,num_parks,num_schools
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old,2,0,0,0
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old,7,20,20,20
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old,0,0,0,0
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old,7,20,20,20
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old,7,20,20,20
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old,2,0,0,0
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young,7,20,20,20


In [89]:
def countNearParks(list_location, maxDistance):
    
    return list(db.parks_melbourne.find({
       "location": {
         "$near": {
           "$geometry": list_location,
           "$maxDistance": maxDistance
         }
       }
    }
    )
    )

maxDistance=2000
lista_parks=[]
num_parks=[]

for i in range(len(list_melbourne_offices)):
    coincidencias= countNearPubs(list_melbourne_offices[i], maxDistance)
    lista_parks.append(coincidencias)
    num_parks.append(len(coincidencias))

df_melbourne_office['num_parks']=num_parks

In [90]:
df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young,num_starbucks,num_pubs,num_parks,num_schools,num_veggies
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old,2,0,0,0,0
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old,7,20,18,20,18
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old,0,0,0,0,0
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old,7,20,18,20,18
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old,7,20,18,20,18
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old,2,0,0,0,0
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young,7,20,1,20,1


In [65]:
def countNearSchools(list_location, maxDistance):
    
    return list(db.schools_melbourne.find({
       "location": {
         "$near": {
           "$geometry": list_location,
           "$maxDistance": maxDistance
         }
       }
    }
    )
    )

maxDistance=5000
lista_schools=[]
num_schools=[]

for i in range(len(list_melbourne_offices)):
    coincidencias= countNearPubs(list_melbourne_offices[i], maxDistance)
    lista_schools.append(coincidencias)
    num_schools.append(len(coincidencias))

df_melbourne_office['num_schools']=num_schools

In [66]:
df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young,num_starbucks,num_pubs,num_parks,num_schools
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old,2,0,0,0
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old,7,20,20,20
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old,0,0,0,0
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old,7,20,20,20
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old,7,20,20,20
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old,2,0,0,0
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young,7,20,20,20


In [87]:
def countNearVeggies(list_location, maxDistance):
    
    return list(db.veggies_melbourne.find({
       "location": {
         "$near": {
           "$geometry": list_location,
           "$maxDistance": maxDistance
         }
       }
    }
    )
    )

maxDistance=2000
lista_veggies=[]
num_veggies=[]

for i in range(len(list_melbourne_offices)):
    coincidencias= countNearPubs(list_melbourne_offices[i], maxDistance)
    lista_veggies.append(coincidencias)
    num_veggies.append(len(coincidencias))

df_melbourne_office['num_veggies']=num_veggies

In [88]:
df_melbourne_office

Unnamed: 0,name,category_code,total_money_raised,location,old_young,num_starbucks,num_pubs,num_parks,num_schools,num_veggies
0,Google,search,$555M,"{'type': 'Point', 'coordinates': [145.073608, ...",old,2,0,0,0,0
1,Hitwise,web,$4.2M,"{'type': 'Point', 'coordinates': [144.963169, ...",old,7,20,20,20,18
2,Direct Hit,search,$26M,"{'type': 'Point', 'coordinates': [-80.6475007,...",old,0,0,0,0,0
3,Coremetrics,advertising,$91M,"{'type': 'Point', 'coordinates': [144.9705, -3...",old,7,20,20,20,18
4,Aconex,software,$85M,"{'type': 'Point', 'coordinates': [144.9715423,...",old,7,20,20,20,18
5,Travel Distribution Systems,enterprise,$1.1M,"{'type': 'Point', 'coordinates': [145.205688, ...",old,2,0,0,0,0
6,Internet Marketing Academy Australia,other,$100k,"{'type': 'Point', 'coordinates': [144.976425, ...",young,7,20,20,20,1
