In [1]:
# https://api.mongodb.com/python/current/tutorial.html

from pymongo import MongoClient

# Connect to the MongoDB server on localhost and select the `companies` database.
MONGO_URI = 'mongodb://localhost:27017/'
client = MongoClient(MONGO_URI)
db = client["companies"]

In [8]:
# Filter: keep documents whose `offices` array does not have size 0.
non_empty_offices = {"offices": {"$not": {"$size": 0}}}
# Projection: only `name` and `offices` are requested (MongoDB adds `_id` by default).
name_and_offices = {"name": 1, "offices": 1}

at_least_1_office = db.comp.find(non_empty_offices, name_and_offices)

In [9]:
import pandas as pd
# Build the frame from a *clone* of the cursor. Iterating the cursor itself
# exhausts it, so re-running this cell would silently yield an empty DataFrame;
# a clone is an unevaluated copy of the same query, which keeps the cell idempotent.
df = pd.DataFrame(at_least_1_office.clone())
df.head()

Unnamed: 0,_id,name,offices
0,52cdef7c4bab8bd675297d8a,Wetpaint,"[{'description': '', 'address1': '710 - 2nd Av..."
1,52cdef7c4bab8bd675297d8b,AdventNet,"[{'description': 'Headquarters', 'address1': '..."
2,52cdef7c4bab8bd675297d8c,Zoho,"[{'description': 'Headquarters', 'address1': '..."
3,52cdef7c4bab8bd675297d8d,Digg,"[{'description': None, 'address1': '135 Missis..."
4,52cdef7c4bab8bd675297d8e,Facebook,"[{'description': 'Headquarters', 'address1': '..."


In [66]:
def getFirst(data):
    """Summarise the first office of a company row.

    Parameters
    ----------
    data : mapping or pandas.Series
        Row exposing an ``'offices'`` entry, expected to be a list of office
        dicts carrying ``'latitude'`` and ``'longitude'`` keys.

    Returns
    -------
    dict
        ``totalOffices`` -- number of offices in the list (0 when the list is
        empty or the value is not a list).
        ``lat`` / ``lng`` -- coordinates of the first office, ``None`` when
        unavailable.
        ``oficina_principal`` -- GeoJSON ``Point`` built from the first
        office, or ``None`` unless both coordinates are present.
    """
    offices = data['offices']
    # A missing/malformed value (e.g. NaN from pandas) is handled like an
    # empty list instead of crashing on len() / indexing.
    if not isinstance(offices, (list, tuple)):
        offices = []

    first = offices[0] if offices else {}
    lat = first.get('latitude')
    lng = first.get('longitude')

    # Only create the GeoJSON object if all geodata is available.
    # Compare against None rather than relying on truthiness: 0.0 is a valid
    # latitude (equator) and longitude (prime meridian).
    principal = None
    if lat is not None and lng is not None:
        principal = {
            "type": "Point",
            # GeoJSON coordinate order is [longitude, latitude].
            "coordinates": [lng, lat]
        }

    return {
        "totalOffices": len(offices),
        "lat": lat,
        "lng": lng,
        "oficina_principal": principal
    }


# Apply getFirst row by row; result_type="expand" spreads each returned dict
# into one column per key.
offices_only = df[["offices"]]
first_office = offices_only.apply(getFirst, axis=1, result_type="expand")

In [67]:
# Put the derived first-office columns next to the original ones, then keep
# only the columns that are exported later.
export_columns = ["name", "lat", "lng", "oficina_principal", "totalOffices"]
df_with_first_office = pd.concat([df, first_office], axis=1)
df_clean = df_with_first_office[export_columns]

In [68]:
# Show the (rows, columns) shape, then preview the first records.
rows_and_columns = df_clean.shape
display(rows_and_columns)
df_clean.head()

(13744, 5)

Unnamed: 0,name,lat,lng,oficina_principal,totalOffices
0,Wetpaint,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,...",2.0
1,AdventNet,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,...",1.0
2,Zoho,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,...",1.0
3,Digg,37.764726,-122.394523,"{'type': 'Point', 'coordinates': [-122.394523,...",1.0
4,Facebook,37.41605,-122.151801,"{'type': 'Point', 'coordinates': [-122.151801,...",3.0


In [69]:

# Rows with missing lat and/or lng are kept: the dropna below is currently disabled.
#df_clean.dropna(axis=0, inplace=True)

# Write one JSON record per row; the resulting file can be loaded into MongoDB with:
# mongoimport --db companies --collection first_office --jsonArray ./data/oficinas.json
OFICINAS_JSON = './data/oficinas.json'
df_clean.to_json(OFICINAS_JSON, orient="records")

In [83]:
#https://docs.mongodb.com/manual/reference/operator/query/near/index.html
def findNear(geopoint, radio_max_meters=1000):
    """Query ``db.first_office`` for documents near a GeoJSON point.

    Builds a ``$near`` filter on the ``oficina_principal`` field, using
    ``geopoint`` as ``$geometry`` and ``radio_max_meters`` as
    ``$maxDistance``, and returns the result of ``find`` for that filter.

    Parameters
    ----------
    geopoint : dict
        GeoJSON geometry passed as ``$geometry``.
    radio_max_meters : int, optional
        Value passed as ``$maxDistance`` (default 1000).

    NOTE(review): ``$near`` presumably requires a geospatial index on
    ``oficina_principal``; none is created in the visible cells -- confirm it
    exists on the collection.
    """
    near_clause = {
        "$geometry": geopoint,
        "$maxDistance": radio_max_meters,
    }
    query = {"oficina_principal": {"$near": near_clause}}
    return db.first_office.find(query)


# https://developers.google.com/maps/documentation/geocoding/intro
park_avenue_con_21_street = {
    "type":"Point",
    # GeoJSON coordinate order is [longitude, latitude].
    "coordinates":[-73.987308,40.738935]
}
radio_max_meters = 10000

# Run the geo query once and materialise it. Cursor.count() was removed in
# PyMongo 4 and collection.count_documents() does not accept $near, so the
# number of matches is taken from the fetched list instead.
oficinas_cercanas = list(findNear(park_avenue_con_21_street, radio_max_meters))
num_offices = len(oficinas_cercanas)

print(f"Hay {num_offices} oficinas cerca a {radio_max_meters} metros")
print("La mas cercana es")
# $near returns documents sorted from nearest to farthest, so the first
# element is the closest office (same result as the former .limit(1) query).
print(oficinas_cercanas[:1])



Hay 556 oficinas cerca a 10000 metros
La mas cercana es
[{'_id': ObjectId('5d231e0ae0aeedf6e06cd897'), 'name': 'SpaBooker', 'lat': 40.738567, 'lng': -73.987199, 'oficina_principal': {'type': 'Point', 'coordinates': [-73.987199, 40.738567]}, 'totalOffices': 1.0}]




In [89]:
# Save every office found within 5000 m of the reference point as CSV.
offices_within_5km = pd.DataFrame(findNear(park_avenue_con_21_street, 5000))
offices_within_5km.to_csv("./data/geoesp.csv")