## Python BI Location project

In [1]:
#Importing libraries

import pandas as pd
import re
from pymongo import MongoClient

In [2]:
#Connecting to the data base

# MongoClient() is called without arguments, so pymongo's default connection
# settings are used. Database and collection are selected by name.
client = MongoClient()
db = client['db_companies']
collection_companies = db['companies']

In [3]:
#Some categories available

import itertools
from operator import itemgetter
# Sample up to 2000 documents that carry a `category_code` field, projecting
# only that field.
categories = list(
    collection_companies.find(
        {'category_code': {'$exists': True}},
        {'_id': 0, 'category_code': 1},
    ).limit(2000)
)

# Group the sampled documents by category. itertools.groupby only merges
# *consecutive* equal keys, so on unsorted query results it would leave each key
# holding just its last run; a plain dict accumulates every document and keeps
# the keys in first-seen order (None keys included).
res = {}
for doc in categories:
    res.setdefault(doc['category_code'], []).append(doc)
list(res.keys())

['web',
 'enterprise',
 'software',
 'news',
 'social',
 'network_hosting',
 'games_video',
 'music',
 'mobile',
 'search',
 'advertising',
 'messaging',
 'security',
 'photo_video',
 'finance',
 'hardware',
 'ecommerce',
 'travel',
 'public_relations',
 'other',
 'real_estate',
 'semiconductor',
 'analytics',
 'health',
 'legal',
 'sports',
 'biotech',
 'cleantech',
 'education',
 'consulting',
 'transportation',
 None,
 'hospitality',
 'fashion',
 'nonprofit',
 'nanotech',
 'automotive',
 'design']

In [4]:
#Filtering all data

# Selection criteria: one of the three target categories, fewer than 1000
# employees, a non-empty `offices` array and no null office latitude.
target_categories = ('ecommerce', 'hardware', 'software')
company_filter = {
    '$or': [{'category_code': code} for code in target_categories],
    'number_of_employees': {'$lt': 1000},
    'offices': {'$not': {'$size': 0}},
    'offices.latitude': {'$ne': None},
}
# Only the fields needed downstream are returned.
company_fields = {'_id': 0, 'name': 1, 'category_code': 1, 'offices': 1}

filtering = list(collection_companies.find(company_filter, company_fields))

In [5]:
# Number of company documents returned by the filter query

len(filtering)

1201

In [6]:
# Build a DataFrame from the filtered company documents and preview the first rows

df=pd.DataFrame(filtering)
df.head()

Unnamed: 0,name,category_code,offices
0,ooma,hardware,"[{'description': '', 'address1': '1840 Embarca..."
1,GoingOn,software,"[{'description': 'GoingOn Networks, Inc.', 'ad..."
2,Bazaarvoice,software,"[{'description': 'Head Office', 'address1': '3..."
3,spigit,software,"[{'description': '', 'address1': '311 Ray Stre..."
4,JumpBox,software,"[{'description': '', 'address1': 'PO Box 15265..."


In [7]:
# Drop every row that has a missing value in any column (rebinds `df`)

df=df.dropna()

In [8]:
#Function to get lat and lng of the first office

def get_first(data):
    """Summarise a company's offices using the first office in the list.

    Parameters
    ----------
    data : mapping (e.g. a DataFrame row or a dict)
        Must provide an ``'offices'`` entry holding a list of office dicts.
        ``'latitude'`` and ``'longitude'`` are read from the first one.

    Returns
    -------
    dict
        ``totalOffices`` - number of offices in the list,
        ``lat`` / ``lng`` - coordinates of the first office (``None`` if absent),
        ``principal`` - GeoJSON-style ``Point`` built from those coordinates,
        or ``None`` when either coordinate is missing.
    """
    offices = data['offices']

    # An empty office list has no first element to read; report it explicitly
    # instead of failing with an IndexError.
    if not offices:
        return {'totalOffices': 0,
                'lat': None,
                'lng': None,
                'principal': None}

    first_office = offices[0]
    lat = first_office.get('latitude')
    lng = first_office.get('longitude')

    principal = None
    # Compare against None rather than relying on truthiness: 0.0 is a valid
    # coordinate (equator / prime meridian) and must not be treated as missing.
    if lat is not None and lng is not None:
        # This is already a geo point usable in geo queries; note the
        # [longitude, latitude] ordering of the coordinates.
        principal = {
            'type': 'Point',
            'coordinates': [lng, lat]
        }

    return {'totalOffices': len(offices),
            'lat': lat,
            'lng': lng,
            'principal': principal}

In [9]:
# New structured data with lat and lng: get_first is applied to each row
# (axis=1) and result_type='expand' spreads the returned dict into columns.

New_office=df[['offices']].apply(get_first, result_type='expand', axis=1)

In [10]:
# Overview: drop rows with any missing value in the expanded columns and
# preview the first rows

offices = New_office.dropna()
offices.head()

Unnamed: 0,totalOffices,lat,lng,principal
0,1,37.451958,-122.116026,"{'type': 'Point', 'coordinates': [-122.116026,..."
1,1,37.782263,-122.392142,"{'type': 'Point', 'coordinates': [-122.392142,..."
2,1,30.407545,-97.717667,"{'type': 'Point', 'coordinates': [-97.717667, ..."
3,3,37.663728,-121.873181,"{'type': 'Point', 'coordinates': [-121.8731805..."
4,1,33.429864,-111.944967,"{'type': 'Point', 'coordinates': [-111.944967,..."


In [11]:
#New data frame

# Put the per-company office summary next to name/category (column-wise concat),
# remove the raw `offices` column and discard rows left with missing values.
# NOTE: this cell rebinds `df` without its `offices` column, so running it a
# second time without re-running the earlier cells will fail on the drop.
df = (
    pd.concat([df, offices], axis=1)
    .drop(columns=['offices'])
    .dropna()
)
df.head()

Unnamed: 0,name,category_code,totalOffices,lat,lng,principal
0,ooma,hardware,1,37.451958,-122.116026,"{'type': 'Point', 'coordinates': [-122.116026,..."
1,GoingOn,software,1,37.782263,-122.392142,"{'type': 'Point', 'coordinates': [-122.392142,..."
2,Bazaarvoice,software,1,30.407545,-97.717667,"{'type': 'Point', 'coordinates': [-97.717667, ..."
3,spigit,software,3,37.663728,-121.873181,"{'type': 'Point', 'coordinates': [-121.8731805..."
4,JumpBox,software,1,33.429864,-111.944967,"{'type': 'Point', 'coordinates': [-111.944967,..."


In [12]:
# Store the filtered rows as documents in the `offices` collection of the same
# database (one document per DataFrame row).
# NOTE(review): re-running this cell inserts the same records again - confirm
# whether the collection should be cleared first.

db.offices.insert_many(df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x2853b052248>

In [13]:
# Create a 2dsphere index on the `principal` field of the `offices` collection

db.offices.create_index([('principal', '2dsphere')])

'principal_2dsphere'

Attempt with Google Earth Engine failed: access denied

In [4]:
import ee

In [6]:
# Show the version of the imported `ee` package
print(ee.__version__)

0.1.218


In [7]:
# NOTE(review): this shell command failed with "path not found" (see the output
# below); the path is machine-specific and relative - confirm the intended location.
!Users\Ivan\Profile\miniconda3\condabin\activate ee

El sistema no puede encontrar la ruta especificada.


Next attempt: querying OpenStreetMap through the Overpass API (overpy)

In [1]:
!pip install overpy

Collecting overpy
  Downloading https://files.pythonhosted.org/packages/9d/20/31f2e2af0dbc05a358259b9cdf67056d34b002d8041944628d05adfab9ba/overpy-0.4.tar.gz (41kB)
Building wheels for collected packages: overpy
  Building wheel for overpy (setup.py): started
  Building wheel for overpy (setup.py): finished with status 'done'
  Created wheel for overpy: filename=overpy-0.4-cp37-none-any.whl size=45777 sha256=6a98eaa951fcf8c4f4e4fb1470ebf1af6aa7fa1cfd32a9f321d3b1f39cc40f5e
  Stored in directory: C:\Users\Ivan\AppData\Local\pip\Cache\wheels\10\53\c2\e6b6f97e7bb419193bd3aafbe38628666f0f93a1cec9dc521d
Successfully built overpy
Installing collected packages: overpy
Successfully installed overpy-0.4


In [7]:
import overpy

# Ask the Overpass API for every node, way and relation tagged
# amenity=marketplace inside the area whose ISO3166-1 code is "DE".
# The query ends with `out center;`.
api = overpy.Overpass()
r = api.query("""
area["ISO3166-1"="DE"][admin_level=2];
(node["amenity"="marketplace"](area);
 way["amenity"="marketplace"](area);
 rel["amenity"="marketplace"](area);
);
out center;
""")

# Collect (lon, lat) pairs: nodes use their own position, ways and relations
# use their center coordinates.
coords = [(float(n.lon), float(n.lat)) for n in r.nodes]
coords.extend((float(w.center_lon), float(w.center_lat)) for w in r.ways)
coords.extend(
    (float(relation.center_lon), float(relation.center_lat))
    for relation in r.relations
)

In [9]:
# Number of coordinate pairs collected from the Overpass result
len(coords)

1140

In [11]:
# Disabled example, kept as a string literal so it does not execute (the cell
# only echoes the text): fetch all "highway" ways in a small bounding box
# together with their nodes, then print each way's name, highway tag and
# node coordinates.
'''result = api.query("""
    way(50.746,7.154,50.748,7.157) ["highway"];
    (._;>;);
    out body;
    """)

for way in result.ways:
    print("Name: %s" % way.tags.get("name", "n/a"))
    print("  Highway: %s" % way.tags.get("highway", "n/a"))
    print("  Nodes:")
    for node in way.nodes:
        print("    Lat: %f, Lon: %f" % (node.lat, node.lon))'''

'result = api.query("""\n    way(50.746,7.154,50.748,7.157) ["highway"];\n    (._;>;);\n    out body;\n    """)\n\nfor way in result.ways:\n    print("Name: %s" % way.tags.get("name", "n/a"))\n    print("  Highway: %s" % way.tags.get("highway", "n/a"))\n    print("  Nodes:")\n    for node in way.nodes:\n        print("    Lat: %f, Lon: %f" % (node.lat, node.lon))'