forked from behackett/presentations
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 286709d
Showing
8 changed files
with
29,712 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Presentations from various conferences. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
To load the datasets and geo-index them, first start mongod, then run: | ||
$ ./load_data.sh | ||
$ python geo_index_data.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# Examples of doing various Geo queries using pymongo. | ||
# These are just simple examples. No attempt was made | ||
# to optimize the performance of the queries. | ||
|
||
import math | ||
|
||
import bson | ||
import pymongo | ||
|
||
# Approximate radius of Earth in miles, according to Google calculator.
RADIUS_MILES = 3963
# Very rough scale factor for distances on Earth: radians-per-degree
# times the radius. Used with standard $near queries; spherical
# queries just use the radius itself.
DISTANCE_MULTIPLIER = (math.pi / 180) * RADIUS_MILES
|
||
# Connect to mongod
connection = pymongo.Connection()

# Some examples require newer mongod versions, so keep the server's
# (major, minor) version around as a tuple of ints for comparisons.
# The version text is split on '.' rather than sliced to its first
# three characters, so multi-digit components such as "1.10.0" or
# "10.0.1" are not truncated.
version_string = '.'.join(
    connection.server_info()['version'].split('.')[:2])
version = tuple([int(num) for num in version_string.split('.')])
|
||
def bart_foreign_cinema(num_stops=1):
    """Print the name of the BART station(s) nearest to the
    Foreign Cinema in San Francisco.

    :Parameters:
      - `num_stops`: How many stations to list (passed to the
        cursor's limit()).
    """
    # [longitude, latitude] of the Foreign Cinema.
    foreign_cinema = [-122.419088, 37.75689]
    nearest = {'stop_geo': {'$near': foreign_cinema}}
    stops = connection.bart.stops
    for station in stops.find(nearest).limit(num_stops):
        print(station['stop_name'])
|
||
# Using geoNear we can get an approximate or spherical
# distance and scale it to our needs
def bart_foreign_cinema_geonear(spherical=False):
    """Print how far away the closest BART station is from the
    Foreign Cinema in San Francisco.

    :Parameters:
      - `spherical`: Should we do a spherical query?
    """
    # Spherical queries are scaled by the radius, flat ones by the
    # rough per-degree multiplier.
    scale = RADIUS_MILES if spherical else DISTANCE_MULTIPLIER
    # SON keeps the keys in insertion order, with the command name first.
    command = bson.son.SON([
        ('geoNear', 'stops'),
        ('near', [-122.419088, 37.75689]),
        ('distanceMultiplier', scale),
    ])
    if spherical:
        command['spherical'] = True
    closest = connection.bart.command(command)['results'][0]
    station = closest['obj']['stop_name']
    miles = closest['dis']
    print("Distance to %s: %r miles" % (station, miles))
|
||
# JavaScript map step: emit each document's population keyed by state.
map_func = (
    "function(){ "
    "emit(this.state, this.pop); "
    "}"
)
# JavaScript reduce step: add up the emitted populations for one key.
reduce_func = (
    "function(key, values){ "
    "return Array.sum(values); "
    "}"
)
|
||
# Calculate the entire population of the zipcode dataset.
# Inline map reduce just returns the result set instead
# of storing it in the database.
def calculate_population():
    """Use inline map reduce to calculate and print the entire
    population from the zips dataset.

    Prints a notice and does nothing else when the connected
    mongod is older than 1.8.
    """
    if version < (1, 8):
        print("inline map reduce requires mongod >= 1.8")
        # Bail out instead of falling through to a query the server
        # cannot run.
        return
    db = connection.geo
    results = db.zips.inline_map_reduce(map_func, reduce_func)
    # One result per state; add them up for the grand total.
    print(sum([res['value'] for res in results]))
|
||
# How many people live within 100 miles of the
# Empire State Building (based on our dataset)?

range_in_miles = 100.0
# Convert miles into the units a flat (non-spherical) query expects.
max_distance = range_in_miles / DISTANCE_MULTIPLIER
# Built as an ordered SON so that '$near' comes before '$maxDistance'.
nearq = bson.son.SON([
    ('$near', [-73.985656, 40.748433]),
    ('$maxDistance', max_distance),
])
# Standard $near queries are limited to a result set of 100 documents.
# We use $within to get around that limitation.
withinq = {
    '$within': {
        '$center': [[-73.985656, 40.748433], max_distance],
    },
}
|
||
def empire_state_find():
    """Print how many people live within 100 miles of the Empire State
    Building (according to our dataset). The answer is calculated
    twice: once with $within, once with $near.
    """
    zips = connection.geo.zips
    # Only the 'pop' field of each document matters here. The second
    # argument of find() selects the fields mongod sends back; '_id'
    # is always included unless it is switched off explicitly.
    pop_only = {'pop': True, '_id': False}

    within_total = sum([doc['pop']
                        for doc in zips.find({'loc': withinq}, pop_only)])
    print('$within: %d' % (within_total,))

    near_docs = zips.find({'loc': nearq}, pop_only).limit(60000)
    near_total = sum([doc['pop'] for doc in near_docs])
    print('$near: %d' % (near_total,))
|
||
def empire_state_spherical():
    """Print how many people live within 100 miles of the Empire State
    Building (according to our dataset). This calculates the answer using
    $nearSphere so the distance calculation should be accurate.
    """
    db = connection.geo
    q = bson.son.SON({'$nearSphere': [-73.985656, 40.748433]})
    # Spherical queries take the distance as miles divided by the
    # Earth's radius; use the shared module constants rather than
    # repeating the numbers here.
    q.update({'$maxDistance': range_in_miles / RADIUS_MILES})
    # Only 'pop' is summed, so ask mongod for just that field, as
    # empire_state_find() does.
    cursor = db.zips.find({'loc': q},
                          {'pop': True, '_id': False}).limit(60000)
    print('$nearSphere: %d' % (sum([doc['pop'] for doc in cursor]),))
|
||
# Using map/reduce or group with GEO queries requires MongoDB 1.9.
# ----------------------------------------------------------------------------
# Same result using map/reduce.
def empire_state_map_reduce():
    """Run the same $within query as above through inline map/reduce
    and print the summed population.
    """
    if version < (1, 9):
        print("map/reduce with geo requires mongod >= 1.9")
        return
    per_state = connection.geo.zips.inline_map_reduce(
        map_func, reduce_func, query={'loc': withinq})
    print(sum([entry['value'] for entry in per_state]))
|
||
# Same result using group.
def empire_state_group():
    """Run the same $within query once more through group() and
    print the summed population.
    """
    if version < (1, 9):
        print("group with geo requires mongod >= 1.9")
        return
    # JavaScript reducer: accumulate each document's 'pop' into the
    # running 'sum' of its group.
    pop_reduce = "function(obj, prev){ prev.sum += obj.pop; }"
    groups = connection.geo.zips.group(
        ['state'], {'loc': withinq}, {'sum': 0}, pop_reduce)
    print(sum([group['sum'] for group in groups]))
# ----------------------------------------------------------------------------
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/python | ||
# | ||
# GeoIndexer for MongoDB Where2.0 Datasets | ||
# | ||
# Original script by: | ||
# Brendan W. McAdams <bmcadams@evilmonkeylabs.com> | ||
# | ||
# Quick and dirty script which creates Geo Indices in MongoDB. | ||
# | ||
# Assumes you already loaded it with the provided shell script. | ||
# | ||
# Needs PyMongo 1.6 or greater | ||
|
||
import pymongo | ||
from pymongo import Connection | ||
|
||
def _add_geo_field(collection, field, make_point):
    """Store ``make_point(row)`` under ``field`` on every document of
    ``collection``, then ensure a 2d index exists on that field.
    """
    for document in collection.find():
        document[field] = make_point(document)
        collection.save(document)
    collection.ensure_index([(field, pymongo.GEO2D)])


connection = Connection()
db = connection['bart']

print("Indexing the Stops Data.")
_add_geo_field(db.stops, 'stop_geo',
               lambda row: [row['stop_lon'], row['stop_lat']])
print("Reindexed stops with Geospatial data.")

print("Indexing the Shapes data")
# NOTE(review): unlike stops, this point is a plain dict. A 2d index is
# understood to read the first field as longitude, and a Python 2 dict
# does not promise that 'lon' is written first -- confirm, or switch to
# a [lon, lat] list as used for stops.
_add_geo_field(db.shapes, 'shape_pt_geo',
               lambda row: {'lon': row['shape_pt_lon'],
                            'lat': row['shape_pt_lat']})
print("Reindexed shapes with Geospatial data.")

db = connection['geo']
print("Indexing the Zips Data.")
# 'loc' is loaded as an {x, y} document; it is rewritten as [-x, y].
# Presumably x is a positive west longitude in the source data -- verify.
_add_geo_field(db.zips, 'loc',
               lambda row: [-(row['loc']['x']), row['loc']['y']])
print("Reindexed zips with Geospatial data.")

print("Done.")
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/sh
#
# Quick & dirty script to load the BART Transit
# Data to mongodb. Assumes mongod is running.
#
# Data file is from: http://www.bart.gov/schedules/developers/gtfs.aspx
# and freely available/redistributable.
#
# Spec on the data format is available at:
# http://code.google.com/transit/spec/transit_feed_specification.html
#
# Original script developed by Brendan W. McAdams

# Path to mongoimport. Defaults to the historical location but can be
# overridden from the environment, e.g. MONGO_IMPORT=mongoimport ./load_data.sh
MONGO_IMPORT="${MONGO_IMPORT:-/opt/mongo/bin/mongoimport}"
# Left unquoted at the call sites on purpose so it splits into words.
IMPORT_CMD="$MONGO_IMPORT -d bart --type csv --headerline --drop --ignoreBlanks"
BART_ZIP="google_transit.zip"

# load_csv LABEL NAME
# Normalize the line endings of NAME.txt to \n, then import it into
# the NAME collection of the bart database.
load_csv() {
    echo "Loading $1..."
    perl -pi -e 's/\r\n|\n|\r/\n/g' "$2.txt"
    $IMPORT_CMD -c "$2" "$2.txt"
}

echo "Unzipping BART File ($BART_ZIP)"

unzip -o "$BART_ZIP"

# Agency is malformed in current file release... Fix it

# add the missing endline character.
# printf is used because echo '\n' is not portable: some shells
# (e.g. bash's builtin echo) append a literal backslash-n instead.
printf '\n' >> agency.txt

load_csv "Agency file" agency
load_csv "Stops file" stops
load_csv "Routes file" routes
load_csv "Trips file" trips
load_csv "Stop Times file" stop_times
load_csv "Calendar file" calendar
load_csv "Calendar Dates File" calendar_dates
load_csv "Shapes file" shapes  # For line drawing - might be usable by GeoMongo

echo "Loading zips.json to geo..."
$MONGO_IMPORT zips.json -d geo -c zips --drop
Oops, something went wrong.