Permalink
Browse files

Set up presentations repo...

  • Loading branch information...
0 parents commit 286709dca06e59d1acb075cc6c9057612c949567 @behackett behackett committed Apr 19, 2011
1 README
@@ -0,0 +1 @@
+Presentations from various conferences.
Binary file not shown.
@@ -0,0 +1,3 @@
+To load the datasets and geo-index them, first start mongod, then run:
+$ ./load_data.sh
+$ python geo_index_data.py
@@ -0,0 +1,141 @@
+# Examples of doing various Geo queries using pymongo.
+# These are just simple examples. No attempt was made
+# to optimize the performance of the queries.
+
+import math
+
+import bson
+import pymongo
+
+# Approximate radius of Earth according to Google calculator
+RADIUS_MILES = 3963
+# Very rough scale multiplier for distance on Earth.
+# For standard $near queries. Spherical queries just use radius.
+DISTANCE_MULTIPLIER = RADIUS_MILES * (math.pi / 180)
+
+# Connect to mongod
+connection = pymongo.Connection()
+
+# Some examples require a newer mongod versions...
+version_string = (connection.server_info()['version'])[:3]
+version = tuple([int(num) for num in version_string.split('.')])
+
+def bart_foreign_cinema(num_stops=1):
+ """Find the BART station(s) closest to the Foreign Cinema
+ in San Francisco.
+
+ :Parameters:
+ - `num_stops`: How many stations to list.
+ """
+ db = connection.bart
+ cursor = db.stops.find(
+ {'stop_geo': {'$near': [-122.419088, 37.75689]}}).limit(num_stops)
+ for doc in cursor:
+ print doc['stop_name']
+
+# Using geoNear we can get an approximate or spherical
+# distance and scale it to our needs.
+def bart_foreign_cinema_geonear(spherical=False):
+ """How far away is the closest BART station to the
+ Foreign Cinema in San Francisco.
+
+ :Parameters:
+ - `spherical`: Should we do a spherical query?
+ """
+ db = connection.bart
+ if spherical:
+ mult = RADIUS_MILES
+ else:
+ mult = DISTANCE_MULTIPLIER
+ q = bson.son.SON({'geoNear': 'stops'})
+ q.update({'near': [-122.419088, 37.75689]})
+ q.update({'distanceMultiplier': mult})
+ if spherical:
+ q.update({'spherical': True})
+ results = db.command(q)
+ name = results['results'][0]['obj']['stop_name']
+ dist = results['results'][0]['dis']
+ print "Distance to %s: %r miles" % (name, dist)
+
+# JavaScript map function: emit each document's population keyed by state.
+map_func = "function(){ emit(this.state, this.pop); }"
+# JavaScript reduce function: add up the values emitted for a key.
+reduce_func = "function(key, values){ return Array.sum(values); }"
+
+# Calculate the entire population of the zipcode dataset.
+# Inline map reduce just returns the result set instead
+# of storing it in the database.
+def calculate_population():
+ """Use inline map reduce to calculate the entire
+ population from the zips dataset.
+ """
+ if version < (1, 8):
+ print "inline map reduce requires mongod >= 1.8"
+ db = connection.geo
+ print sum([res['value']
+ for res
+ in db.zips.inline_map_reduce(map_func, reduce_func)])
+
+# How many people live within 100 miles of the
+# Empire State Building (based on our dataset)?
+
+range_in_miles = 100.0
+max_distance = range_in_miles / DISTANCE_MULTIPLIER
+nearq = bson.son.SON({'$near': [-73.985656, 40.748433]})
+nearq.update({'$maxDistance': max_distance})
+# Standard $near queries are limited to a result set of 100 documents.
+# We use $within to get around that limitation.
+withinq = {'$within': {'$center': [[-73.985656, 40.748433], max_distance]}}
+
+def empire_state_find():
+ """How many people live within 100 miles of the Empire State Building
+ (according to our dataset). This calculates the answer twice. Once
+ with $within, once with $near.
+ """
+ db = connection.geo
+ # We only really care about the 'pop' field in the result documents.
+ # The second parameter of find() tells mongod what fields to return
+ # to us. MongoDB always returns '_id' unless you tell it not to.
+ cursor = db.zips.find({'loc': withinq}, {'pop': True, '_id': False})
+ print '$within: %d' % (sum([doc['pop'] for doc in cursor]),)
+ cursor = db.zips.find({'loc': nearq},
+ {'pop': True, '_id': False}).limit(60000)
+ print '$near: %d' % (sum([doc['pop'] for doc in cursor]),)
+
+def empire_state_spherical():
+ """How many people live within 100 miles of the Empire State Building
+ (according to our dataset). This calculates the answer using $nearSphere
+ so the distance calulation should be accurate.
+ """
+ db = connection.geo
+ q = bson.son.SON({'$nearSphere': [-73.985656, 40.748433]})
+ q.update({'$maxDistance': 100.0 / 3963})
+ cursor = db.zips.find({'loc': q}).limit(60000)
+ print '$nearSphere: %d' % (sum([doc['pop'] for doc in cursor]),)
+
+# Using map/reduce or group with GEO queries requires MongoDB 1.9.
+# ----------------------------------------------------------------------------
+# Same result using map/reduce.
+def empire_state_map_reduce():
+ """Same $within query from above using map/reduce.
+ """
+ if version < (1, 9):
+ print "map/reduce with geo requires mongod >= 1.9"
+ else:
+ db = connection.geo
+ result = db.zips.inline_map_reduce(map_func,
+ reduce_func,
+ query={'loc': withinq})
+ print sum([doc['value'] for doc in result])
+
+# Same result using group.
+def empire_state_group():
+ """Same $within query again using group.
+ """
+ if version < (1, 9):
+ print "group with geo requires mongod >= 1.9"
+ else:
+ db = connection.geo
+ pop_reduce = "function(obj, prev){ prev.sum += obj.pop; }"
+ result = db.zips.group(['state'], {'loc': withinq}, {'sum': 0}, pop_reduce)
+ print sum([doc['sum'] for doc in result])
+# ----------------------------------------------------------------------------
+
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+#
+# GeoIndexer for MongoDB Where2.0 Datasets
+#
+# Original script by:
+# Brendan W. McAdams <bmcadams@evilmonkeylabs.com>
+#
+# Quick and dirty script which creates Geo Indices in MongoDB.
+#
+# Assumes you already loaded it with the provided shell script.
+#
+# Needs PyMongo 1.6 or greater
+
+import pymongo
+from pymongo import Connection
+
+# Connect with Connection()'s default settings and select the 'bart' database.
+connection = Connection()
+db = connection['bart']
+print "Indexing the Stops Data."
+# Store each stop's coordinates as a [lon, lat] array in 'stop_geo'.
+for row in db.stops.find():
+ row['stop_geo'] = [row['stop_lon'], row['stop_lat']]
+ db.stops.save(row)
+
+# Create a 2d geospatial index on the new field.
+db.stops.ensure_index([('stop_geo', pymongo.GEO2D)])
+print "Reindexed stops with Geospatial data."
+
+print "Indexing the Shapes data"
+for row in db.shapes.find():
+ row['shape_pt_geo'] = {'lon': row['shape_pt_lon'], 'lat': row['shape_pt_lat']}
+ db.shapes.save(row)
+
+db.shapes.ensure_index([('shape_pt_geo', pymongo.GEO2D)])
+print "Reindexed shapes with Geospatial data."
+
+db = connection['geo']
+print "Indexing the Zips Data."
+for row in db.zips.find():
+ row['loc'] = [-(row['loc']['x']), row['loc']['y']]
+ db.zips.save(row)
+
+db.zips.ensure_index([('loc', pymongo.GEO2D)])
+print "Reindexed zips with Geospatial data."
+
+print "Done."
Binary file not shown.
@@ -0,0 +1,53 @@
+#!/bin/sh
+#
+# Quick & dirty script to load the BART Transit
+# Data to mongodb. Assumes mongod is running.
+#
+# Data file is from: http://www.bart.gov/schedules/developers/gtfs.aspx
+# and freely available/redistributable.
+#
+# Spec on the data format is available at:
+# http://code.google.com/transit/spec/transit_feed_specification.html
+#
+# Original script developed by Brendan W. McAdams
+
+MONGO_IMPORT=/opt/mongo/bin/mongoimport
+DOS2UNIX=
+IMPORT_CMD="$MONGO_IMPORT -d bart --type csv --headerline --drop --ignoreBlanks"
+BART_ZIP="google_transit.zip"
+echo "Unzipping BART File ($BART_ZIP)"
+
+unzip -o $BART_ZIP
+
+# Agency is malformed in current file release... Fix it
+
+# add the missing endline character.
+echo '\n' >> agency.txt
+
+echo "Loading Agency file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' agency.txt
+$IMPORT_CMD -c agency agency.txt
+echo "Loading Stops file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' stops.txt
+$IMPORT_CMD -c stops stops.txt
+echo "Loading Routes file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' routes.txt
+$IMPORT_CMD -c routes routes.txt
+echo "Loading Trips file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' trips.txt
+$IMPORT_CMD -c trips trips.txt
+echo "Loading Stop Times file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' stop_times.txt
+$IMPORT_CMD -c stop_times stop_times.txt
+echo "Loading Calendar file..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' calendar.txt
+$IMPORT_CMD -c calendar calendar.txt
+echo "Loading Calendar Dates File..."
+perl -pi -e 's/\r\n|\n|\r/\n/g' calendar_dates.txt
+$IMPORT_CMD -c calendar_dates calendar_dates.txt
+echo "Loading Shapes file..." # For line drawing - might be usable by GeoMongo
+perl -pi -e 's/\r\n|\n|\r/\n/g' shapes.txt
+$IMPORT_CMD -c shapes shapes.txt
+
+echo "Loading zips.json to geo..."
+$MONGO_IMPORT zips.json -d geo -c zips --drop
Oops, something went wrong.

0 comments on commit 286709d

Please sign in to comment.