In [1]:
import shleem

In [2]:
# shleem.mongodb.server(<name>) returns a MongoDBServer DataSource (see the repr and type shown below).
shleem_test_server = shleem.mongodb.server("shleem_test_server")

In [3]:
shleem_test_server

MongoDB server DataSource: shleem_test_server

In [4]:
type(shleem_test_server)

shleem.mongodb.mongodb.MongoDBServer

In [5]:
shleem_test_server.identifier

'shleem_test_server'

In [6]:
shleem_test_server.server_name

'shleem_test_server'

In [7]:
# NOTE(review): _get_connection is underscore-prefixed, i.e. private by convention.
# The output below shows it returns a pymongo MongoClient; prefer a public
# accessor if shleem exposes one — TODO confirm.
client = shleem_test_server._get_connection()

In [8]:
client

MongoClient(host=['ds149954.mlab.com:49954'], document_class=dict, tz_aware=False, connect=True, authsource='shleem_test')

In [9]:
client['shleem_test']['example_data_collection'].find_one()

{'_id': ObjectId('59e8a02a297efbb3162667f1'),
 'address': {'building': '469',
  'coord': [-73.961704, 40.662942],
  'street': 'Flatbush Avenue',
  'zipcode': '11225'},
 'borough': 'Brooklyn',
 'cuisine': 'Hamburgers',
 'grades': [{'date': datetime.datetime(2014, 12, 30, 0, 0),
   'grade': 'A',
   'score': 8},
  {'date': datetime.datetime(2014, 7, 1, 0, 0), 'grade': 'B', 'score': 23},
  {'date': datetime.datetime(2013, 4, 30, 0, 0), 'grade': 'A', 'score': 12},
  {'date': datetime.datetime(2012, 5, 8, 0, 0), 'grade': 'A', 'score': 12}],
 'name': "Wendy'S",
 'restaurant_id': '30112340'}

In [10]:
# Indexing the server DataSource with a database name gives a database DataSource (repr below).
test_db = shleem_test_server['shleem_test']

In [11]:
test_db

MongoDB database DataSource: shleem_test_server.shleem_test

In [12]:
test_db.identifier

'shleem_test_server.shleem_test'

In [13]:
test_db.db_name

'shleem_test'

In [14]:
# Indexing the database DataSource with a collection name gives a collection DataSource (repr below).
examp = test_db['example_data_collection']

In [15]:
examp

MongoDB collection DataSource: shleem_test_server.shleem_test.example_data_collection

In [16]:
# Filter on borough; the identifier becomes the last component of the
# DataSource's dotted path (see the repr a few cells below).
queens_restaurants = examp.query(
    {"borough": "Queens"},
    identifier="queens_restaurants",
)

In [17]:
type(queens_restaurants)

shleem.mongodb.mongodb.MongoDBQuery

In [18]:
queens_restaurants

MongoDB query DataSource: shleem_test_server.shleem_test.example_data_collection.queens_restaurants

In [19]:
queens_restaurants.query

{'borough': 'Queens'}

In [20]:
queens_restaurants.source_type

'MongoDB'

In [21]:
# This query has no callables in it, so tap() is called without arguments;
# the output below shows it returns a pymongo Cursor.
cursor = queens_restaurants.tap()

In [22]:
cursor

<pymongo.cursor.Cursor at 0x10be04dd8>

In [23]:
cursor.next()

{'_id': ObjectId('59e8a02a297efbb3162667f3'),
 'address': {'building': '97-22',
  'coord': [-73.8601152, 40.7311739],
  'street': '63 Road',
  'zipcode': '11374'},
 'borough': 'Queens',
 'cuisine': 'Jewish/Kosher',
 'grades': [{'date': datetime.datetime(2014, 11, 24, 0, 0),
   'grade': 'Z',
   'score': 20},
  {'date': datetime.datetime(2013, 1, 17, 0, 0), 'grade': 'A', 'score': 13},
  {'date': datetime.datetime(2012, 8, 2, 0, 0), 'grade': 'A', 'score': 13},
  {'date': datetime.datetime(2011, 12, 15, 0, 0), 'grade': 'B', 'score': 25}],
 'name': 'Tov Kosher Kitchen',
 'restaurant_id': '40356068'}

### Parameterized Queries

In [24]:
def getter(field_name):
    """Return a callable that looks up ``field_name`` in its keyword arguments.

    The returned function accepts arbitrary keyword arguments and returns the
    value passed under ``field_name``. A ``KeyError`` is raised if that
    keyword was not supplied.
    """
    def pick_field(**params):
        return params[field_name]

    return pick_field

In [25]:
# Both bounds are callables built by getter(); each one reads its value from
# the keyword arguments it is called with (min_val / max_val).
zipcode_range = examp.query(
    {
        "address.zipcode": {
            "$gte": getter("min_val"),
            "$lte": getter("max_val"),
        },
    },
    identifier="zipcode_range",
)

In [26]:
zipcode_range

MongoDB query DataSource: shleem_test_server.shleem_test.example_data_collection.zipcode_range

In [27]:
# The keyword names match the getter(...) names used in the query above;
# tap() presumably forwards them to those callables — TODO confirm.
# Bounds are passed as strings, matching the string-typed zipcode field in the documents.
cursor = zipcode_range.tap(min_val="11249", max_val="11300")

In [28]:
# Drain the cursor into a list so the results can be counted and indexed below.
res = list(cursor)

In [29]:
len(res)

163

In [30]:
res[0]

{'_id': ObjectId('59e8a02a297efbb316266920'),
 'address': {'building': '188',
  'coord': [-73.9581492, 40.7177363],
  'street': 'Bedford Avenue',
  'zipcode': '11249'},
 'borough': 'Brooklyn',
 'cuisine': 'American',
 'grades': [{'date': datetime.datetime(2014, 3, 19, 0, 0),
   'grade': 'A',
   'score': 12},
  {'date': datetime.datetime(2013, 3, 21, 0, 0), 'grade': 'A', 'score': 12},
  {'date': datetime.datetime(2012, 3, 15, 0, 0), 'grade': 'A', 'score': 10},
  {'date': datetime.datetime(2011, 10, 13, 0, 0), 'grade': 'B', 'score': 14}],
 'name': 'Greenpoint Tavern',
 'restaurant_id': '40370342'}

### Aggregation

In [31]:
# Single $group stage: one output document per distinct borough value,
# with "count" incremented by 1 for every input document.
agg_pipeline = [{"$group": {"_id": "$borough", "count": {"$sum": 1}}}]

In [32]:
# No identifier is passed here; the resulting identifier (shown below) ends in
# a long hex string instead of a readable name.
borough_counts = examp.aggregation(agg_pipeline)

In [33]:
repr(borough_counts)

'MongoDB aggregation DataSource: shleem_test_server.shleem_test.example_data_collection.7098918b9417525bcb7126c8978ad3c10ac4a352a2238d3e614cfcd50b6b82bc'

In [34]:
borough_counts.identifier

'shleem_test_server.shleem_test.example_data_collection.7098918b9417525bcb7126c8978ad3c10ac4a352a2238d3e614cfcd50b6b82bc'

In [35]:
cursor = borough_counts.tap()

[{'$group': {'_id': '$borough', 'count': {'$sum': 1}}}]


In [36]:
# Materialize every result document so the cell displays the full list.
list(cursor)

[{'_id': 'Missing', 'count': 51},
 {'_id': 'Manhattan', 'count': 10259},
 {'_id': 'Brooklyn', 'count': 6086},
 {'_id': 'Staten Island', 'count': 969},
 {'_id': 'Queens', 'count': 5656},
 {'_id': 'Bronx', 'count': 2338}]

### Parameterized Aggregations

In [37]:
# Stages, in order: keep zip codes >= "11695"; emit one document per grade;
# total and count the scores per (name, address); divide to get the mean;
# keep only means above a threshold supplied as a keyword argument.
param_agg = [
    {"$match": {"address.zipcode": {"$gte": "11695"}}},
    {"$unwind": "$grades"},
    {
        "$group": {
            "_id": {"name": "$name", "address": "$address"},
            "sum_score": {"$sum": "$grades.score"},
            "num_score": {"$sum": 1},
        }
    },
    {
        "$project": {
            "_id": 1,
            "avg_score": {"$divide": ["$sum_score", "$num_score"]},
        }
    },
    # The callable stands in for the value; it returns the
    # avg_score_threshold keyword it is called with.
    {"$match": {"avg_score": {"$gt": lambda **params: params["avg_score_threshold"]}}},
]

In [38]:
# NOTE(review): this binding is overwritten by the next cell, which builds the
# same aggregation with an explicit identifier; this cell looks redundant.
by_avg_score_threshold = examp.aggregation(param_agg)

In [39]:
by_avg_score_threshold = examp.aggregation(param_agg, "restaurant_by_avg_score_threshold")
by_avg_score_threshold

MongoDB aggregation DataSource: shleem_test_server.shleem_test.example_data_collection.restaurant_by_avg_score_threshold

In [40]:
cursor = by_avg_score_threshold.tap(avg_score_threshold=13.5)

[{'$match': {'address.zipcode': {'$gte': '11695'}}}, {'$unwind': '$grades'}, {'$group': {'_id': {'name': '$name', 'address': '$address'}, 'sum_score': {'$sum': '$grades.score'}, 'num_score': {'$sum': 1}}}, {'$project': {'_id': 1, 'avg_score': {'$divide': ['$sum_score', '$num_score']}}}, {'$match': {'avg_score': {'$gt': 13.5}}}]


In [41]:
cursor.next()

{'_id': {'address': {'building': '202-20',
   'coord': [-73.88648210000001, 40.5666985],
   'street': 'Rockaway Point Boulevard',
   'zipcode': '11697'},
  'name': 'Country Heart Cooking'},
 'avg_score': 14.4}

### Parameterized Aggregation with a Callable in a List

In [42]:
# Same shape as the previous parameterized pipeline, but the divisor is a
# callable placed inside a list. Note that "avg_score" here is the summed score
# divided by the caller-supplied normalizer, not a true per-restaurant mean.
# Both parameters use getter() so the two placeholders are written the same way.
param_agg2 = [
    {"$match": {"address.zipcode": {"$gte": "11695"}}},
    {"$unwind": "$grades"},
    {
        "$group": {
            "_id": {"name": "$name", "address": "$address"},
            "sum_score": {"$sum": "$grades.score"},
        }
    },
    {
        "$project": {
            "_id": 1,
            "avg_score": {"$divide": ["$sum_score", getter("normalizer")]},
        }
    },
    {"$match": {"avg_score": {"$gt": getter("avg_score_threshold")}}},
]

In [43]:
by_norm_score_threshold = examp.aggregation(param_agg2)

In [49]:
thresh2 = 13.5

In [50]:
cursor = by_norm_score_threshold.tap(avg_score_threshold=thresh2, normalizer=4)

[{'$match': {'address.zipcode': {'$gte': '11695'}}}, {'$unwind': '$grades'}, {'$group': {'_id': {'name': '$name', 'address': '$address'}, 'sum_score': {'$sum': '$grades.score'}}}, {'$project': {'_id': 1, 'avg_score': {'$divide': ['$sum_score', 4]}}}, {'$match': {'avg_score': {'$gt': 13.5}}}]


In [51]:
# Drain the cursor into a list so the results can be counted and displayed below.
all_res = list(cursor)

In [52]:
len(all_res)

2

In [53]:
all_res

[{'_id': {'address': {'building': '202-20',
    'coord': [-73.88648210000001, 40.5666985],
    'street': 'Rockaway Point Boulevard',
    'zipcode': '11697'},
   'name': 'Country Heart Cooking'},
  'avg_score': 18.0},
 {'_id': {'address': {'building': '202-24',
    'coord': [-73.9250442, 40.5595462],
    'street': 'Rockaway Point Boulevard',
    'zipcode': '11697'},
   'name': 'Blarney Castle'},
  'avg_score': 16.25}]