In [1]:
import shleem

In [2]:
import dinglebop as dp

In [3]:
# Ask shleem for the MongoDB server object identified by 'dinglebop_test_server'.
# NOTE(review): whether this call opens a connection eagerly is not visible here — confirm.
dinglebop_test_server = shleem.mongodb.server('dinglebop_test_server')

In [4]:
# Index the server object by name to select the 'dinglebop_test' entry
# (presumably a database — confirm against the shleem documentation).
dinglebop_test = dinglebop_test_server['dinglebop_test']

In [5]:
# Index once more by name to select 'example_datasource'
# (presumably a collection — confirm against the shleem documentation).
example_datasource = dinglebop_test['example_datasource']

In [6]:
# Derive a query-backed data source from example_datasource using the
# filter document {"borough": "Queens"}; the identifier becomes the last
# component of the dotted name shown in the object's repr.
queens_restaurants = example_datasource.query(
    {"borough": "Queens"},
    identifier='queens_restaurants',
)
# Bare trailing expression so the notebook displays the object's repr.
queens_restaurants

MongoDB query DataSource: dinglebop_test_server.dinglebop_test.example_datasource.queens_restaurants

In [7]:
# IPython introspection (trailing `?`): shows the signature and docstring of
# from_mongodb_tap. This is IPython-only syntax, not plain Python.
dp.dataset.tabular.from_mongodb_tap?

Signature: dp.dataset.tabular.from_mongodb_tap(datatap, identifier=None, fieldnames=None, flatten=False, missing_val=None)
Docstring:
Creates a tabular dataset from a MongoDB data tap.

Arguments
---------
datatap : shleem.MongoDBQuery or shleem.MongoDBAggregation
    A MongoDB-based shleem data tap.
identifier : str, optional
    A string identifier for this dataset, preferably containing only
    lowercase letters, numbers and underscores.
fieldnames : list, optional
    A list of field names to keep from the source documents. If not
    given, field names are inferred from the first yielded document.
flatten : bool, optional
    If True, sub-dicts and lists in documents are flattened. Defaults to False.

In [8]:
# Field names to keep from each source document when building the tabular
# dataset. The dotted entries match the flattened keys seen in the yielded
# rows (e.g. 'address.street').
fieldnames = [
    'address.street',
    'address.building',
    'borough',
    'cuisine',
    'name',
]

In [9]:
# Build a tabular dataset on top of the Queens query tap, keeping only the
# selected fields and flattening nested sub-documents (flatten=True).
basic_flat_queens_restaurants = dp.dataset.tabular.from_mongodb_tap(
    queens_restaurants,
    identifier='basic_flat_queens_restaurants',
    fieldnames=fieldnames,
    flatten=True,
)
# Bare trailing expression so the notebook displays the dataset's repr.
basic_flat_queens_restaurants

DataSet: dinglebop_test_server.dinglebop_test.example_datasource.queens_restaurants.basic_flat_queens_restaurants

In [10]:
# tap() hands back an iterator over the dataset's rows; the following cell
# advances it by one item to peek at the first row.
gene = basic_flat_queens_restaurants.tap()

In [11]:
# Peek at the first row. Use the built-in next() instead of calling the
# __next__ dunder directly; it is the idiomatic way to advance an iterator.
next(gene)

{'address.building': '8825',
 'address.street': 'Astoria Boulevard',
 'borough': 'Queens',
 'cuisine': 'American',
 'name': 'Brunos On The Boulevard'}

### Aggregation

In [24]:
# Two-stage MongoDB aggregation pipeline:
#   1. $group   - group documents by their 'borough' value and count each group.
#   2. $project - expose the group key as 'borough', keep 'count', drop '_id'.
agg_pipeline = [
    {
        '$group': {
            '_id': '$borough',
            'count': {'$sum': 1},
        },
    },
    {
        '$project': {
            'borough': '$_id',
            'count': 1,
            '_id': 0,
        },
    },
]

In [25]:
# Derive an aggregation-backed data source from example_datasource by
# running agg_pipeline; the identifier names it 'borough_counts'.
borough_counts = example_datasource.aggregation(
    agg_pipeline,
    identifier='borough_counts',
)
# Bare trailing expression so the notebook displays the object's repr.
borough_counts

MongoDB aggregation DataSource: dinglebop_test_server.dinglebop_test.example_datasource.borough_counts

In [26]:
# Peek at the first aggregated document. Use the built-in next() rather than
# invoking the __next__ dunder directly on the freshly created tap.
next(borough_counts.tap())

{'borough': 'Missing', 'count': 51}

In [29]:
# Wrap the aggregation tap in a tabular dataset. No fieldnames are passed, so
# (per the function's docstring) they are inferred from the first yielded
# document.
borough_counts_table = dp.dataset.tabular.from_mongodb_tap(
    borough_counts,
    identifier='borough_counts_table',
)
# Bare trailing expression so the notebook displays the dataset's repr.
borough_counts_table

DataSet: dinglebop_test_server.dinglebop_test.example_datasource.borough_counts.borough_counts_table

In [30]:
# Display the whole dataset in tabular form (one row per borough, with
# 'count' and 'borough' columns). Presumably a pandas DataFrame — confirm.
borough_counts_table.as_dataframe()

Unnamed: 0,count,borough
0,51,Missing
1,6086,Brooklyn
2,5656,Queens
3,2338,Bronx
4,10259,Manhattan
5,969,Staten Island


In [31]:
# Same data as an array; the mixed int/str columns produce dtype=object.
borough_counts_table.as_ndarray()

array([[51, 'Missing'],
       [6086, 'Brooklyn'],
       [5656, 'Queens'],
       [2338, 'Bronx'],
       [10259, 'Manhattan'],
       [969, 'Staten Island']], dtype=object)

In [32]:
# Iterate the dataset row by row; each yielded item is a dict with the
# 'count' and 'borough' keys, printed one per line.
for item in borough_counts_table.as_dict_iter():
    print(item)

{'count': 51, 'borough': 'Missing'}
{'count': 6086, 'borough': 'Brooklyn'}
{'count': 5656, 'borough': 'Queens'}
{'count': 2338, 'borough': 'Bronx'}
{'count': 10259, 'borough': 'Manhattan'}
{'count': 969, 'borough': 'Staten Island'}
