# H5mongo

This database interface uses `pymongo` as its backend. Only the metadata (or part of it) is stored in the database, not the raw data.

In [None]:
import pymongo
from pymongo import MongoClient

from h5rdmtoolbox import tutorial
import h5rdmtoolbox as h5tbx

In [None]:
# No host/port is given, so pymongo's defaults are used (localhost:27017 according
# to the pymongo documentation). A MongoDB server must be reachable there.
client = MongoClient()
client  # last expression of the cell: displays the client's repr

In [None]:
# Select the tutorial database and the collection used throughout this notebook.
db = client.get_database('h5database_notebook_tutorial')
collection = db.get_collection('test')
# Drop the collection so that re-running the notebook starts from an empty
# collection instead of accumulating documents from earlier runs.
collection.drop()

Let's generate some test data

In [None]:
# Create one temporary HDF5 file per user and remember its filename.
usernames = ('Allen', 'Mike', 'Ellen', 'Alliot')
# Plural name on purpose: the loop variable `company` below must not rebind the
# tuple that is being iterated over.
companies = ('bikeCompany', 'shoeCompany', 'bikeCompany', 'shoeCompany')
filenames = []
for i, (username, company) in enumerate(zip(usernames, companies)):
    with h5tbx.H5File(h5tbx.generate_temporary_filename(), 'w') as h5:
        filenames.append(h5.hdf_filename)
        # Attributes on the root group
        h5.attrs['username'] = username
        h5.attrs['company'] = company
        # A dictionary-valued attribute
        h5.attrs['meta'] = {'day': 'monday', 'iday': 0}
        # A sub-group carrying the running index as its own attribute
        g = h5.create_group('idgroup')
        g.attrs['id'] = i

Import the mongo module (will add the accessor `mongo` to datasets and groups)

In [None]:
from h5rdmtoolbox.h5database import mongo

In [None]:
# Write the content of every generated file into the collection via the `mongo` accessor.
# NOTE(review): `recursive=True` presumably also inserts sub-groups such as
# 'idgroup' - confirm against the h5rdmtoolbox documentation.
for hdf_path in filenames:
    with h5tbx.H5File(hdf_path) as h5:
        h5.mongo.insert(collection=collection, recursive=True)

Let's inspect the found database entries:

In [None]:
from pprint import pprint

Let's do the equivalent filter request as before (`sub_repo = repo.filter(Entry['/operation_point'].attrs['long_name'] == 'Operation point data group')`)

In [None]:
%%time
res = collection.find({})
for r in res.rewind():
    pprint(r)

In [None]:
%%time
res = collection.find({'id': {"$eq": 1}})
for r in res.rewind():
    pprint(r)

In [None]:
%%time
res = collection.find({'company': "shoeCompany"})
for r in res.rewind():
    pprint(r)

Number of found documents:

In [None]:
# Count the documents matching the filter without iterating over a cursor.
collection.count_documents({'company': "shoeCompany"})

Total number of documents:

In [None]:
# An empty filter counts every document in the collection.
collection.count_documents({})

You can also search for an entry within the nested `meta` dictionary by using dot notation in the field name (e.g. `meta.day`):

In [None]:
%%time
res = collection.find({'meta.day': "monday"})
for r in res.rewind():
    pprint(r)
collection.count_documents({'meta.day': "monday"})