In [25]:
from datetime import datetime
import time

import numpy as np
import matplotlib.pyplot as plt
import h5py

import uuid
import pprint

import pymongo
from pymongo import MongoClient

import warnings
warnings.filterwarnings('ignore')

# make graphics inline
%matplotlib inline

## Select Database and Collection

In [26]:
# Connect to the MongoDB host, then select the 'filestore-new' database and its
# 'datum' collection; the cells below work through `fs` and `datum`.
client = MongoClient(host='xf03id1-mdb01', port=27017)
fs = client.get_database('filestore-new')
datum = fs.get_collection('datum')

In [36]:
# Document count of the datum collection, taken before the bulk inserts below.
datum_count_before = datum.count()
datum_count_before

248156649

Create uuid-based resource ID

In [37]:
# One random (version 4) UUID, stored as a string; every document inserted
# below uses it as its `resource` value.
resource_uuid = uuid.uuid4()
resource_id = str(resource_uuid)
print("resource id: ", resource_id)

resource id:  fb8eb5b9-3f7c-410e-9ed7-f10d0edc135b


## Bulk insert of 10000 documents with random keys

Prepare a bulk insert request

In [38]:
# Queue 10000 documents on an unordered bulk operation. Each document carries
# the shared resource ID and its own random UUID4 string as datum_id.
bulk = datum.initialize_unordered_bulk_op()
for i in range(10000):
    dm = {'resource': resource_id, 'datum_id': str(uuid.uuid4())}
    bulk.insert(dm)

Execute a bulk insert request

In [39]:
# Time the execute() call on its own; the bulk request was assembled in the
# previous cell.
t1 = datetime.now()
results = bulk.execute()
t2 = datetime.now()
# t2 - t1 is a timedelta, which prints as H:MM:SS.ffffff. Convert it to a float
# so the number shown really is in seconds, as the label says.
print("time:", (t2 - t1).total_seconds(), "seconds")

time: 0:00:24.045399  seconds


Print results

In [40]:
# Show the summary returned by bulk.execute() in the previous cell.
print(results)

{'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 10000, 'nUpserted': 0, 'nMatched': 0, 'nModified': 0, 'nRemoved': 0, 'upserted': []}


## Bulk insert of 10000 documents with random keys prepended with timestamp

Create a bulk timestamp

In [41]:
# Capture the current time once, as a POSIX-timestamp string; the next cell
# prepends this same value to every datum_id in the batch.
batch_time = datetime.now()
ts = str(batch_time.timestamp())
print("timestamp: ", ts)

timestamp:  1532720130.896131


Prepare a bulk insert request

In [42]:
# Queue 10000 documents on a fresh unordered bulk operation. Each datum_id is
# the shared timestamp string `ts` followed directly by a random UUID4 string.
bulk = datum.initialize_unordered_bulk_op()
for i in range(10000):
    dm = {'resource': resource_id, 'datum_id': ts + str(uuid.uuid4())}
    bulk.insert(dm)

Execute a bulk insert request

In [43]:
# Time the execute() call on its own; the bulk request was assembled in the
# previous cell.
t1 = datetime.now()
results = bulk.execute()
t2 = datetime.now()
# t2 - t1 is a timedelta, which prints as H:MM:SS.ffffff. Convert it to a float
# so the number shown really is in seconds, as the label says.
print("time:", (t2 - t1).total_seconds(), "seconds")

time: 0:00:00.439803  seconds


In [44]:
# Show the summary returned by bulk.execute() in the previous cell.
print(results)

{'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 10000, 'nUpserted': 0, 'nMatched': 0, 'nModified': 0, 'nRemoved': 0, 'upserted': []}


## Collection statistics

In [45]:
# Run the collStats command for the 'datum' collection and pretty-print the reply.
coll_stats = fs.command('collStats', 'datum')
pprint.pprint(coll_stats)

{'$clusterTime': {'clusterTime': Timestamp(1532720567, 1),
                  'signature': {'hash': b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00\x00\x00\x00\x00'
                                        b'\x00\x00\x00\x00',
                                'keyId': 0}},
 'avgObjSize': 165.0,
 'capped': False,
 'count': 248176649,
 'indexSizes': {'_id_': 2717958144.0,
                'datum_id_1': 13433151488.0,
                'resource_1': 1067773952},
 'nchunks': 940,
 'nindexes': 3,
 'ns': 'filestore-new.datum',
 'ok': 1.0,
 'operationTime': Timestamp(1532720567, 1),
 'sharded': True,
 'shards': {'rshxn': {'$clusterTime': {'clusterTime': Timestamp(1532720567, 1),
                                       'signature': {'hash': b'\x00\x00\x00\x00'
                                                             b'\x00\x00\x00\x00'
                                                             b'\x00\x00\x00\x00'
                                   