In [1]:
from datetime import datetime
import time

import numpy as np
import matplotlib.pyplot as plt
import h5py

import uuid
import pprint

import pymongo
from pymongo import MongoClient

# make graphics inline
%matplotlib inline

## Time

In [2]:
from pytz import timezone

# Ask for "now" directly in the US/Eastern zone. Calling
# eastern.localize(datetime.now()) would only attach Eastern tzinfo to the
# host's local wall-clock reading, which is wrong on any machine whose local
# zone is not US/Eastern.
eastern = timezone('US/Eastern')
print('Eastern time: ', datetime.now(eastern), 'Local time: ', datetime.now())

Eastern time:  2018-06-13 10:49:14.191141-04:00 Local time:  2018-06-13 10:49:14.191252


### Database, collection, index information

In [3]:
# Open a client to the MongoDB server at gpu-001:27018, then select the
# 'filestore-new' database and its 'datum' collection for the tests below.
client = MongoClient(host='gpu-001', port=27018)
db = client.get_database('filestore-new')
col = db.get_collection('datum')

In [4]:
# Show the indexes defined on the 'datum' collection, keyed by index name.
# Left as the bare last expression so the notebook renders the returned dict.
col.index_information()

{'_id_': {'key': [('_id', 1)], 'ns': 'filestore-new.datum', 'v': 1},
 'datum_id_1': {'key': [('datum_id', 1)],
  'ns': 'filestore-new.datum',
  'unique': True,
  'v': 1},
 'resource_1': {'key': [('resource', 1)], 'ns': 'filestore-new.datum', 'v': 1}}

### Bulk insert of 10000 documents with randomly scattered keys (plain random UUIDs)

In [5]:
# All documents in this batch share one freshly generated resource id; each
# document gets its own random UUID string as datum_id.
resource_id = str(uuid.uuid4())

# Queue the inserts on an unordered bulk operation. Nothing is executed in
# this cell; the queued operations are run by a later bulk.execute() call.
bulk = col.initialize_unordered_bulk_op()
for doc_number in range(10000):
    bulk.insert({'resource': resource_id, 'datum_id': str(uuid.uuid4())})

In [6]:
# Time only the execution of the operations already queued on `bulk`.
t1 = datetime.now()
bulk.execute()
t2 = datetime.now()
# (t2 - t1) is a timedelta, which prints as H:MM:SS.ffffff. Convert it to a
# plain number of seconds so the printed value matches the " seconds" label.
print( "time:", (t2 - t1).total_seconds(), " seconds")

time: 0:00:58.255455  seconds


### Bulk insert of 10000 documents with random keys prefixed by a shared timestamp

In [7]:
# Take the current time once as a POSIX timestamp and keep it as a string.
# `t` is the common prefix concatenated onto every datum_id in the next cell.
t =  str(datetime.now().timestamp())

In [8]:
# New shared resource id for this second batch of documents.
resource_id = str(uuid.uuid4())

# Queue the inserts on a fresh unordered bulk operation. Every datum_id is the
# shared timestamp string `t` immediately followed by a random UUID string.
bulk = col.initialize_unordered_bulk_op()
for doc_number in range(10000):
    bulk.insert({'resource': resource_id, 'datum_id': t + str(uuid.uuid4())})

In [9]:
# Time only the execution of the operations already queued on `bulk`.
t1 = datetime.now()
bulk.execute()
t2 = datetime.now()
# (t2 - t1) is a timedelta, which prints as H:MM:SS.ffffff. Convert it to a
# plain number of seconds so the printed value matches the " seconds" label.
print( "time:", (t2 - t1).total_seconds(), " seconds")

time: 0:00:00.262786  seconds


### Collection statistics

In [10]:
# Run the collStats database command for the 'datum' collection, then
# pretty-print the returned statistics document.
datum_stats = db.command('collStats', 'datum')
pprint.pprint(datum_stats)

{'avgObjSize': 165,
 'capped': False,
 'count': 191208497,
 'indexDetails': {'_id_': {'LSM': {'bloom filter false positives': 0,
                                   'bloom filter hits': 0,
                                   'bloom filter misses': 0,
                                   'bloom filter pages evicted from cache': 0,
                                   'bloom filter pages read into cache': 0,
                                   'bloom filters in the LSM tree': 0,
                                   'chunks in the LSM tree': 0,
                                   'highest merge generation in the LSM tree': 0,
                                   'queries that could have benefited from a Bloom filter that did not exist': 0,
                                   'sleep for LSM checkpoint throttle': 0,
                                   'sleep for LSM merge throttle': 0,
                                   'total size of bloom filters': 0},
                           'block-manager': {'allo