# Test Scan 48995
## Data comparison between two MongoDB databases. May 29, 2017

In [56]:
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import math
import matplotlib.pyplot as plt
import h5py

import pprint

import pymongo
from pymongo import MongoClient

# make graphics inline
%matplotlib inline

In [2]:
# Scan number whose documents are fetched from both databases below
# (passed to MongoDataTest.get_data and matched against 'scan_id').
scanid = 48995

In [91]:
class MongoDataTest(object):
    """Collect every document that belongs to one scan from a MongoDB server.

    The documents are read from two databases on the same server:

    * ``ds`` -- holds the ``run_start``, ``run_stop``, ``event_descriptor``
      and ``event`` collections;
    * ``fs`` -- holds the ``datum`` and ``resource`` collections.

    Parameters
    ----------
    hostname : str
        Host name passed to the client constructor.
    port : int
        Port passed to the client constructor.
    ds : str
        Name of the database with the run/event collections.
    fs : str
        Name of the database with the datum/resource collections.
    client_factory : callable, optional
        Called as ``client_factory(hostname, port)``; must return an object
        that supports ``client[db_name]`` and ``client.close()``.
        Defaults to ``pymongo.MongoClient``.
    """

    # Keys of a primary event's 'data' dict whose values are looked up as
    # 'datum_id' in the datum collection when no explicit keys are given.
    DEFAULT_DETECTOR_KEYS = ('merlin1', 'xspress3_ch1', 'xspress3_ch2', 'xspress3_ch3')

    def __init__(self, hostname, port, ds, fs, *, client_factory=None):
        self.hostname = hostname
        self.port = port
        self.ds = ds
        self.fs = fs
        self.client_factory = client_factory

    def get_data(self, scanid, detector_keys=None):
        """Return all documents related to the scan ``scanid``.

        Parameters
        ----------
        scanid : int
            Value matched against the ``scan_id`` field of ``run_start``.
        detector_keys : iterable of str, optional
            Keys of the primary events' ``data`` whose values are datum ids.
            Defaults to ``DEFAULT_DETECTOR_KEYS``.

        Returns
        -------
        dict
            Maps each of ``'run_start'``, ``'run_stop'``,
            ``'event_descriptor'``, ``'event'``, ``'datum'`` and
            ``'resource'`` to a list of documents (``_id`` excluded).

        Raises
        ------
        IndexError
            If no ``run_start`` document has the requested ``scan_id``.
        KeyError
            If a primary event has no entry for one of ``detector_keys``.
        """
        factory = MongoClient if self.client_factory is None else self.client_factory
        keys = self.DEFAULT_DETECTOR_KEYS if detector_keys is None else detector_keys

        client = factory(self.hostname, self.port)
        try:
            return self._collect(client[self.ds], client[self.fs], scanid, keys)
        finally:
            # Release the connection even when one of the queries fails.
            client.close()

    def _collect(self, ds, fs, scanid, detector_keys):
        """Run every query against the two open databases and build the result."""
        data = {}

        run_start_docs = self._find_all(ds['run_start'], {'scan_id': scanid})
        if not run_start_docs:
            raise IndexError('no run_start document found for scan_id %r' % (scanid,))
        # Only the first matching run_start is used to locate dependent documents.
        run_start_uid = run_start_docs[0]['uid']
        data['run_start'] = run_start_docs

        data['run_stop'] = self._find_all(ds['run_stop'], {'run_start': run_start_uid})

        descriptors = self._find_all(ds['event_descriptor'], {'run_start': run_start_uid})
        data['event_descriptor'] = descriptors

        events = ds['event']
        event_docs = []
        primary_event_docs = []
        for descriptor in descriptors:
            docs = self._find_all(events, {'descriptor': descriptor['uid']})
            if descriptor['name'] == 'primary':
                primary_event_docs = docs
            elif descriptor['name'] == 'baseline':
                for doc in docs:
                    self._replace_nans(doc['data'])
            event_docs += docs
        data['event'] = event_docs

        datum = fs['datum']
        datum_docs = []
        resource_uids = set()
        # One query per key keeps the datum documents grouped by detector key.
        for key in detector_keys:
            datum_ids = [event_doc['data'][key] for event_doc in primary_event_docs]
            found = self._find_all(datum, {'datum_id': {'$in': datum_ids}})
            resource_uids.update(doc['resource'] for doc in found)
            datum_docs += found
        data['datum'] = datum_docs

        data['resource'] = self._find_all(
            fs['resource'], {'uid': {'$in': list(resource_uids)}}
        )
        return data

    @staticmethod
    def _find_all(collection, query):
        """Return the documents matching ``query`` as a list, without ``_id``."""
        return list(collection.find(query, {'_id': 0}))

    @staticmethod
    def _replace_nans(record):
        """Replace float NaN values of ``record`` with the string ``'nan'`` in place.

        NaN never compares equal to itself, so a NaN left in place makes two
        otherwise identical documents compare as different.  Only float
        scalars are inspected; strings, ``None``, lists and nested documents
        are left untouched instead of being handed to ``np.isnan``.
        """
        for key, value in record.items():
            if isinstance(value, (float, np.floating)) and np.isnan(value):
                record[key] = 'nan'

In [92]:
# Fetch the scan from the 'xf03id1-mdb01' server and time the retrieval.
mdb01Test = MongoDataTest(
    hostname='xf03id1-mdb01',
    port=27017,
    ds='datastore-new',
    fs='filestore-new',
)
t1 = datetime.now()
mdb01Data = mdb01Test.get_data(scanid)
t2 = datetime.now()
# The elapsed value is a timedelta, so it prints as H:MM:SS.ffffff.
print("time:", t2 - t1, " seconds")

time: 0:00:03.221713  seconds


In [93]:
# Fetch the same scan from the 'xf03id-ca1' server and time the retrieval.
ca1Test = MongoDataTest(
    hostname='xf03id-ca1',
    port=27017,
    ds='datastore-1',
    fs='filestore-1',
)
t1 = datetime.now()
ca1Data = ca1Test.get_data(scanid)
t2 = datetime.now()
# The elapsed value is a timedelta, so it prints as H:MM:SS.ffffff.
print("time:", t2 - t1, " seconds")

time: 0:00:01.692206  seconds


In [94]:
# Per collection, print: its name, the number of documents retrieved from
# each database, and whether the two document lists differ (False = identical).
for colname in ('run_start', 'run_stop', 'event_descriptor', 'event', 'datum', 'resource'):
    print(
        colname,
        len(mdb01Data[colname]),
        len(ca1Data[colname]),
        mdb01Data[colname] != ca1Data[colname],
    )

run_start 1 1 False
run_stop 1 1 False
event_descriptor 2 2 False
event 10002 10002 False
datum 40000 40000 False
resource 2 2 False
