Skip to content

Commit

Permalink
Merge pull request #284 from pyGSTio/feature-mongodb-upgrade
Browse files Browse the repository at this point in the history
Feature mongodb upgrade
  • Loading branch information
sserita committed Jan 10, 2023
2 parents 19bc512 + 32dfc59 commit a4106a7
Show file tree
Hide file tree
Showing 33 changed files with 1,865 additions and 1,609 deletions.
9 changes: 7 additions & 2 deletions pygsti/algorithms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import collections as _collections
import time as _time
import copy as _copy

import numpy as _np
import scipy.optimize as _spo
Expand Down Expand Up @@ -797,8 +798,12 @@ def _max_array_types(artypes_list): # get the maximum number of each array type

for j, obj_fn_builder in enumerate(iteration_objfn_builders):
tNxt = _time.time()
optimizer.fditer = optimizer.first_fditer if (i == 0 and j == 0) else 0
opt_result, mdc_store = run_gst_fit(mdc_store, optimizer, obj_fn_builder, printer - 1)
if i == 0 and j == 0: # special case: in first optimization run, use "first_fditer"
first_iter_optimizer = _copy.deepcopy(optimizer) # use a separate copy of optimizer, as it
first_iter_optimizer.fditer = optimizer.first_fditer # is a persistent object (so don't modify!)
opt_result, mdc_store = run_gst_fit(mdc_store, first_iter_optimizer, obj_fn_builder, printer - 1)
else:
opt_result, mdc_store = run_gst_fit(mdc_store, optimizer, obj_fn_builder, printer - 1)
profiler.add_time('run_iterative_gst: iter %d %s-opt' % (i + 1, obj_fn_builder.name), tNxt)

tNxt = _time.time()
Expand Down
1 change: 1 addition & 0 deletions pygsti/algorithms/rbfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ def __init__(self, fittype, seed, rtype, success, estimates, variable, stds=None
bootstraps_failrate : float, optional
The proportion of the parameter estimates from bootstrapped datasets that failed.
"""
super().__init__()
self.fittype = fittype
self.seed = seed
self.rtype = rtype
Expand Down
1 change: 1 addition & 0 deletions pygsti/baseobjs/basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ def cast(cls, name_or_basis_or_matrices, dim=None, sparse=None, classical_name='
raise ValueError("Can't cast %s to be a basis!" % str(type(name_or_basis_or_matrices)))

def __init__(self, name, longname, real, sparse):
super().__init__()
self.name = name
self.longname = longname
self.real = real # whether coefficients must be real (*not* whether elements are real - they're always complex)
Expand Down
678 changes: 624 additions & 54 deletions pygsti/baseobjs/mongoserializable.py

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion pygsti/baseobjs/nicelyserializable.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
import json as _json
import numpy as _np
import scipy.sparse as _sps
from pygsti.baseobjs.mongoserializable import MongoSerializable


class NicelySerializable(object):
class NicelySerializable(MongoSerializable):
"""
The base class for all "nicely serializable" objects in pyGSTi.
Expand Down Expand Up @@ -344,3 +345,11 @@ def _decodevalue(cls, val):
return complex(val)
else:
return val

@classmethod
def _create_obj_from_doc_and_mongodb(cls, doc, mongodb):
    """
    Build an instance from its serialized MongoDB document.

    The entire state of a NicelySerializable object lives in `doc` itself,
    so the `mongodb` handle is deliberately unused here.
    """
    return cls.from_nice_serialization(doc)

def _add_auxiliary_write_ops_and_update_doc(self, doc, write_ops, mongodb, collection_name, overwrite_existing):
    """
    Merge this object's nice-serialization state into `doc`.

    No auxiliary write operations are queued on `write_ops`; the object is
    fully represented by the (updated) `doc` alone.
    """
    serialized_state = self.to_nice_serialization()
    doc.update(serialized_state)
1 change: 1 addition & 0 deletions pygsti/baseobjs/qubitgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def __init__(self, qubit_labels, initial_connectivity=None, initial_edges=None,
`initial_edges`, and this argument is required whenever such
indices are used.
"""
super().__init__()
self.nqubits = len(qubit_labels)
self.directed = directed

Expand Down
5 changes: 5 additions & 0 deletions pygsti/baseobjs/statespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def cast(cls, obj):
return QubitSpace(obj)
return ExplicitStateSpace(obj)

def __init__(self):
    # Cooperative constructor: forwards to the next class in the MRO so that
    # base-class state (presumably serialization bookkeeping — confirm against
    # the class's bases) is initialized even though StateSpace adds no state here.
    super().__init__()

@property
def udim(self):
"""
Expand Down Expand Up @@ -579,6 +582,7 @@ class QuditSpace(StateSpace):
"""

def __init__(self, nqudits_or_labels, udim_or_udims):
super().__init__()
if isinstance(nqudits_or_labels, int):
self.qudit_labels = tuple(range(nqudits_or_labels))
else:
Expand Down Expand Up @@ -990,6 +994,7 @@ def __init__(self, label_list, udims=None, types=None):
#Step1: convert label_list (and dims, if given) to a list of
# elements describing each "tensor product block" - each of
# which is a tuple of string labels.
super().__init__()

def is_label(x):
""" Return whether x is a valid space-label """
Expand Down
3 changes: 3 additions & 0 deletions pygsti/baseobjs/unitarygatefunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ def _from_nice_serialization(cls, state):
ret = cls() # assumes no __init__ args
ret.shape = tuple(state['shape'])
return ret

def __init__(self):
    # No instance state of its own; exists only to invoke the cooperative
    # super().__init__() chain (base-class initialization would otherwise be
    # skipped, since this class previously defined no __init__).
    super().__init__()
1 change: 1 addition & 0 deletions pygsti/circuits/circuitlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def __init__(self, circuits, op_label_aliases=None, circuit_rules=None, circuit_
name : str, optional
An optional name for this list, used for status messages.
"""
super().__init__()
self._circuits = tuple(map(_Circuit.cast, circuits)) # *static* container - can't add/append
self.op_label_aliases = op_label_aliases
self.circuit_rules = circuit_rules
Expand Down
1 change: 1 addition & 0 deletions pygsti/circuits/circuitstructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, elements, num_rows=None, num_cols=None, op_label_aliases=None
"""
Create a new CircuitPlaquette.
"""
super().__init__()
self.elements = _collections.OrderedDict(elements)
self.circuit_rules = circuit_rules
self.op_label_aliases = op_label_aliases
Expand Down
110 changes: 109 additions & 1 deletion pygsti/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from pygsti.circuits import circuit as _cir
from pygsti.baseobjs import outcomelabeldict as _ld, _compatibility as _compat
from pygsti.baseobjs.mongoserializable import MongoSerializable as _MongoSerializable
from pygsti.tools import NamedDict as _NamedDict
from pygsti.tools import listtools as _lt
from pygsti.tools.legacytools import deprecate as _deprecated_fn
Expand Down Expand Up @@ -821,7 +822,7 @@ def _round_int_repcnt(nreps):
return int(round(nreps))


class DataSet(object):
class DataSet(_MongoSerializable):
"""
An association between Circuits and outcome counts, serving as the input data for many QCVV protocols.
Expand Down Expand Up @@ -909,6 +910,7 @@ class DataSet(object):
Keys should be the circuits in this DataSet and value should
be Python dictionaries.
"""
collection_name = "pygsti_datasets"

def __init__(self, oli_data=None, time_data=None, rep_data=None,
circuits=None, circuit_indices=None,
Expand Down Expand Up @@ -992,6 +994,8 @@ def __init__(self, oli_data=None, time_data=None, rep_data=None,
DataSet
a new data set object.
"""
super().__init__()

# uuid for efficient hashing (set when done adding data or loading from file)
self.uuid = None

Expand Down Expand Up @@ -3134,3 +3138,107 @@ def auxinfo_dataframe(self, pivot_valuename=None, pivot_value=None, drop_columns

df = cdict.to_dataframe()
return _process_dataframe(df, pivot_valuename, pivot_value, drop_columns)

@classmethod
def _create_obj_from_doc_and_mongodb(cls, doc, mongodb, collision_action="aggregate",
                                     record_zero_counts=False, with_times="auto",
                                     circuit_parse_cache=None, verbosity=1):
    """
    Reconstruct a DataSet from its MongoDB document and per-circuit row documents.

    Parameters
    ----------
    doc : dict
        The dataset's main MongoDB document; must contain 'outcomes', 'comment',
        'datarow_collection_name' and '_id' keys (as written by
        `_add_auxiliary_write_ops_and_update_doc`).
    mongodb : pymongo.database.Database
        Database holding the data-row collection named by `doc`.
    collision_action, record_zero_counts, with_times, circuit_parse_cache, verbosity
        DataSet construction options; `with_times` may be True, False, or "auto".

    Returns
    -------
    DataSet
    """
    from pymongo import ASCENDING  # DESCENDING was imported but never used
    from pygsti.io import stdinput as _stdinput
    datarow_collection_name = doc['datarow_collection_name']
    outcomeLabels = doc['outcomes']

    dataset = DataSet(outcome_labels=outcomeLabels, collision_action=collision_action,
                      comment=doc['comment'])
    parser = _stdinput.StdInputParser()

    datarow_collection = mongodb[datarow_collection_name]
    # Rows are stored with a sequential 'index'; sorting recovers insertion order.
    for i, datarow_doc in enumerate(datarow_collection.find({'parent': doc['_id']}).sort('index', ASCENDING)):
        if i != datarow_doc['index']:
            _warnings.warn("Data set's row data is incomplete! There seem to be missing rows.")

        circuit = parser.parse_circuit(datarow_doc['circuit'], lookup={},  # allow a lookup to be passed?
                                       create_subcircuits=not _cir.Circuit.default_expand_subcircuits)

        oliArray = _np.array(datarow_doc['outcome_indices'], dataset.oliType)
        countArray = _np.array(datarow_doc['repetitions'], dataset.repType)
        if 'times' not in datarow_doc:  # with_times can be False or 'auto'
            if with_times is True:
                raise ValueError("Circuit %d does not contain time information and 'with_times=True'" % i)
            timeArray = _np.zeros(countArray.shape[0], dataset.timeType)
        else:
            if with_times is False:
                raise ValueError("Circuit %d contains time information and 'with_times=False'" % i)
            # BUGFIX: key is 'times' (as written by the serializer and tested
            # above) — previously read 'time', which raised KeyError whenever
            # time-stamped data was present.
            timeArray = _np.array(datarow_doc['times'], dataset.timeType)

        dataset._add_raw_arrays(circuit, oliArray, timeArray, countArray,
                                overwrite_existing=True,
                                record_zero_counts=record_zero_counts,
                                aux=datarow_doc.get('aux', {}))

    dataset.done_adding_data()
    return dataset

def _add_auxiliary_write_ops_and_update_doc(self, doc, write_ops, mongodb, collection_name, overwrite_existing,
                                            circuits=None, outcome_label_order=None, with_times="auto",
                                            datarow_collection_name='pygsti_datarows'):
    """
    Populate `doc` with this DataSet's metadata and queue per-circuit row writes.

    Updates `doc` in place with 'outcomes', 'comment' and
    'datarow_collection_name' keys, then adds one write operation to
    `write_ops` per circuit, each targeting `datarow_collection_name` and
    keyed by (circuit string, parent dataset id).

    Parameters
    ----------
    doc : dict
        Main dataset document; must already contain an '_id' key.
    write_ops
        Collector object providing `add_one_op(...)` — presumably batches
        MongoDB writes (defined elsewhere; confirm its contract there).
    mongodb, collection_name, overwrite_existing
        Passed through to `write_ops.add_one_op`.
    circuits : list of Circuit, optional
        Subset of circuits to store; defaults to all circuits in this DataSet.
    outcome_label_order : list, optional
        Desired storage order of outcome labels; must be a permutation of
        this DataSet's outcome labels.
    with_times : bool or "auto", optional
        Whether to store per-outcome time stamps; "auto" stores them only
        when the dataset has non-trivial time dependence.
    datarow_collection_name : str, optional
        Name of the MongoDB collection that receives the per-circuit rows.
    """
    if circuits is not None:
        if len(circuits) > 0 and not isinstance(circuits[0], _cir.Circuit):
            raise ValueError("Argument circuits must be a list of Circuit objects!")
    else:
        circuits = list(self.keys())

    if outcome_label_order is not None:  # convert to tuples if needed
        outcome_label_order = [(ol,) if isinstance(ol, str) else ol
                               for ol in outcome_label_order]

    outcomeLabels = self.outcome_labels
    if outcome_label_order is not None:
        # Reordering requested: validate it's a permutation, then build a map
        # from this dataset's internal outcome indices to stored indices.
        assert(len(outcome_label_order) == len(outcomeLabels))
        assert(all([ol in outcomeLabels for ol in outcome_label_order]))
        assert(all([ol in outcome_label_order for ol in outcomeLabels]))
        outcomeLabels = outcome_label_order
        oli_map_data = {self.olIndex[ol]: i for i, ol in enumerate(outcomeLabels)}  # dataset -> stored indices

        def oli_map(outcome_label_indices):
            # Translate internal indices to the reordered storage indices.
            return [oli_map_data[i] for i in outcome_label_indices]
    else:
        def oli_map(outcome_label_indices):
            return [i.item() for i in outcome_label_indices]  # converts numpy types -> native python types

    doc['outcomes'] = outcomeLabels
    doc['comment'] = self.comment if hasattr(self, 'comment') else None
    doc['datarow_collection_name'] = datarow_collection_name

    if with_times == "auto":
        trivial_times = self.has_trivial_timedependence
    else:
        trivial_times = not with_times

    dataset_id = doc['_id']

    for i, circuit in enumerate(circuits):  # circuit should be a Circuit object here
        dataRow = self[circuit]
        datarow_doc = {'index': i,
                       'circuit': circuit.str,
                       'parent': dataset_id,
                       'outcome_indices': oli_map(dataRow.oli),
                       'repetitions': [r.item() for r in dataRow.reps]  # converts numpy -> Python native types
                       }

        if trivial_times:  # ensure that "repetitions" are just "counts" in trivial-time case
            assert(len(dataRow.oli) == len(set(dataRow.oli))), "Duplicate value in trivial-time data set row!"
        else:
            datarow_doc['times'] = list(dataRow.time)

        if dataRow.aux:
            datarow_doc['aux'] = dataRow.aux  # needs to be JSON-able!

        write_ops.add_one_op(datarow_collection_name, {'circuit': circuit.str, 'parent': dataset_id},
                             datarow_doc, overwrite_existing, mongodb)

@classmethod
def _remove_from_mongodb(cls, mongodb, collection_name, doc_id, session, recursive):
    """
    Delete a stored DataSet document and its per-circuit row documents.

    Atomically removes the main dataset document from `collection_name`,
    then deletes every row document in the dataset's data-row collection
    whose 'parent' field references it.

    Parameters
    ----------
    mongodb : pymongo.database.Database
    collection_name : str
        Collection holding the main dataset document.
    doc_id
        The '_id' of the dataset document to remove.
    session : pymongo.client_session.ClientSession or None
    recursive
        Unused here; accepted for interface compatibility with other
        `_remove_from_mongodb` implementations.
    """
    dataset_doc = mongodb[collection_name].find_one_and_delete({'_id': doc_id}, session=session)
    if dataset_doc is None:
        # Nothing stored under doc_id — previously this fell through and
        # raised TypeError on the subscript below; treat as a no-op instead.
        return
    datarow_collection_name = dataset_doc['datarow_collection_name']
    mongodb[datarow_collection_name].delete_many({'parent': dataset_doc['_id']}, session=session)
1 change: 1 addition & 0 deletions pygsti/forwardsims/forwardsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def _array_types_for_method(cls, method_name):
return ()

def __init__(self, model=None):
super().__init__()
#self.dim = model.dim
self.model = model

Expand Down

0 comments on commit a4106a7

Please sign in to comment.