Significant performance improvements to AddResult / GetParameter #4446

Merged
merged 4 commits on Sep 20, 2022
2 changes: 1 addition & 1 deletion benchmarking/README.rst
@@ -21,7 +21,7 @@ following command from this directory:

.. code:: bash

asv run python=same
asv run --python=same

If you do not have an environment set, then ``asv`` can set it up
automatically. The benchmarks are executed in the same way:
2 changes: 1 addition & 1 deletion benchmarking/asv.conf.json
@@ -29,7 +29,7 @@
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",
"environment_type": "virtualenv",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
3 changes: 3 additions & 0 deletions docs/changes/newsfragments/4446.improved
@@ -0,0 +1,3 @@
Improve performance of the ``sqlite3`` converters and adapters used to write to and read from the database.

Get rid of ``sqlite3.Row`` and unnecessary unpacking into ``list``.
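
For context, the core of this change is dropping the ``sqlite3.Row`` row factory in favour of plain tuples that are unpacked directly. A minimal sketch of the two access patterns (illustrative only, not code from the QCoDeS codebase; table and column names are made up):

```python
# Illustrative sketch, not QCoDeS code: contrast sqlite3.Row access with plain
# tuple rows. With the default row factory the cursor already yields tuples,
# so query results can be unpacked directly without the Row wrapper object.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE runs (run_id INTEGER, result_table_name TEXT)")
conn.execute("INSERT INTO runs VALUES (1, 'results-1-1')")

# Old pattern: every row goes through a sqlite3.Row mapping
conn.row_factory = sqlite3.Row
for row in conn.execute("SELECT run_id, result_table_name FROM runs"):
    print(row["run_id"], row["result_table_name"])

# New pattern: plain tuples, unpacked in the loop target
conn.row_factory = None
for run_id, table_name in conn.execute("SELECT run_id, result_table_name FROM runs"):
    print(run_id, table_name)

conn.close()
```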
9 changes: 6 additions & 3 deletions qcodes/dataset/data_set.py
@@ -1174,11 +1174,14 @@ def unsubscribe_all(self) -> None:
"""
Remove all subscribers
"""
sql = "select * from sqlite_master where type = 'trigger';"
sql = """
SELECT name FROM sqlite_master
WHERE type = 'trigger'
"""
triggers = atomic_transaction(self.conn, sql).fetchall()
with atomic(self.conn) as conn:
for trigger in triggers:
remove_trigger(conn, trigger['name'])
for (trigger,) in triggers:
remove_trigger(conn, trigger)
for sub in self.subscribers.values():
sub.schedule_stop()
sub.join()
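The ``unsubscribe_all`` change above illustrates a second micro-optimization: select only the column that is needed and unpack the resulting 1-tuples directly in the loop target. A self-contained sketch (illustrative only, not QCoDeS code; the real code calls ``remove_trigger``, here replaced by a plain ``DROP TRIGGER``):

```python
# Illustrative sketch, not QCoDeS code: fetch trigger names as 1-tuples and
# unpack them directly instead of indexing into sqlite3.Row objects.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE results (x INTEGER)")
conn.execute("CREATE TRIGGER on_insert AFTER INSERT ON results BEGIN SELECT 1; END")

triggers = conn.execute(
    "SELECT name FROM sqlite_master WHERE type = 'trigger'"
).fetchall()
for (trigger,) in triggers:  # each row is a 1-tuple, e.g. ('on_insert',)
    conn.execute(f"DROP TRIGGER IF EXISTS {trigger}")

conn.close()
```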
9 changes: 5 additions & 4 deletions qcodes/dataset/experiment_container.py
@@ -169,8 +169,10 @@ def data_set(self, counter: int) -> DataSet:

def data_sets(self) -> list[DataSetProtocol]:
"""Get all the datasets of this experiment"""
runs = get_runs(self.conn, self.exp_id)
return [load_by_id(run['run_id'], conn=self.conn) for run in runs]
return [
load_by_id(run_id, conn=self.conn)
for run_id in get_runs(self.conn, self.exp_id)
]

def last_data_set(self) -> DataSetProtocol:
"""Get the last dataset of this experiment"""
@@ -216,8 +218,7 @@ def experiments(conn: ConnectionPlus | None = None) -> list[Experiment]:
"""
conn = conn_from_dbpath_or_conn(conn=conn, path_to_db=None)
log.info(f"loading experiments from {conn.path_to_dbfile}")
rows = get_experiments(conn)
return [load_experiment(row['exp_id'], conn) for row in rows]
return [load_experiment(exp_id, conn) for exp_id in get_experiments(conn)]


def new_experiment(
52 changes: 24 additions & 28 deletions qcodes/dataset/sqlite/database.py
@@ -6,6 +6,7 @@
from __future__ import annotations

import io
import math
import sqlite3
import sys
from collections.abc import Iterator
@@ -37,15 +38,20 @@ def _adapt_array(arr: np.ndarray) -> sqlite3.Binary:
https://stackoverflow.com/questions/3425320/sqlite3-programmingerror-you-must-not-use-8-bit-bytestrings-unless-you-use-a-te
"""
out = io.BytesIO()
np.save(out, arr)
# Directly use np.lib.format.write_array instead of np.save, force the version
# to be 3.0 (when reading, versions 1.0 and 2.0 can trigger a slow clean-up step
# to ensure backward compatibility with Python 2) and disable pickle (slow and
# insecure)
np.lib.format.write_array(out, arr, version=(3, 0), allow_pickle=False)
out.seek(0)
return sqlite3.Binary(out.read())


def _convert_array(text: bytes) -> np.ndarray:
out = io.BytesIO(text)
out.seek(0)
return np.load(out)
# Use np.lib.format.read_array (the counterpart of np.lib.format.write_array);
# reading npy format version 3.0 is 3 times faster than previous versions (no
# clean-up step for Python 2 backward compatibility)
return np.lib.format.read_array(io.BytesIO(text), allow_pickle=False)


def _convert_complex(text: bytes) -> np.complexfloating:
@@ -75,36 +81,29 @@ def _convert_numeric(value: bytes) -> float | int | str:
try:
# First, try to convert bytes to float
numeric = float(value)
except ValueError as e:
# If an exception has been raised, we first need to find out
# if the reason was the conversion to float, and, if so, we are sure
# that we need to return a string
if "could not convert string to float" in str(e):
return str(value, encoding=this_session_default_encoding)
else:
# otherwise, the exception is forwarded up the stack
raise e

# If that worked, e.g. did not raise an exception, then we check if the
# outcome is 'nan'
if np.isnan(numeric):
return numeric

# Then we check if the outcome is 'inf', includes +inf and -inf
if np.isinf(numeric):
except ValueError:
# Let string casting fail if bytes encoding is invalid
return str(value, encoding=this_session_default_encoding)

# If that worked, i.e. did not raise an exception, then we check if the outcome
# is either an infinity or a NaN.
# For a single value, math.isfinite is 10 times faster than np.isfinite (or than
# combining np.isnan and np.isinf)
if not math.isfinite(numeric):
return numeric

# If it is not 'nan' and not 'inf', then we need to see if the value is
# really an integer or with floating point digits
# If it is not 'nan' and not 'inf', then we need to see if the value is really
# an integer or has floating point digits
numeric_int = int(numeric)
if numeric != numeric_int:
return numeric
else:
return numeric_int


def _adapt_float(fl: float) -> float | str:
if np.isnan(fl):
# For a single value, math.isnan is 10 times faster than np.isnan
# Overall, saving floats with numeric format is 2 times faster with math.isnan
if math.isnan(fl):
return "nan"
return float(fl)

@@ -150,9 +149,6 @@ def connect(name: str | Path, debug: bool = False, version: int = -1) -> Connect
f"version of QCoDeS supports up to "
f"version {latest_supported_version}")

# sqlite3 options
conn.row_factory = sqlite3.Row

# Make sure numpy ints and floats types are inserted properly
for numpy_int in numpy_ints:
sqlite3.register_adapter(numpy_int, int)
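To see how the rewritten adapter and converter plug into ``sqlite3``, here is a minimal, self-contained sketch of the ``write_array``/``read_array`` roundtrip (illustrative only, not the QCoDeS implementation; the table and the ``array`` type name are made up):

```python
# Illustrative sketch, not the QCoDeS implementation: store numpy arrays as
# BLOBs via np.lib.format.write_array / read_array with npy format 3.0 and
# pickling disabled, registered as a sqlite3 adapter/converter pair.
import io
import sqlite3

import numpy as np


def adapt_array(arr: np.ndarray) -> sqlite3.Binary:
    buf = io.BytesIO()
    # npy format 3.0 avoids the slow Python 2 compatibility clean-up on read;
    # allow_pickle=False rules out the slow and insecure pickle code path.
    np.lib.format.write_array(buf, arr, version=(3, 0), allow_pickle=False)
    return sqlite3.Binary(buf.getvalue())


def convert_array(blob: bytes) -> np.ndarray:
    return np.lib.format.read_array(io.BytesIO(blob), allow_pickle=False)


sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter("array", convert_array)

conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
conn.execute("CREATE TABLE data (value array)")
conn.execute("INSERT INTO data VALUES (?)", (np.arange(5.0),))
(roundtripped,) = conn.execute("SELECT value FROM data").fetchone()
assert np.array_equal(roundtripped, np.arange(5.0))
conn.close()
```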
34 changes: 12 additions & 22 deletions qcodes/dataset/sqlite/db_upgrades/upgrade_2_to_3.py
@@ -16,7 +16,7 @@
atomic_transaction,
transaction,
)
from qcodes.dataset.sqlite.query_helpers import one
from qcodes.dataset.sqlite.query_helpers import get_description_map, one

log = logging.getLogger(__name__)

@@ -29,8 +29,8 @@ def _2to3_get_result_tables(conn: ConnectionPlus) -> dict[int, str]:
data = cur.fetchall()
cur.close()
results = {}
for row in data:
results[row['run_id']] = row['result_table_name']
for run_id, result_table_name in data:
results[run_id] = result_table_name
return results


@@ -47,9 +47,7 @@ def _2to3_get_layout_ids(conn: ConnectionPlus) -> DefaultDict[int, list[int]]:

results: DefaultDict[int, list[int]] = defaultdict(list)

for row in data:
run_id = row['run_id']
layout_id = row['layout_id']
for run_id, layout_id in data:
results[run_id].append(layout_id)

return results
@@ -68,9 +66,7 @@ def _2to3_get_indeps(conn: ConnectionPlus) -> DefaultDict[int, list[int]]:
cur.close()
results: DefaultDict[int, list[int]] = defaultdict(list)

for row in data:
run_id = row['run_id']
layout_id = row['layout_id']
for run_id, layout_id in data:
results[run_id].append(layout_id)

return results
@@ -89,9 +85,7 @@ def _2to3_get_deps(conn: ConnectionPlus) -> DefaultDict[int, list[int]]:
cur.close()
results: DefaultDict[int, list[int]] = defaultdict(list)

for row in data:
run_id = row['run_id']
layout_id = row['layout_id']
for run_id, layout_id in data:
results[run_id].append(layout_id)

return results
@@ -112,9 +106,7 @@ def _2to3_get_dependencies(conn: ConnectionPlus) -> DefaultDict[int, list[int]]:
if len(data) == 0:
return results

for row in data:
dep = row['dependent']
indep = row['independent']
for dep, indep in data:
results[dep].append(indep)

return results
@@ -129,11 +121,8 @@ def _2to3_get_layouts(conn: ConnectionPlus) -> dict[int, tuple[str, str, str, st
cur.execute(query)

results: dict[int, tuple[str, str, str, str]] = {}
for row in cur.fetchall():
results[row['layout_id']] = (row['parameter'],
row['label'],
row['unit'],
row['inferred_from'])
for layout_id, parameter, label, unit, inferred_from in cur.fetchall():
results[layout_id] = (parameter, label, unit, inferred_from)
return results


@@ -159,10 +148,11 @@ def _2to3_get_paramspecs(
# get the data type
sql = f'PRAGMA TABLE_INFO("{result_table_name}")'
c = transaction(conn, sql)
description = get_description_map(c)
paramtype = None
for row in c.fetchall():
if row['name'] == name:
paramtype = row['type']
if row[description["name"]] == name:
paramtype = row[description["type"]]
break
if paramtype is None:
raise TypeError(f"Could not determine type of {name} during the"
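The ``get_description_map`` helper used above lives in ``qcodes.dataset.sqlite.query_helpers`` and is not shown in this diff. A sketch of the underlying idea (illustrative only; ``description_map`` below is a hypothetical stand-in, not the real helper): build a column-name-to-index map from ``cursor.description`` once, then index plain tuple rows by position instead of relying on ``sqlite3.Row``.

```python
# Illustrative sketch of the idea behind get_description_map; description_map
# is a hypothetical stand-in, not the helper from query_helpers.
import sqlite3


def description_map(cursor: sqlite3.Cursor) -> dict[str, int]:
    # cursor.description is a sequence of 7-tuples whose first element is the
    # column name (the remaining entries are None for sqlite3).
    return {column[0]: index for index, column in enumerate(cursor.description)}


conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE results (x REAL, y TEXT)")
cursor = conn.execute('PRAGMA TABLE_INFO("results")')
columns = description_map(cursor)  # e.g. {'cid': 0, 'name': 1, 'type': 2, ...}
for row in cursor.fetchall():
    print(row[columns["name"]], row[columns["type"]])  # -> "x REAL", "y TEXT"
conn.close()
```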