From 7750508d23698965e26a945f26757f7b8f24e5c8 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 4 Jun 2025 17:23:50 +0200 Subject: [PATCH 1/4] Add index to sessions DB table --- reframe/frontend/reporting/storage.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/reframe/frontend/reporting/storage.py b/reframe/frontend/reporting/storage.py index 32c364abcb..de8afb2714 100644 --- a/reframe/frontend/reporting/storage.py +++ b/reframe/frontend/reporting/storage.py @@ -101,6 +101,7 @@ def _db_file(self): self._db_create() + self._db_create_indexes() self._db_schema_check() return self.__db_file @@ -161,12 +162,20 @@ def _db_create(self): 'uuid TEXT, ' 'FOREIGN KEY(session_uuid) ' 'REFERENCES sessions(uuid) ON DELETE CASCADE)') + + # Update DB file mode + os.chmod(self.__db_file, self.__db_file_mode) + + def _db_create_indexes(self): + clsname = type(self).__name__ + getlogger().debug(f'{clsname}: creating database indexes if needed') + with self._db_connect(self.__db_file) as conn: conn.execute('CREATE INDEX IF NOT EXISTS index_testcases_time ' 'on testcases(job_completion_time_unix)') conn.execute('CREATE TABLE IF NOT EXISTS metadata(' 'schema_version TEXT)') - # Update DB file mode - os.chmod(self.__db_file, self.__db_file_mode) + conn.execute('CREATE INDEX IF NOT EXISTS index_sessions_time ' + 'on sessions(session_start_unix)') def _db_schema_check(self): with self._db_read(self.__db_file) as conn: From 3d40b79b6effb63e1dc210417b9a9e17ba9f37b5 Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Thu, 5 Jun 2025 14:12:33 +0200 Subject: [PATCH 2/4] Avoid unnecessary string concat in `_mass_json_decode()` --- reframe/frontend/reporting/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/frontend/reporting/storage.py b/reframe/frontend/reporting/storage.py index de8afb2714..f06d2dc515 100644 --- a/reframe/frontend/reporting/storage.py +++ b/reframe/frontend/reporting/storage.py @@ -256,7 
+256,7 @@ def _extract_sess_info(s): @time_function def _mass_json_decode(json_objs): - data = '[' + ','.join(json_objs) + ']' + data = f'[{",".join(json_objs)}]' getlogger().debug( f'decoding JSON raw data of length {len(data)}' ) From 9c32051b3099854afa56685c74fd66a1c6a068ce Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Fri, 6 Jun 2025 17:51:59 +0200 Subject: [PATCH 3/4] Return the raw session data without decoding in `--describe-sessions` --- reframe/frontend/cli.py | 4 +- reframe/frontend/reporting/__init__.py | 6 ++- reframe/frontend/reporting/storage.py | 72 +++++++++++++++----------- reframe/frontend/reporting/utility.py | 4 +- 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/reframe/frontend/cli.py b/reframe/frontend/cli.py index 194ccb439e..8f4c87e482 100644 --- a/reframe/frontend/cli.py +++ b/reframe/frontend/cli.py @@ -1083,9 +1083,7 @@ def restrict_logging(): lambda htype: htype != 'stream') with exit_gracefully_on_error('failed to retrieve session data', printer): - printer.info(jsonext.dumps(reporting.session_info( - options.describe_stored_sessions - ), indent=2)) + printer.info(reporting.session_info(options.describe_stored_sessions)) sys.exit(0) if options.describe_stored_testcases: diff --git a/reframe/frontend/reporting/__init__.py b/reframe/frontend/reporting/__init__.py index bfb8ec4fd9..bfcd3380a7 100644 --- a/reframe/frontend/reporting/__init__.py +++ b/reframe/frontend/reporting/__init__.py @@ -860,8 +860,10 @@ def testcase_data(spec, namepatt=None, test_filter=None): @time_function def session_info(query): '''Retrieve session details as JSON''' - - return StorageBackend.default().fetch_sessions(parse_query_spec(query)) + sessions = StorageBackend.default().fetch_sessions( + parse_query_spec(query), False + ) + return rf'[{",".join(sessions)}]' @time_function diff --git a/reframe/frontend/reporting/storage.py b/reframe/frontend/reporting/storage.py index f06d2dc515..e98ef164d3 100644 --- 
a/reframe/frontend/reporting/storage.py +++ b/reframe/frontend/reporting/storage.py @@ -54,12 +54,14 @@ def fetch_testcases(self, selector: QuerySelector, name_patt=None, ''' @abc.abstractmethod - def fetch_sessions(self, selector: QuerySelector): + def fetch_sessions(self, selector: QuerySelector, decode=True): '''Fetch sessions based on the specified query selector. :arg selector: an instance of :class:`QuerySelector` that will specify the actual type of query requested. - :returns: A list of matching sessions. + :arg decode: If set to :obj:`False`, do not decode the retuned + sessions and leave them JSON-encoded. + :returns: A list of matching sessions, either decoded or not. ''' @abc.abstractmethod @@ -241,10 +243,16 @@ def store(self, report, report_file=None): return self._db_store_report(conn, report, report_file) @time_function - def _decode_sessions(self, results, sess_filter): - '''Decode sessions from the raw DB results. + def _mass_json_decode(self, *json_objs): + data = rf'[{",".join(json_objs)}]' + getlogger().debug(f'decoding JSON raw data of length {len(data)}') + return json.loads(data) - Return a map of session uuids to decoded session data + @time_function + def _fetch_sessions(self, results, sess_filter): + '''Fetch JSON-encoded sessions from the DB by applying a filter. + + :returns: A list of the JSON-encoded valid sessions. 
''' sess_info_patt = re.compile( r'\"session_info\":\s+(?P<sess_info>\{.*?\})' @@ -254,34 +262,31 @@ def _decode_sessions(self, results, sess_filter): def _extract_sess_info(s): return sess_info_patt.search(s).group('sess_info') - @time_function - def _mass_json_decode(json_objs): - data = f'[{",".join(json_objs)}]' - getlogger().debug( - f'decoding JSON raw data of length {len(data)}' - ) - return json.loads(data) - session_infos = {} sessions = {} for uuid, json_blob in results: sessions.setdefault(uuid, json_blob) session_infos.setdefault(uuid, _extract_sess_info(json_blob)) - # Find the UUIDs to decode fully by inspecting only the session info + # Find the relevant sessions by inspecting only the session info uuids = [] - for sess_info in _mass_json_decode(session_infos.values()): + infos = self._mass_json_decode(*session_infos.values()) + for sess_info in infos: try: if self._db_filter_json(sess_filter, sess_info): uuids.append(sess_info['uuid']) except Exception: continue - # Decode selected sessions - reports = _mass_json_decode(sessions[uuid] for uuid in uuids) + return [sessions[uuid] for uuid in uuids] + + def _decode_and_index_sessions(self, json_blobs): + '''Decode the sessions and index them by their uuid. - # Return only the selected sessions - return {rpt['session_info']['uuid']: rpt for rpt in reports} + :returns: A dictionary with uuids as keys and the sessions as values.
+ ''' + return {sess['session_info']['uuid']: sess + for sess in self._mass_json_decode(*json_blobs)} @time_function def _fetch_testcases_raw(self, condition): @@ -298,7 +303,11 @@ def _fetch_testcases_raw(self, condition): results = conn.execute(query).fetchall() getprofiler().exit_region() - sessions = self._decode_sessions(results, None) + + # Fetch, decode and index the sessions by their uuid + sessions = self._decode_and_index_sessions( + self._fetch_sessions(results, None) + ) # Extract the test case data by extracting their UUIDs getprofiler().enter_region('sqlite testcase query') @@ -328,8 +337,8 @@ def _fetch_testcases_raw(self, condition): return testcases @time_function - def _fetch_testcases_from_session(self, selector, - name_patt=None, test_filter=None): + def _fetch_testcases_from_session(self, selector, name_patt=None, + test_filter=None): query = 'SELECT uuid, json_blob from sessions' if selector.by_session_uuid(): query += f' WHERE uuid == "{selector.uuid}"' @@ -347,9 +356,11 @@ def _fetch_testcases_from_session(self, selector, if not results: return [] - sessions = self._decode_sessions( - results, - selector.sess_filter if selector.by_session_filter() else None + sessions = self._decode_and_index_sessions( + self._fetch_sessions( + results, + selector.sess_filter if selector.by_session_filter() else None + ) ) return [tc for sess in sessions.values() for run in sess['runs'] for tc in run['testcases'] @@ -375,7 +386,7 @@ def fetch_testcases(self, selector: QuerySelector, name_patt=None, test_filter=None): if selector.by_session(): return self._fetch_testcases_from_session( - selector, name_patt, test_filter + selector, name_patt, test_filter, ) else: return self._fetch_testcases_time_period( @@ -383,7 +394,7 @@ def fetch_testcases(self, selector: QuerySelector, ) @time_function - def fetch_sessions(self, selector: QuerySelector): + def fetch_sessions(self, selector: QuerySelector, decode=True): query = 'SELECT uuid, json_blob FROM sessions' if 
selector.by_time_period(): ts_start, ts_end = selector.time_period @@ -398,11 +409,14 @@ def fetch_sessions(self, selector: QuerySelector): results = conn.execute(query).fetchall() getprofiler().exit_region() - session = self._decode_sessions( + raw_sessions = self._fetch_sessions( results, selector.sess_filter if selector.by_session_filter() else None ) - return [*session.values()] + if decode: + return [*self._decode_and_index_sessions(raw_sessions).values()] + else: + return raw_sessions def _do_remove(self, conn, uuids): '''Remove sessions''' diff --git a/reframe/frontend/reporting/utility.py b/reframe/frontend/reporting/utility.py index 544811b0ea..7d127b9409 100644 --- a/reframe/frontend/reporting/utility.py +++ b/reframe/frontend/reporting/utility.py @@ -200,7 +200,9 @@ def by_session_filter(self): def __repr__(self): clsname = type(self).__name__ - return f'{clsname}(value={self.__value}, kind={self.__kind})' + return (f'{clsname}(uuid={self.__uuid!r}, ' + f'time_period={self.__time_period!r}, ' + f'sess_filter={self.__sess_filter!r})') def parse_time_period(s): From 590db46e3dafc9cf6ada8f1b757afdf1d0afb72e Mon Sep 17 00:00:00 2001 From: Vasileios Karakasis Date: Wed, 10 Sep 2025 17:06:11 +0200 Subject: [PATCH 4/4] Apply suggestion from @teojgo Co-authored-by: Theofilos Manitaras --- reframe/frontend/reporting/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reframe/frontend/reporting/storage.py b/reframe/frontend/reporting/storage.py index e98ef164d3..7175744691 100644 --- a/reframe/frontend/reporting/storage.py +++ b/reframe/frontend/reporting/storage.py @@ -59,7 +59,7 @@ def fetch_sessions(self, selector: QuerySelector, decode=True): :arg selector: an instance of :class:`QuerySelector` that will specify the actual type of query requested. - :arg decode: If set to :obj:`False`, do not decode the retuned + :arg decode: If set to :obj:`False`, do not decode the returned sessions and leave them JSON-encoded. 
:returns: A list of matching sessions, either decoded or not. '''