From 59a5a6a26341762a7e054407816c2db5a38d6e1f Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Fri, 24 Sep 2021 16:52:05 -0500 Subject: [PATCH 1/2] Add `pyard-status` command to check the status of all tables in the databases. This will help to see if there are missing tables and also to compare the number of rows between versions. ``` ------------------------------------------- IMGT DB Version: 3450 ------------------------------------------- |Table Name |Rows | |-----------------------------------------| |dup_g | 50| |dup_lg | 2| |dup_lgx | 2| |g_group | 10841| |lg_group | 10841| |lgx_group | 10841| |exon_group | 9724| |p_group | 9724| |alleles | 33525| |xx_codes | 1690| |who_alleles | 31552| |who_group | 31930| ------------------------------------------- ``` The missing tables are noted and the database can be rebuilt with `pyard-import --re-install` ``` ------------------------------------------- IMGT DB Version: 3290 ------------------------------------------- |Table Name |Rows | |-----------------------------------------| |dup_g | 17| |dup_lg | 0| |dup_lgx | 0| |g_group | 2786| |lg_group | 2786| |lgx_group | 2786| MISSING: exon_group table MISSING: p_group table |alleles | 18451| |xx_codes | 946| MISSING: who_alleles table MISSING: who_group table ------------------------------------------- ``` --- README.rst | 4 +++ pyard/db.py | 15 ++++++++ scripts/pyard-status | 81 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 101 insertions(+) create mode 100755 scripts/pyard-status diff --git a/README.rst b/README.rst index 8bfbb10..a76a7bc 100644 --- a/README.rst +++ b/README.rst @@ -135,3 +135,7 @@ Command Line Tools $ pyard -v 3290 --gl 'A1' -r lgx A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ... 
+ + # Show the status of all py-ard databases + $ pyard-status + diff --git a/pyard/db.py b/pyard/db.py index 5ec02a9..aa88b7a 100644 --- a/pyard/db.py +++ b/pyard/db.py @@ -90,6 +90,21 @@ def tables_exist(connection: sqlite3.Connection, table_names: List[str]): return all([table_exists(connection, table_name) for table_name in table_names]) +def count_rows(connection: sqlite3.Connection, table_name: str) -> int: + """ + Count number of rows in the table. + + :param connection: db connection of type sqlite3.Connection + :param table_name: table in the sqlite db + :return: number of rows in table_name + """ + query = f"SELECT count(*) from '{table_name}'" + cursor = connection.execute(query) + result = cursor.fetchone() + cursor.close() + return result[0] + + def mac_code_to_alleles(connection: sqlite3.Connection, code: str) -> List[str]: """ Look up the MAC code in the database and return corresponding list diff --git a/scripts/pyard-status b/scripts/pyard-status new file mode 100755 index 0000000..91fcb09 --- /dev/null +++ b/scripts/pyard-status @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# py-ard +# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+# +# > http://www.fsf.org/licensing/licenses/lgpl.html +# > http://www.opensource.org/licenses/lgpl-license.php +# +import argparse +import os +import pathlib +import re + +from pyard import db, data_repository + + +def get_data_dir(data_dir): + if data_dir: + path = pathlib.Path(data_dir) + if not path.exists() or not path.is_dir(): + raise RuntimeError(f"{data_dir} is not a valid directory") + data_dir = path + else: + data_dir = db.get_pyard_db_install_directory() + return data_dir + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + usage=""" + [--data-dir ]\n + """, + description=""" + py-ard tool to provide a status report for reference SQLite databases. + """ + ) + parser.add_argument( + "--data-dir", + dest="data_dir" + ) + args = parser.parse_args() + data_dir = get_data_dir(args.data_dir) + # print(data_dir) + + imgt_regex = re.compile(r'pyard-(.+)\.sqlite3') + for _, _, filenames in os.walk(data_dir): + for filename in filenames: + # Get imgt version from the filename + # eg: get 3440 from 'pyard-3440.sqlite3' + match = imgt_regex.match(filename) + imgt_version = match.group(1) # Get first group + db_connection = db.create_db_connection(data_dir, imgt_version) + print('-' * 43) + print(f"IMGT DB Version: {imgt_version}") + print('-' * 43) + print(f"|{'Table Name':20}|{'Rows':20}|") + print(f"|{'-' * 41}|") + for table in data_repository.ars_mapping_tables + \ + data_repository.code_mapping_tables: + if db.table_exists(db_connection, table): + total_rows = db.count_rows(db_connection, table) + print(f"|{table:20}|{total_rows:20}|") + else: + print(f"MISSING: {table} table") + print('-' * 43) + db_connection.close() diff --git a/setup.py b/setup.py index cf70788..b149ead 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ scripts=[ 'scripts/pyard', 'scripts/pyard-import', + 'scripts/pyard-status', 'scripts/pyard-reduce-csv' ], install_requires=requirements, From c2995de50457e283a4a6ad0f50df3c9c217e2956 Mon Sep 17 00:00:00 2001 From: 
pbashyal-nmdp Date: Fri, 24 Sep 2021 16:53:08 -0500 Subject: [PATCH 2/2] =?UTF-8?q?Bump=20version:=200.6.8=20=E2=86=92=200.6.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyard/__init__.py b/pyard/__init__.py index e8211f8..a419903 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -24,4 +24,4 @@ from .pyard import ARD __author__ = """NMDP Bioinformatics""" -__version__ = '0.6.8' +__version__ = '0.6.9' diff --git a/setup.cfg b/setup.cfg index ad90683..9917d39 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.8 +current_version = 0.6.9 commit = True tag = True diff --git a/setup.py b/setup.py index b149ead..f3cc342 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name='py-ard', - version='0.6.8', + version='0.6.9', description="ARD reduction for HLA with Python", long_description=readme + '\n\n' + history, author="CIBMTR",