From 280535deb103127ca73695382eca9731ecbf0d68 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Wed, 16 Jun 2021 11:34:08 -0500 Subject: [PATCH 1/9] change C to Cw for serology; throw value error for invalid MAC --- pyard/broad_splits.py | 3 +++ pyard/data_repository.py | 2 ++ pyard/dna_relshp.csv | 2 ++ pyard/pyard.py | 26 +++++++++++++++----------- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/pyard/broad_splits.py b/pyard/broad_splits.py index e52c30c..36f0938 100644 --- a/pyard/broad_splits.py +++ b/pyard/broad_splits.py @@ -41,6 +41,8 @@ 'B*21': ['B*49', 'B*50'], 'B*22': ['B*54', 'B*55', 'B*56'], + 'C*10': ['C*03', 'C*04'], + 'DQB1*01': ['DQB1*05', 'DQB1*06'], 'DRB1*02': ['DRB1*15', 'DRB1*16'], 'DRB1*06': ['DRB1*13', 'DRB1*14'] @@ -59,6 +61,7 @@ 'B22': ['B54', 'B55', 'B56'], 'B40': ['B60', 'B61'], 'B70': ['B71', 'B72'], + 'Cw3': ['Cw9', 'Cw10'], 'DQ1': ['DQ5', 'DQ6'], 'DR2': ['DR15', 'DR16'], 'DR3': ['DR17', 'DR18'], diff --git a/pyard/data_repository.py b/pyard/data_repository.py index 6ad47c2..4742cbf 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -307,6 +307,8 @@ def to_serological_name(locus_name: str): """ locus, sero_number = locus_name.split('*') sero_locus = locus[:2] + if (sero_locus == "C"): + sero_locus = "Cw" sero_name = sero_locus + sero_number return sero_name diff --git a/pyard/dna_relshp.csv b/pyard/dna_relshp.csv index a1e026b..02a421f 100644 --- a/pyard/dna_relshp.csv +++ b/pyard/dna_relshp.csv @@ -26,6 +26,8 @@ B,21,50 B,22,54 B,22,55 B,22,56 +C,03,09 +C,03,10 DQB1,01,05 DQB1,01,06 DRB1,02,15 diff --git a/pyard/pyard.py b/pyard/pyard.py index 04368d0..e8d0803 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -197,16 +197,20 @@ def redux_gl(self, glstring: str, redux_type: str) -> str: return self.redux_gl("/".join(self.xx_codes[loc_antigen]), redux_type) # Handle MAC - if self.is_mac(glstring) and is_valid_mac_code(self.db_connection, code): - if HLA_regex.search(glstring): - # Remove HLA- prefix 
- allele_name = glstring.split("-")[1] - loc_antigen, code = allele_name.split(":") - alleles = self._get_alleles(code, loc_antigen) - alleles = ["HLA-" + a for a in alleles] + if self.is_mac(glstring): + if is_valid_mac_code(self.db_connection, code): + if HLA_regex.search(glstring): + # Remove HLA- prefix + allele_name = glstring.split("-")[1] + loc_antigen, code = allele_name.split(":") + alleles = self._get_alleles(code, loc_antigen) + alleles = ["HLA-" + a for a in alleles] + else: + alleles = self._get_alleles(code, loc_antigen) + return self.redux_gl("/".join(alleles), redux_type) else: - alleles = self._get_alleles(code, loc_antigen) - return self.redux_gl("/".join(alleles), redux_type) + raise ValueError + return self.redux(glstring, redux_type) @@ -229,12 +233,12 @@ def is_serology(allele: str) -> bool: return False locus = allele[0:2] - if locus in ['DR', 'DP', 'DQ']: + if locus in ['DR', 'DP', 'DQ', 'Cw']: antigen = allele[2:] return antigen.isdigit() locus = allele[0:1] - if locus in ['A', 'B', 'C', 'D']: + if locus in ['A', 'B']: antigen = allele[1:] return antigen.isdigit() From bd8f45c16c5dfb5130e4b6dbd824e5a4698f3f28 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Wed, 16 Jun 2021 13:33:30 -0500 Subject: [PATCH 2/9] revert the raise error --- pyard/pyard.py | 8 ++++---- tests/features/mac.feature | 3 ++- tests/features/serology.feature | 1 + tests/steps/redux_allele.py | 2 ++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index e8d0803..e7447b4 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -209,9 +209,8 @@ def redux_gl(self, glstring: str, redux_type: str) -> str: alleles = self._get_alleles(code, loc_antigen) return self.redux_gl("/".join(alleles), redux_type) else: - raise ValueError - - + # future: raise ValueError + return '' return self.redux(glstring, redux_type) def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: @@ -223,8 +222,9 @@ def is_XX(self, 
glstring: str, loc_antigen: str = None, code: str = None) -> boo @staticmethod def is_serology(allele: str) -> bool: """ - A serology has the locus name (first 2 letters for DRB1, DRB3, DQB1, DQA1, DPB1 and DPA1) + A serology has the locus name (first 2 letters for DRB1, DQB1) of the allele followed by numerical antigen. + Cw is the serological designation for HLA-C :param allele: The allele to test for serology :return: True if serology diff --git a/tests/features/mac.feature b/tests/features/mac.feature index 7c65954..d60b969 100644 --- a/tests/features/mac.feature +++ b/tests/features/mac.feature @@ -21,4 +21,5 @@ Feature: MAC (Multiple Allele Code) | B*08:ASXJP | lgx | B*08:01 | | C*07:HTGM | lgx | C*07:01/C*07:150Q | | A*01:AC+A*01:AB | G | A*01:01:01G/A*01:02+A*01:01:01G/A*01:03:01G | - | A*01:01+A*01:AB | G | A*01:01:01G+A*01:01:01G/A*01:02 | \ No newline at end of file + | A*01:01+A*01:AB | G | A*01:01:01G+A*01:01:01G/A*01:02 | + | C*05:APUF | lg | X | diff --git a/tests/features/serology.feature b/tests/features/serology.feature index 371a51f..eb2f150 100644 --- a/tests/features/serology.feature +++ b/tests/features/serology.feature @@ -17,4 +17,5 @@ Feature: Serology | A10 | lgx | 
A*25:01/A*25:02/A*25:03/A*25:04/A*25:05/A*25:06/A*25:08/A*25:09/A*25:10/A*25:11/A*25:13/A*25:14/A*25:15/A*25:16/A*25:17/A*25:18/A*25:19/A*25:20/A*25:21/A*25:22/A*25:23/A*25:24/A*25:25/A*25:26/A*25:27/A*25:28/A*25:29/A*25:30/A*25:31/A*25:32/A*25:33/A*25:34/A*25:35/A*25:36/A*25:37/A*25:38/A*25:39/A*25:40/A*25:41/A*25:43/A*25:44/A*25:45/A*25:46/A*25:47/A*25:48/A*25:50/A*25:51/A*25:52/A*25:53/A*25:54/A*25:55/A*25:56/A*25:57/A*25:58/A*25:59/A*25:61/A*25:64/A*25:65/A*25:66/A*25:67/A*25:70/A*25:71/A*25:72/A*25:73/A*26:01/A*26:02/A*26:03/A*26:04/A*26:05/A*26:06/A*26:07/A*26:08/A*26:09/A*26:10/A*26:12/A*26:13/A*26:14/A*26:15/A*26:16/A*26:17/A*26:18/A*26:19/A*26:20/A*26:21/A*26:22/A*26:23/A*26:27/A*26:28/A*26:29/A*26:30/A*26:31/A*26:32/A*26:33/A*26:34/A*26:35/A*26:36/A*26:37/A*26:38/A*26:39/A*26:40/A*26:41/A*26:42/A*26:43/A*26:45/A*26:46/A*26:47/A*26:48/A*26:49/A*26:50/A*26:51/A*26:52/A*26:53/A*26:54/A*26:55/A*26:57/A*26:58/A*26:59/A*26:61/A*26:62/A*26:63/A*26:64/A*26:65/A*26:66/A*26:67/A*26:68/A*26:69/A*26:70/A*26:72/A*26:73/A*26:74/A*26:75/A*26:76/A*26:77/A*26:78/A*26:79/A*26:80/A*26:81/A*26:83/A*26:84/A*26:85/A*26:86/A*26:87/A*26:88/A*26:89/A*26:90/A*26:91/A*26:92/A*26:93/A*26:94/A*26:95/A*26:96/A*26:97/A*26:100/A*26:101/A*26:102/A*26:103/A*26:104/A*26:105/A*26:106/A*26:108/A*26:109/A*26:110/A*26:111/A*26:112/A*26:113/A*26:114/A*26:115/A*26:116/A*26:118/A*26:119/A*26:120/A*26:121/A*26:122/A*26:123/A*26:124/A*26:125/A*26:126/A*26:128/A*26:129/A*26:130/A*26:131/A*26:132/A*26:133/A*26:134/A*26:135/A*26:136/A*26:137/A*26:138/A*26:139/A*26:140/A*26:141/A*26:142/A*26:143/A*26:144/A*26:146/A*26:147/A*26:148/A*26:149/A*26:150/A*26:151/A*26:152/A*26:153/A*26:154/A*26:155/A*26:156/A*26:158/A*26:159/A*26:165/A*26:169/A*26:170/A*26:171/A*26:172/A*26:173/A*26:174/A*26:175/A*26:176/A*26:177/A*26:178/A*26:181/A*26:182/A*26:184/A*26:188/A*26:189/A*26:190/A*26:192/A*26:193/A*26:194/A*26:195/A*26:196/A*26:197/A*26:198/A*26:200/A*26:204/A*26:205/A*26:207/A*26:211/A*26:212/A*26:213/A*26:214/A
*34:01/A*34:02/A*34:03/A*34:04/A*34:05/A*34:06/A*34:07/A*34:08/A*34:09/A*34:11/A*34:12/A*34:13/A*34:14/A*34:15/A*34:16/A*34:17/A*34:19/A*34:20/A*34:21/A*34:22/A*34:23/A*34:24/A*34:25/A*66:01/A*66:02/A*66:03/A*66:04/A*66:05/A*66:06/A*66:07/A*66:09/A*66:10/A*66:11/A*66:12/A*66:13/A*66:14/A*66:15/A*66:16/A*66:18/A*66:19/A*66:20/A*66:21/A*66:22/A*66:23/A*66:24/A*66:25/A*66:26Q/A*66:30/A*66:32/A*66:33/A*66:35/A*66:37/A*66:40/A*66:41/A*66:42/A*66:43 | | A19 | G | A*02:65/A*29:01:01G/A*29:01:02/A*29:01:03/A*29:01:04/A*29:01:06/A*29:01:07/A*29:01:08/A*29:01:09/A*29:01:10/A*29:01:11/A*29:01:12/A*29:01:13/A*29:02:01G/A*29:02:02/A*29:02:03/A*29:02:04/A*29:02:05/A*29:02:06/A*29:02:08/A*29:02:09/A*29:02:10/A*29:02:11/A*29:02:12/A*29:02:13/A*29:02:14/A*29:02:15/A*29:02:16/A*29:02:17G/A*29:02:18/A*29:02:19/A*29:02:21/A*29:02:22/A*29:02:23/A*29:02:25/A*29:02:29/A*29:02:31/A*29:02:32/A*29:02:33/A*29:03/A*29:04/A*29:05/A*29:06/A*29:07/A*29:09/A*29:10:01/A*29:10:02/A*29:11/A*29:12/A*29:13/A*29:14/A*29:15/A*29:16/A*29:17/A*29:18/A*29:19/A*29:20/A*29:21/A*29:22/A*29:23/A*29:24/A*29:25/A*29:27/A*29:28/A*29:29/A*29:30/A*29:31/A*29:32/A*29:33/A*29:34/A*29:35/A*29:36/A*29:37/A*29:38/A*29:39/A*29:40/A*29:41/A*29:42/A*29:43/A*29:44/A*29:45/A*29:47/A*29:48/A*29:49/A*29:50/A*29:51/A*29:52/A*29:53/A*29:54/A*29:55/A*29:56/A*29:57/A*29:58/A*29:59/A*29:60/A*29:61/A*29:62/A*29:63/A*29:64/A*29:65/A*29:66/A*29:67/A*29:68/A*29:69/A*29:70/A*29:71/A*29:72/A*29:73/A*29:74/A*29:76/A*29:77/A*29:79/A*29:80/A*29:81/A*29:82/A*29:83/A*29:84/A*29:85/A*29:86/A*29:87/A*29:88/A*29:89/A*29:90/A*29:91/A*29:92/A*29:93/A*29:94/A*29:96/A*29:97/A*29:98/A*29:99/A*29:101:01/A*29:101:02/A*29:102/A*29:103/A*29:104/A*29:105/A*29:106/A*29:107/A*29:108/A*29:109/A*29:110/A*29:111/A*29:113/A*29:114/A*29:115/A*29:117/A*29:118/A*29:122/A*29:123/A*29:124/A*29:125/A*29:126Q/A*29:127/A*29:128/A*29:129/A*29:132/A*29:133:01G/A*29:136/A*29:137/A*29:138/A*29:139/A*29:140/A*29:141/A*29:142/A*29:143/A*29:144/A*29:150/A*29:151/A*29:152/A*29:
153/A*29:154/A*30:01:01G/A*30:01:03/A*30:01:04/A*30:01:05/A*30:01:06/A*30:01:07/A*30:01:08/A*30:01:09/A*30:01:10/A*30:01:11/A*30:01:12/A*30:01:14/A*30:01:15/A*30:01:16/A*30:01:17/A*30:01:18/A*30:02:01G/A*30:02:03/A*30:02:04/A*30:02:05/A*30:02:06/A*30:02:07/A*30:02:08/A*30:02:09/A*30:02:10/A*30:02:11/A*30:02:12/A*30:02:13/A*30:02:14/A*30:02:15/A*30:02:16/A*30:02:17/A*30:02:18/A*30:02:19/A*30:02:22/A*30:02:24/A*30:02:26/A*30:03/A*30:04:01G/A*30:04:02/A*30:04:03/A*30:06/A*30:07/A*30:08:01/A*30:08:02/A*30:09:01G/A*30:10/A*30:11:01/A*30:11:02/A*30:12:01/A*30:12:02/A*30:13/A*30:14L/A*30:15/A*30:16/A*30:17/A*30:18/A*30:19/A*30:20/A*30:22/A*30:23/A*30:25/A*30:26/A*30:28/A*30:29/A*30:30/A*30:31/A*30:32/A*30:34/A*30:35/A*30:36/A*30:37/A*30:38/A*30:39:01/A*30:39:02/A*30:40/A*30:41/A*30:42/A*30:43/A*30:44/A*30:45/A*30:46/A*30:47/A*30:48/A*30:49/A*30:50/A*30:51/A*30:52/A*30:53/A*30:54/A*30:55/A*30:56/A*30:57/A*30:58/A*30:60/A*30:61/A*30:62/A*30:63/A*30:64/A*30:65/A*30:66/A*30:67/A*30:68/A*30:69/A*30:71/A*30:72/A*30:74/A*30:75/A*30:79/A*30:80/A*30:82/A*30:83/A*30:84/A*30:85/A*30:86/A*30:87/A*30:88/A*30:89/A*30:90/A*30:91/A*30:92/A*30:93/A*30:94/A*30:96/A*30:97/A*30:98/A*30:99/A*30:101Q/A*30:102/A*30:103/A*30:104/A*30:106/A*30:107/A*30:108/A*30:109/A*30:110/A*30:111/A*30:113/A*30:116/A*30:117/A*30:118/A*30:119/A*30:120/A*30:122/A*30:124/A*30:125/A*30:126/A*30:127/A*30:128/A*30:129/A*30:131/A*30:133/A*30:134/A*30:139/A*30:140/A*30:143/A*30:150/A*30:152/A*30:153/A*30:154/A*30:155/A*30:157/A*30:159/A*30:160/A*30:161/A*30:162/A*30:163/A*30:164/A*30:165/A*30:166/A*30:168/A*30:172/A*30:174/A*30:176/A*30:177/A*30:179/A*30:180/A*30:182/A*30:183/A*30:184Q/A*30:185/A*30:186/A*30:188/A*31:01:02G/A*31:01:03/A*31:01:04/A*31:01:05/A*31:01:06/A*31:01:07/A*31:01:08/A*31:01:09/A*31:01:10/A*31:01:11/A*31:01:12/A*31:01:14/A*31:01:15/A*31:01:16/A*31:01:17/A*31:01:18/A*31:01:19/A*31:01:20/A*31:01:21/A*31:01:22/A*31:01:23/A*31:01:24/A*31:01:25/A*31:01:26/A*31:01:27/A*31:01:29/A*31:01:30/A*31:01:31/A*31
:01:32/A*31:01:34/A*31:01:36/A*31:01:37/A*31:01:38/A*31:01:39/A*31:01:40/A*31:01:41/A*31:01:44/A*31:02:01G/A*31:03/A*31:04:01G/A*31:04:02/A*31:05/A*31:06/A*31:07/A*31:08/A*31:09/A*31:10/A*31:11/A*31:12/A*31:13/A*31:15/A*31:16/A*31:17/A*31:18/A*31:19/A*31:20/A*31:21/A*31:22/A*31:24/A*31:25/A*31:26/A*31:27/A*31:28/A*31:29/A*31:30/A*31:31/A*31:32/A*31:33/A*31:34/A*31:35/A*31:36/A*31:37/A*31:38/A*31:39/A*31:40/A*31:41/A*31:42/A*31:43/A*31:44/A*31:45/A*31:47/A*31:49/A*31:50/A*31:51/A*31:52/A*31:53/A*31:54/A*31:57/A*31:58/A*31:61/A*31:62/A*31:63/A*31:64/A*31:65/A*31:66/A*31:67/A*31:68/A*31:69/A*31:70/A*31:73/A*31:74/A*31:75/A*31:76/A*31:77/A*31:78/A*31:79/A*31:80/A*31:82/A*31:83/A*31:84/A*31:85/A*31:86/A*31:87/A*31:88/A*31:89/A*31:90/A*31:91/A*31:92/A*31:93/A*31:94/A*31:96/A*31:97/A*31:98/A*31:99/A*31:100/A*31:101/A*31:102/A*31:103/A*31:104/A*31:105/A*31:106/A*31:107/A*31:108/A*31:109/A*31:110/A*31:112/A*31:113/A*31:114/A*31:115/A*31:116/A*31:117/A*31:118/A*31:120/A*31:121/A*31:122/A*31:123/A*31:124/A*31:127/A*31:129/A*31:130/A*31:133/A*31:134/A*31:136/A*31:137/A*31:138/A*31:139/A*31:140/A*31:142/A*31:144/A*31:145/A*31:146/A*31:147/A*31:148/A*31:150/A*31:154/A*31:161/A*31:162/A*31:163/A*31:164/A*31:165/A*31:168/A*31:169/A*31:170/A*31:171/A*31:172/A*31:173/A*31:174/A*31:175/A*31:176/A*31:179/A*31:180/A*31:183/A*31:185/A*31:187/A*31:189/A*31:190/A*31:191/A*31:192/A*31:193/A*31:195/A*32:01:01G/A*32:01:03/A*32:01:04/A*32:01:05/A*32:01:06/A*32:01:07/A*32:01:08/A*32:01:09/A*32:01:10/A*32:01:11/A*32:01:12/A*32:01:13/A*32:01:14/A*32:01:15/A*32:01:16/A*32:01:17/A*32:01:18/A*32:01:19/A*32:01:20/A*32:01:21/A*32:01:22/A*32:01:24/A*32:01:25/A*32:01:26/A*32:01:28/A*32:01:30/A*32:01:31/A*32:01:32/A*32:01:33/A*32:01:37/A*32:01:38/A*32:01:39/A*32:01:40/A*32:01:41/A*32:01:43/A*32:01:45/A*32:02/A*32:03:01G/A*32:04/A*32:05/A*32:06/A*32:07/A*32:08/A*32:09/A*32:10/A*32:11Q/A*32:12/A*32:13/A*32:14/A*32:15/A*32:16/A*32:17/A*32:18/A*32:20/A*32:21/A*32:22/A*32:23/A*32:24/A*32:25/A*32:26:01/A*32:26
:02/A*32:28/A*32:29/A*32:30:01/A*32:30:02/A*32:31/A*32:32/A*32:33:01/A*32:33:02/A*32:33:03/A*32:34/A*32:35/A*32:36/A*32:37/A*32:38/A*32:39/A*32:40/A*32:41/A*32:42/A*32:43:01/A*32:43:02/A*32:44/A*32:46:01/A*32:46:02/A*32:47/A*32:49/A*32:50/A*32:51/A*32:52/A*32:55:01/A*32:55:02/A*32:55:03/A*32:57/A*32:58/A*32:59/A*32:60/A*32:62/A*32:63/A*32:64/A*32:65/A*32:66/A*32:67/A*32:69/A*32:70/A*32:71/A*32:72/A*32:73/A*32:75/A*32:76/A*32:77/A*32:78/A*32:79/A*32:80/A*32:81/A*32:82/A*32:83/A*32:84/A*32:85/A*32:86/A*32:87/A*32:88/A*32:89/A*32:90/A*32:91/A*32:93/A*32:94/A*32:95/A*32:96/A*32:97/A*32:98/A*32:99/A*32:100/A*32:101Q/A*32:102/A*32:104/A*32:105/A*32:107/A*32:108/A*32:109/A*32:113/A*32:115/A*32:118/A*32:119/A*32:120/A*32:123/A*32:125/A*32:127/A*32:128/A*32:129/A*32:131/A*32:136/A*32:137/A*32:138/A*32:140/A*32:141/A*32:142/A*32:143/A*32:144/A*32:145/A*32:146/A*32:150/A*32:151/A*33:01:01G/A*33:01:02/A*33:01:03/A*33:01:04/A*33:01:05/A*33:01:06/A*33:01:07/A*33:01:08/A*33:01:10/A*33:01:11/A*33:01:12/A*33:03:01G/A*33:03:02/A*33:03:04/A*33:03:05/A*33:03:06/A*33:03:07/A*33:03:08/A*33:03:09/A*33:03:10/A*33:03:11/A*33:03:12/A*33:03:14/A*33:03:15/A*33:03:16/A*33:03:19/A*33:03:20/A*33:03:21/A*33:03:24/A*33:03:25/A*33:03:26/A*33:03:27/A*33:03:28/A*33:03:29/A*33:03:30/A*33:03:31/A*33:03:32/A*33:03:33/A*33:03:34/A*33:03:35/A*33:03:36/A*33:03:38/A*33:03:39/A*33:03:40/A*33:03:41/A*33:03:42/A*33:03:45/A*33:03:46/A*33:03:47/A*33:04/A*33:05/A*33:06/A*33:07/A*33:08/A*33:09/A*33:10/A*33:11/A*33:12/A*33:13/A*33:14/A*33:16/A*33:17/A*33:18:01/A*33:18:02/A*33:19/A*33:20/A*33:21/A*33:22/A*33:23/A*33:24/A*33:26/A*33:27/A*33:28/A*33:29/A*33:30/A*33:32:01/A*33:32:02/A*33:33/A*33:34/A*33:35/A*33:36/A*33:37/A*33:40:01/A*33:40:02/A*33:41/A*33:42/A*33:43/A*33:45/A*33:46/A*33:47/A*33:48/A*33:49/A*33:50/A*33:51/A*33:52/A*33:53/A*33:54/A*33:55/A*33:56/A*33:57/A*33:58/A*33:59/A*33:60/A*33:61/A*33:62/A*33:63/A*33:64/A*33:65/A*33:66/A*33:67/A*33:68/A*33:69/A*33:70/A*33:71/A*33:72/A*33:75/A*33:76/A*33:78/A*33:79/A
*33:81/A*33:86/A*33:87/A*33:88/A*33:89/A*33:90/A*33:91/A*33:92/A*33:93/A*33:94/A*33:95/A*33:97/A*33:98/A*33:99/A*33:100/A*33:101/A*33:102/A*33:103/A*33:104/A*33:105/A*33:106/A*33:107/A*33:108/A*33:109/A*33:110/A*33:112/A*33:113/A*33:114/A*33:115/A*33:116/A*33:117/A*33:118/A*33:119/A*33:120/A*33:121/A*33:122/A*33:124/A*33:125/A*33:126/A*33:127/A*33:128/A*33:130/A*33:131/A*33:132/A*33:133/A*33:134/A*33:135/A*33:137/A*33:138/A*33:139/A*33:141/A*33:142/A*33:144/A*33:147/A*33:149/A*33:150/A*33:152/A*33:155/A*33:158/A*33:159/A*33:161/A*33:162/A*33:164/A*33:165/A*33:166/A*33:167/A*33:168/A*33:173/A*33:175Q/A*33:177/A*33:178/A*33:179/A*33:180/A*33:183/A*33:184/A*33:185/A*33:186/A*33:187/A*33:188/A*33:189/A*33:190/A*33:192/A*33:195/A*33:196/A*33:197/A*33:199/A*33:200/A*33:201/A*33:206/A*33:208/A*33:211/A*33:212/A*74:01:01G/A*74:01:02/A*74:01:03/A*74:01:04/A*74:01:05/A*74:01:06/A*74:01:07/A*74:03:01G/A*74:03:02/A*74:04/A*74:05/A*74:06:01G/A*74:07/A*74:08/A*74:09/A*74:10/A*74:11/A*74:13/A*74:15/A*74:16:01/A*74:16:02/A*74:17/A*74:18/A*74:19/A*74:20/A*74:21/A*74:22/A*74:23/A*74:24/A*74:25/A*74:26/A*74:27/A*74:28/A*74:29/A*74:30/A*74:33/A*74:35/A*74:37/A*74:38/A*74:40/A*74:41 | | DR1403 | G | DRB1*14:03:01G/DRB1*14:03:02 | + | Cw10 | lg | C*03:02g/C*03:04g/C*03:06g/C*03:26g/C*03:28g/C*03:46g | | DR2 | G | 
DRB1*15:01:01G/DRB1*15:01:02/DRB1*15:01:03/DRB1*15:01:04/DRB1*15:01:05/DRB1*15:01:06/DRB1*15:01:07/DRB1*15:01:08G/DRB1*15:01:09/DRB1*15:01:10/DRB1*15:01:11/DRB1*15:01:12/DRB1*15:01:13/DRB1*15:01:14/DRB1*15:01:15/DRB1*15:01:16/DRB1*15:01:18/DRB1*15:01:19/DRB1*15:01:20/DRB1*15:01:21/DRB1*15:01:22/DRB1*15:01:23/DRB1*15:01:24/DRB1*15:01:25/DRB1*15:01:26/DRB1*15:01:27/DRB1*15:01:28/DRB1*15:01:29/DRB1*15:01:30/DRB1*15:01:31/DRB1*15:01:33/DRB1*15:01:34/DRB1*15:02:01G/DRB1*15:02:02G/DRB1*15:02:03/DRB1*15:02:04/DRB1*15:02:05/DRB1*15:02:06/DRB1*15:02:07/DRB1*15:02:08/DRB1*15:02:09/DRB1*15:02:10/DRB1*15:02:11/DRB1*15:02:12/DRB1*15:02:13/DRB1*15:02:14/DRB1*15:02:15/DRB1*15:02:16/DRB1*15:02:17/DRB1*15:02:18/DRB1*15:02:19/DRB1*15:03:01G/DRB1*15:03:02/DRB1*15:03:05/DRB1*15:04/DRB1*15:05/DRB1*15:06:01/DRB1*15:06:02/DRB1*15:06:03/DRB1*15:06:04/DRB1*15:07:01/DRB1*15:07:02/DRB1*15:07:03/DRB1*15:08/DRB1*15:09/DRB1*15:10/DRB1*15:11:01/DRB1*15:11:02/DRB1*15:12/DRB1*15:13/DRB1*15:14/DRB1*15:15:01/DRB1*15:15:02/DRB1*15:15:03/DRB1*15:16/DRB1*15:18/DRB1*15:19/DRB1*15:20/DRB1*15:21/DRB1*15:22/DRB1*15:23/DRB1*15:24/DRB1*15:25/DRB1*15:26/DRB1*15:27/DRB1*15:28/DRB1*15:29/DRB1*15:30/DRB1*15:31:01/DRB1*15:31:02/DRB1*15:32/DRB1*15:33/DRB1*15:34/DRB1*15:35/DRB1*15:36/DRB1*15:37:01/DRB1*15:37:02/DRB1*15:38/DRB1*15:39/DRB1*15:40/DRB1*15:41/DRB1*15:42/DRB1*15:43/DRB1*15:44/DRB1*15:45/DRB1*15:46/DRB1*15:47/DRB1*15:48/DRB1*15:49/DRB1*15:51/DRB1*15:52/DRB1*15:53/DRB1*15:54/DRB1*15:55/DRB1*15:56/DRB1*15:57/DRB1*15:58/DRB1*15:59/DRB1*15:60/DRB1*15:61/DRB1*15:62/DRB1*15:63/DRB1*15:64/DRB1*15:65/DRB1*15:66:01/DRB1*15:66:02/DRB1*15:67/DRB1*15:68/DRB1*15:69/DRB1*15:70/DRB1*15:71/DRB1*15:72/DRB1*15:73/DRB1*15:74/DRB1*15:75/DRB1*15:76/DRB1*15:77/DRB1*15:78/DRB1*15:79/DRB1*15:81/DRB1*15:82/DRB1*15:83/DRB1*15:84/DRB1*15:85/DRB1*15:86/DRB1*15:87/DRB1*15:88/DRB1*15:89/DRB1*15:90/DRB1*15:91/DRB1*15:92/DRB1*15:93/DRB1*15:94/DRB1*15:95/DRB1*15:96/DRB1*15:97/DRB1*15:98/DRB1*15:99/DRB1*15:100/DRB1*15:101/DRB1*15:102/DRB1*
15:103/DRB1*15:104:01/DRB1*15:104:02/DRB1*15:104:03/DRB1*15:105:01/DRB1*15:105:02/DRB1*15:106/DRB1*15:107/DRB1*15:108/DRB1*15:109/DRB1*15:110/DRB1*15:111/DRB1*15:112/DRB1*15:114/DRB1*15:116/DRB1*15:117/DRB1*15:118/DRB1*15:119/DRB1*15:120/DRB1*15:121/DRB1*15:122/DRB1*15:123/DRB1*15:124/DRB1*15:125/DRB1*15:126/DRB1*15:127/DRB1*15:128/DRB1*15:130/DRB1*15:131/DRB1*15:132/DRB1*15:133/DRB1*15:135/DRB1*15:136/DRB1*15:139/DRB1*15:142/DRB1*15:143/DRB1*15:144/DRB1*15:147/DRB1*15:150/DRB1*15:152/DRB1*15:153/DRB1*15:155/DRB1*15:156/DRB1*15:157/DRB1*15:158/DRB1*15:161/DRB1*15:162/DRB1*15:164Q/DRB1*15:165/DRB1*15:167/DRB1*15:168/DRB1*15:169/DRB1*15:170/DRB1*15:172/DRB1*15:175/DRB1*15:181/DRB1*15:187/DRB1*15:189/DRB1*15:190/DRB1*15:192/DRB1*16:01:01G/DRB1*16:01:02/DRB1*16:01:03/DRB1*16:01:04/DRB1*16:01:05/DRB1*16:01:06/DRB1*16:01:07/DRB1*16:01:08/DRB1*16:01:09/DRB1*16:01:10/DRB1*16:01:11/DRB1*16:01:12/DRB1*16:01:13/DRB1*16:01:14/DRB1*16:01:16/DRB1*16:02:01G/DRB1*16:02:02/DRB1*16:02:03/DRB1*16:02:04/DRB1*16:02:05/DRB1*16:02:06/DRB1*16:02:07/DRB1*16:02:08/DRB1*16:02:10/DRB1*16:03/DRB1*16:04:01/DRB1*16:04:02/DRB1*16:05:01/DRB1*16:05:02/DRB1*16:07/DRB1*16:08/DRB1*16:09:01/DRB1*16:09:02/DRB1*16:10:01/DRB1*16:10:02/DRB1*16:11/DRB1*16:12/DRB1*16:14/DRB1*16:15/DRB1*16:16/DRB1*16:17/DRB1*16:18/DRB1*16:19/DRB1*16:20/DRB1*16:22/DRB1*16:23/DRB1*16:24/DRB1*16:25/DRB1*16:26/DRB1*16:27/DRB1*16:28/DRB1*16:29/DRB1*16:30/DRB1*16:31/DRB1*16:32/DRB1*16:33/DRB1*16:34/DRB1*16:35/DRB1*16:36/DRB1*16:37/DRB1*16:38:01/DRB1*16:38:02/DRB1*16:39/DRB1*16:40/DRB1*16:42/DRB1*16:43/DRB1*16:44/DRB1*16:45/DRB1*16:46/DRB1*16:47/DRB1*16:48/DRB1*16:49/DRB1*16:50/DRB1*16:53/DRB1*16:56/DRB1*16:58/DRB1*16:59Q/DRB1*16:60/DRB1*16:61/DRB1*16:65/DRB1*16:66/DRB1*16:67/DRB1*16:68 | diff --git a/tests/steps/redux_allele.py b/tests/steps/redux_allele.py index 0d43506..fffc37b 100644 --- a/tests/steps/redux_allele.py +++ b/tests/steps/redux_allele.py @@ -21,6 +21,8 @@ def step_impl(context, level): @then('the reduced allele is 
found to be {redux_allele}') def step_impl(context, redux_allele): + if redux_allele == 'X': + redux_allele = '' assert_that(context.redux_allele, is_(redux_allele)) From 796acfe137bccc3619caf5b181a2683e115008b6 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Wed, 16 Jun 2021 13:57:01 -0500 Subject: [PATCH 3/9] =?UTF-8?q?Bump=20version:=200.6.3=20=E2=86=92=200.6.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyard/__init__.py b/pyard/__init__.py index 518f990..5341080 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -24,4 +24,4 @@ from .pyard import ARD __author__ = """NMDP Bioinformatics""" -__version__ = '0.6.3' +__version__ = '0.6.4' diff --git a/setup.cfg b/setup.cfg index dc03b28..09c0516 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.3 +current_version = 0.6.4 commit = True tag = True diff --git a/setup.py b/setup.py index 90f6cc5..af4889e 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name='py-ard', - version='0.6.3', + version='0.6.4', description="ARD reduction for HLA with Python", long_description=readme + '\n\n' + history, author="CIBMTR", From edb498e5527b952d626a93fac61b0a4eec739061 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Thu, 29 Jul 2021 20:32:21 -0500 Subject: [PATCH 4/9] W level rollups --- pyard/data_repository.py | 53 +++++++++++++++++++++++++++++++++++++--- pyard/pyard.py | 24 +++++++++++++++--- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/pyard/data_repository.py b/pyard/data_repository.py index 4742cbf..ccd85ca 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -76,7 +76,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version): lg_group = db.load_dict(db_connection, table_name='lg_group', columns=('allele', 'lg')) lgx_group = 
db.load_dict(db_connection, table_name='lgx_group', columns=('allele', 'lgx')) return ARSMapping(dup_g=dup_g, dup_lg=dup_lg, dup_lgx=dup_lgx, - g_group=g_group, lg_group=lg_group, lgx_group=lgx_group) + g_group=g_group, lg_group=lg_group, lgx_group=lgx_group) ars_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt' df = pd.read_csv(ars_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna() @@ -154,7 +154,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version): g_group=g_group, lg_group=lg_group, lgx_group=lgx_group) -def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_version): +def generate_alleles_and_xx_codes_and_who(db_connection: sqlite3.Connection, imgt_version): """ Checks to see if there's already an allele list file for the `imgt_version` in the `data_dir` directory. If not, will download the file and create @@ -186,10 +186,17 @@ def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_versio if db.table_exists(db_connection, 'alleles'): valid_alleles = db.load_set(db_connection, 'alleles') + who_alleles = db.load_set(db_connection, 'who_alleles') + + who_codes = db.load_dict(db_connection, 'who_group', + ('who', 'allele_list')) + who_codes = {k: v.split('/') for k, v in who_codes.items()} + xx_codes = db.load_dict(db_connection, 'xx_codes', ('allele_1d', 'allele_list')) xx_codes = {k: v.split('/') for k, v in xx_codes.items()} - return valid_alleles, xx_codes + + return valid_alleles, who_alleles, xx_codes, who_codes # Create a Pandas DataFrame from the mac_code list file # Skip the header (first 6 lines) and use only the Allele column @@ -203,6 +210,8 @@ def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_versio # All 2-field, 3-field and the original Alleles are considered valid alleles allele_df['2d'] = allele_df['Allele'].apply(get_2field_allele) allele_df['3d'] = allele_df['Allele'].apply(get_3field_allele) + # this says all 3rd and 2nd field versions of 
longer alleles are valid + who_alleles = set(allele_df['Allele']) valid_alleles = set(allele_df['Allele']). \ union(set(allele_df['2d'])). \ union(set(allele_df['3d'])) @@ -216,6 +225,9 @@ def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_versio .apply(lambda x: list(x['Allele'])) \ .to_dict() + # Save this version of the who + #db.save_set(db_connection, 'who', valid_alleles, 'allele') + # Update xx codes with broads and splits for broad, splits in broad_splits_dna_mapping.items(): for split in splits: @@ -226,13 +238,46 @@ def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_versio # Save this version of the valid alleles db.save_set(db_connection, 'alleles', valid_alleles, 'allele') + # Save this version of the who alleles + db.save_set(db_connection, 'who_alleles', who_alleles, 'allele') # Save this version of xx codes flat_xx_codes = {k: '/'.join(sorted(v, key=functools.cmp_to_key(smart_sort_comparator))) for k, v in xx_codes.items()} db.save_dict(db_connection, 'xx_codes', flat_xx_codes, ('allele_1d', 'allele_list')) - return valid_alleles, xx_codes + # W H O + + # Create who mapping from the unique alleles in the 2-field column + who_df1 = pd.DataFrame(allele_df['Allele'].unique(), columns=['Allele']) + who_df1['1d'] = allele_df['Allele'].apply(lambda x: x.split(":")[0]) + who_df2 = pd.DataFrame(allele_df['Allele'].unique(), columns=['Allele']) + who_df2['2d'] = allele_df['Allele'].apply(get_2field_allele) + who_df3 = pd.DataFrame(allele_df['Allele'].unique(), columns=['Allele']) + who_df3['3d'] = allele_df['Allele'].apply(get_3field_allele) + + # make one df + who_df1.rename(columns = {'1d':'input'}, inplace = True) + who_df2.rename(columns = {'2d':'input'}, inplace = True) + who_df3.rename(columns = {'3d':'input'}, inplace = True) + who_codes = pd.concat([who_df1, who_df2, who_df3]) + + # remove valid alleles from who_codes to avoid recursion + # there is a more pythonic way to do this for sure + for k in 
who_alleles: + if k in who_codes['input']: + who_codes.drop(labels=k, axis='index') + + # who_codes maps a first field name to its 2 field expansion + who_group = who_codes.groupby(['input']).apply(lambda x: list(x['Allele'])).to_dict() + + # dictionary + flat_who_group= {k: '/'.join(sorted(v, key=functools.cmp_to_key(smart_sort_comparator))) + for k, v in who_group.items()} + db.save_dict(db_connection, table_name='who_group', dictionary=flat_who_group, columns=('who', 'allele_list')) + + + return valid_alleles, who_alleles, xx_codes, who_codes def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool): diff --git a/pyard/pyard.py b/pyard/pyard.py index e7447b4..cc9f81a 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- # # py-ard -# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved. +# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. +# All Rights Reserved. # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published @@ -27,7 +28,7 @@ from . 
import db from .data_repository import generate_ars_mapping, \ - generate_mac_codes, generate_alleles_and_xx_codes, \ + generate_mac_codes, generate_alleles_and_xx_codes_and_who, \ generate_serology_mapping, generate_v2_to_v3_mapping from .db import is_valid_mac_code, mac_code_to_alleles, v2_to_v3_allele from .smart_sort import smart_sort_comparator @@ -62,7 +63,7 @@ def __init__(self, imgt_version: str = 'Latest', # Load MAC codes generate_mac_codes(self.db_connection, refresh_mac) # Load Alleles and XX Codes - self.valid_alleles, self.xx_codes = generate_alleles_and_xx_codes(self.db_connection, imgt_version) + self.valid_alleles, self.who_alleles, self.xx_codes, self.who_group = generate_alleles_and_xx_codes_and_who(self.db_connection, imgt_version) # Load ARS mappings self.ars_mappings = generate_ars_mapping(self.db_connection, imgt_version) # Load Serology mappings @@ -98,7 +99,6 @@ def redux(self, allele: str, ars_type: str) -> str: :return: ARS reduced allele :rtype: str """ - # deal with leading 'HLA-' if HLA_regex.search(allele): hla, allele_name = allele.split("-") @@ -135,6 +135,14 @@ def redux(self, allele: str, ars_type: str) -> str: # for 'lgx' when allele is not in G group, # return allele with only first 2 field return ':'.join(allele.split(':')[0:2]) + elif ars_type == "W": + # new ars_type which is full WHO expansion + if self._is_who_allele(allele): + return allele + if allele in self.who_group: + return self.redux_gl("/".join(self.who_group[allele]), ars_type) + else: + return allele else: if self._remove_invalid: if self._is_valid_allele(allele): @@ -265,6 +273,14 @@ def is_v2(allele: str) -> bool: """ return '*' in allele and ':' not in allele + def _is_who_allele(self, allele): + """ + Test if allele is a WHO allele in the current imgt database + :param allele: Allele to test + :return: bool to indicate if allele is valid + """ + return allele in self.who_alleles + def _is_valid_allele(self, allele): """ Test if allele is valid in the current 
imgt database From 8fabb961bf571fca9cdea64cd8811c692877242b Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Thu, 29 Jul 2021 23:36:21 -0500 Subject: [PATCH 5/9] remove v2 rule due to it leads to infinite recursion --- pyard/pyard.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index cc9f81a..2feec0f 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -381,9 +381,12 @@ def isvalid(self, allele: str) -> bool: """ if allele == '': return False + + # this leads to an infinite recursion if the input matches these patterns + # but is not ultimately valid + if not self.is_mac(allele) and \ - not self.is_serology(allele) and \ - not self.is_v2(allele): + not self.is_serology(allele): # Alleles ending with P or G are valid_alleles if allele.endswith(('P', 'G')): # remove the last character From 168504acb6b884bd7b12928f4eec5daaa13c1788 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Thu, 29 Jul 2021 23:40:05 -0500 Subject: [PATCH 6/9] remove freeze because it gives module 'gc' has no attribute 'freeze' error --- pyard/pyard.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index 2feec0f..d73cbb9 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -74,9 +74,6 @@ def __init__(self, imgt_version: str = 'Latest', # Close the current read-write db connection self.db_connection.close() - # reference data is read-only and can be frozen - gc.freeze() - # Re-open the connection in read-only mode as we're not updating it anymore self.db_connection = db.create_db_connection(data_dir, imgt_version, ro=True) @@ -382,8 +379,9 @@ def isvalid(self, allele: str) -> bool: if allele == '': return False + # removed the test for is_v2() # this leads to an infinite recursion if the input matches these patterns - # but is not ultimately valid + # but is not ultimately valid e.g. 
DRB3*NNNN if not self.is_mac(allele) and \ not self.is_serology(allele): From 2b7797d12da97d4ed37f45b35461eedd37126804 Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Thu, 5 Aug 2021 10:14:21 -0500 Subject: [PATCH 7/9] added exon resolution group --- pyard/data_repository.py | 13 ++++++++----- pyard/pyard.py | 6 ++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyard/data_repository.py b/pyard/data_repository.py index 6ad47c2..2b55c84 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -38,7 +38,7 @@ # List of expression characters expression_chars = ['N', 'Q', 'L', 'S'] -ars_mapping_tables = ['dup_g', 'dup_lg', 'dup_lgx', 'g_group', 'lg_group', 'lgx_group'] +ars_mapping_tables = ['dup_g', 'dup_lg', 'dup_lgx', 'g_group', 'lg_group', 'lgx_group', 'exon_group'] ARSMapping = namedtuple("ARSMapping", ars_mapping_tables) @@ -75,8 +75,8 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version): g_group = db.load_dict(db_connection, table_name='g_group', columns=('allele', 'g')) lg_group = db.load_dict(db_connection, table_name='lg_group', columns=('allele', 'lg')) lgx_group = db.load_dict(db_connection, table_name='lgx_group', columns=('allele', 'lgx')) - return ARSMapping(dup_g=dup_g, dup_lg=dup_lg, dup_lgx=dup_lgx, - g_group=g_group, lg_group=lg_group, lgx_group=lgx_group) + exon_group = db.load_dict(db_connection, table_name='exon_group', columns=('allele', 'exon')) + return ARSMapping(dup_g=dup_g, dup_lg=dup_lg, dup_lgx=dup_lgx, g_group=g_group, lg_group=lg_group, lgx_group=lgx_group, exon_group=exon_group) ars_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt' df = pd.read_csv(ars_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna() @@ -143,15 +143,18 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version): ]) lgx_group = df_lgx.set_index('A')['lgx'].to_dict() + df_exon = pd.concat([ df[['A', '3d']].rename(columns={'3d': 'exon'}), ]) + exon_group = 
df_exon.set_index('A')['exon'].to_dict() + db.save_dict(db_connection, table_name='dup_g', dictionary=dup_g, columns=('allele', 'g_group')) db.save_dict(db_connection, table_name='dup_lg', dictionary=dup_lg, columns=('allele', 'lg_group')) db.save_dict(db_connection, table_name='dup_lgx', dictionary=dup_lgx, columns=('allele', 'lgx_group')) db.save_dict(db_connection, table_name='g_group', dictionary=g_group, columns=('allele', 'g')) db.save_dict(db_connection, table_name='lg_group', dictionary=lg_group, columns=('allele', 'lg')) db.save_dict(db_connection, table_name='lgx_group', dictionary=lgx_group, columns=('allele', 'lgx')) + db.save_dict(db_connection, table_name='exon_group', dictionary=exon_group, columns=('allele', 'exon')) - return ARSMapping(dup_g=dup_g, dup_lg=dup_lg, dup_lgx=dup_lgx, - g_group=g_group, lg_group=lg_group, lgx_group=lgx_group) + return ARSMapping(dup_g=dup_g, dup_lg=dup_lg, dup_lgx=dup_lgx, g_group=g_group, lg_group=lg_group, lgx_group=lgx_group, exon_group=exon_group) def generate_alleles_and_xx_codes(db_connection: sqlite3.Connection, imgt_version): diff --git a/pyard/pyard.py b/pyard/pyard.py index 04368d0..4e14709 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -135,6 +135,12 @@ def redux(self, allele: str, ars_type: str) -> str: # for 'lgx' when allele is not in G group, # return allele with only first 2 field return ':'.join(allele.split(':')[0:2]) + elif ars_type == "exon": + if allele in self.ars_mappings.exon_group: + return self.ars_mappings.exon_group[allele] + else: + # for 'exon' return allele with only first 3 fields + return ':'.join(allele.split(':')[0:3]) else: if self._remove_invalid: if self._is_valid_allele(allele): From ff392f1b7d36b1c728cefd884a9ba41b2fdcb57d Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Thu, 5 Aug 2021 10:36:15 -0500 Subject: [PATCH 8/9] increase cache size --- pyard/pyard.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index 
af99f4d..0d0af5c 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -84,7 +84,7 @@ def __del__(self): """ self.db_connection.close() - @functools.lru_cache(maxsize=1000) + @functools.lru_cache(maxsize=1000000) def redux(self, allele: str, ars_type: str) -> str: """ Does ARS reduction with allele and ARS type @@ -155,7 +155,7 @@ def redux(self, allele: str, ars_type: str) -> str: else: return allele - @functools.lru_cache(maxsize=1000) + @functools.lru_cache(maxsize=1000000) def redux_gl(self, glstring: str, redux_type: str) -> str: """ Does ARS reduction with gl string and ARS type From 5212b0084ff1e0a04d6e9c7ba586371f430e6d3f Mon Sep 17 00:00:00 2001 From: Martin Maiers Date: Mon, 9 Aug 2021 22:08:35 -0500 Subject: [PATCH 9/9] expression characters don't go on shortened names --- pyard/data_repository.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyard/data_repository.py b/pyard/data_repository.py index 455626d..ba58d4e 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -54,7 +54,10 @@ def get_n_field_allele(allele: str, n: int) -> str: last_char = allele[-1] fields = allele.split(':') if last_char in expression_chars and len(fields) > n: - return ':'.join(fields[0:n]) + last_char + + # don't actually do this; it makes things like A*02:01:01L which is invalid + #return ':'.join(fields[0:n]) + last_char + return ':'.join(fields[0:n]) else: return ':'.join(fields[0:n])