From ce9aabb236e0448e4199c23a8d6607a1ed12a15a Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Tue, 31 May 2022 07:36:02 -0500
Subject: [PATCH 1/7] works but not for G mode

---
 pyard/data_repository.py | 35 ++++++++++++++++++++++-------------
 pyard/misc.py            |  8 ++++++++
 pyard/pyard.py           |  3 ++-
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/pyard/data_repository.py b/pyard/data_repository.py
index 806ffe7..5700cba 100644
--- a/pyard/data_repository.py
+++ b/pyard/data_repository.py
@@ -62,7 +62,7 @@ def expression_reduce(df):
     return None
 
 
-def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
+def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
     if db.tables_exist(db_connection, ars_mapping_tables):
         dup_g = db.load_dict(db_connection, table_name='dup_g', columns=('allele', 'g_group'))
         dup_lg = db.load_dict(db_connection, table_name='dup_lg', columns=('allele', 'lg_group'))
@@ -76,8 +76,24 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
                           g_group=g_group, lg_group=lg_group,
                           lgx_group=lgx_group, exon_group=exon_group, p_group=p_group)
 
+    # P groups
+    ars_P_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt'
+    df_P = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";").dropna()
+    df_P['A'] = df_P['A'].apply(lambda a: a.split('/'))
+    df_P = df_P.explode('A')
+    df_P['A'] = df_P['Locus'] + df_P['A']
+    df_P['P'] = df_P['Locus'] + df_P['P']
+    p_group = df_P.set_index('A')['P'].to_dict()
+
+
     ars_G_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt'
-    df = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
+    df_G = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
+    if ping:
+        # put the P codes in the G-codes early to catch C*06:17 -> C*06:02
+        df_PinG = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
+        df = pd.concat([df_PinG, df_G])
+    else:
+        df = df_G
 
     df['A'] = df['A'].apply(lambda a: a.split('/'))
     df = df.explode('A')
@@ -86,8 +102,10 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
 
     df['2d'] = df['A'].apply(get_2field_allele)
     df['3d'] = df['A'].apply(get_3field_allele)
-    df['lg'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
-    df['lgx'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]))
+    #df['lg'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
+    df['lg'] = df['G'].apply(lambda a: get_2field_allele(a) + "g")
+    #df['lgx'] = df['G'].apply(lambda a: ":".join(a.split(":")[0:2]))
+    df['lgx'] = df['G'].apply(get_2field_allele)
 
     # multiple Gs
     mg = df.drop_duplicates(['2d', 'G'])['2d'].value_counts()
@@ -145,15 +163,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     df_exon = pd.concat([df[['A', '3d']].rename(columns={'3d': 'exon'}), ])
     exon_group = df_exon.set_index('A')['exon'].to_dict()
 
-    # P groups
-    ars_P_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt'
-    df_P = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";").dropna()
-    df_P['A'] = df_P['A'].apply(lambda a: a.split('/'))
-    df_P = df_P.explode('A')
-    df_P['A'] = df_P['Locus'] + df_P['A']
-    df_P['P'] = df_P['Locus'] + df_P['P']
-    p_group = df_P.set_index('A')['P'].to_dict()
-
     # save
     db.save_dict(db_connection, table_name='dup_g', dictionary=dup_g, columns=('allele', 'g_group'))
     db.save_dict(db_connection, table_name='dup_lg', dictionary=dup_lg, columns=('allele', 'lg_group'))
diff --git a/pyard/misc.py b/pyard/misc.py
index 5e212c9..c2886a3 100644
--- a/pyard/misc.py
+++ b/pyard/misc.py
@@ -1,5 +1,6 @@
 # List of expression characters
 expression_chars = ['N', 'Q', 'L', 'S']
+PandG_chars = ['P', 'G']
 
 
 def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str:
@@ -21,10 +22,17 @@ def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str:
 
 
 def get_3field_allele(a: str) -> str:
+    last_char = a[-1]
+    if last_char in PandG_chars:
+        a = a[:-1]
+    
     return get_n_field_allele(a, 3)
 
 
 def get_2field_allele(a: str) -> str:
+    last_char = a[-1]
+    if last_char in PandG_chars:
+        a = a[:-1]
     return get_n_field_allele(a, 2)
 
 
diff --git a/pyard/pyard.py b/pyard/pyard.py
index 7d2945e..2284728 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -48,6 +48,7 @@
     "reduce_XX": True,
     "reduce_MAC": True,
     "reduce_shortnull": True,
+    "ping": False,
     "map_drb345_to_drbx": True,
     "verbose_log": True
 }
@@ -99,7 +100,7 @@ def __init__(self, imgt_version: str = 'Latest', data_dir: str = None, config: d
         # Load MAC codes
         dr.generate_mac_codes(self.db_connection, False)
         # Load ARS mappings
-        self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version)
+        self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version, self._config['ping'])
         # Load Alleles and XX Codes
         self.valid_alleles, self.who_alleles, self.xx_codes, self.who_group, self.shortnulls, self.exp_alleles = \
             dr.generate_alleles_and_xx_codes_and_who(self.db_connection, imgt_version, self.ars_mappings)

From 65105ce01e14bae33658c6ec0244c61419e2d942 Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Tue, 27 Sep 2022 08:20:08 -0500
Subject: [PATCH 2/7] ping mode

---
 pyard/data_repository.py | 75 ++++++++++++++++++++++++++++++++--------
 pyard/misc.py            | 24 +++++++++++++
 pyard/pyard.py           | 13 +++++--
 3 files changed, 95 insertions(+), 17 deletions(-)

diff --git a/pyard/data_repository.py b/pyard/data_repository.py
index edd1563..8b2638b 100644
--- a/pyard/data_repository.py
+++ b/pyard/data_repository.py
@@ -30,7 +30,7 @@
 from .broad_splits import broad_splits_dna_mapping
 from .broad_splits import broad_splits_ser_mapping
 from .misc import get_2field_allele, get_3field_allele, number_of_fields
-from .misc import expression_chars
+from .misc import expression_chars, get_G_name, get_P_name
 
 # GitHub URL where IMGT HLA files are downloaded.
 from pyard.smart_sort import smart_sort_comparator
@@ -46,6 +46,7 @@
     "lgx_group",
     "exon_group",
     "p_group",
+    "p_not_g",
 ]
 ARSMapping = namedtuple("ARSMapping", ars_mapping_tables)
 
@@ -76,7 +77,7 @@ def expression_reduce(df):
     return None
 
 
-def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
+def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     if db.tables_exist(db_connection, ars_mapping_tables):
         dup_g = db.load_dict(
             db_connection, table_name="dup_g", columns=("allele", "g_group")
@@ -102,6 +103,9 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
         p_group = db.load_dict(
             db_connection, table_name="p_group", columns=("allele", "p")
         )
+        p_not_g = db.load_dict(
+            db_connection, table_name="p_not_g", columns=("allele", "lgx")
+        )
         return ARSMapping(
             dup_g=dup_g,
             dup_lg=dup_lg,
@@ -111,28 +115,45 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
             lgx_group=lgx_group,
             exon_group=exon_group,
             p_group=p_group,
+            p_not_g=p_not_g,
         )
 
-    # P groups
+
+    # load the hla_nom_g.txt
+    ars_G_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt'
+    df = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
+
+    # the G-group is named for its first allele
+    df["G"] = df["A"].apply(lambda a: get_G_name(a)) 
+
+    # load the hla_nom_p.txt
     ars_P_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt'
+    # example: C*;06:06:01:01/06:06:01:02/06:271;06:06P
     df_P = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";").dropna()
-    df_P['A'] = df_P['A'].apply(lambda a: a.split('/'))
-    df_P = df_P.explode('A')
+
+    # the P-group is named for its first allele
+    df_P["P"] = df_P["A"].apply(lambda a: get_P_name(a)) 
+
+    # convert slash delimited string to a list
+    df_P["A"] = df_P["A"].apply(lambda a: a.split("/"))
+    df_P = df_P.explode("A")
+    # C* 06:06:01:01/06:06:01:02/06:271 06:06P
     df_P['A'] = df_P['Locus'] + df_P['A']
     df_P['P'] = df_P['Locus'] + df_P['P']
+    # C* 06:06:01:01 06:06P
+    # C* 06:06:01:02 06:06P
+    # C* 06:271 06:06P
     p_group = df_P.set_index('A')['P'].to_dict()
+    df_P["2d"] = df_P["A"].apply(get_2field_allele)
+    # lgx has the P-group name without the P for comparison 
+    df_P["lgx"] = df_P["P"].apply(get_2field_allele)
 
-    ars_G_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt'
-    df_G = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
-    if ping:
-        # put the P codes in the G-codes early to catch C*06:17 -> C*06:02
-        df_PinG = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
-        df = pd.concat([df_PinG, df_G])
-    else:
-        df = df_G
-
+    # convert slash delimited string to a list
     df["A"] = df["A"].apply(lambda a: a.split("/"))
+    # convert the list into separate rows for each element
     df = df.explode("A")
+
+    #  A*   + 02:01   = A*02:01
     df["A"] = df["Locus"] + df["A"]
     df["G"] = df["Locus"] + df["G"]
 
@@ -141,8 +162,25 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
     df["lg"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
     df["lgx"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]))
 
+    # compare df_P["2d"] with df["2d"] to find 2-field alleles in the 
+    # P-group that aren't in the G-group
+    PnotinG = set(df_P["2d"]) - set(df["2d"])
+
+    # filter to find these 2-field alleles (2d) in the P-group data frame
+    df_PnotG = df_P[df_P["2d"].isin(PnotinG)]
+
+    # dictionary which will define the table
+    p_not_g = df_PnotG.set_index("A")["lgx"].to_dict()
+
+
     # multiple Gs
+    # goal: identify 2-field alleles that are in multiple G-groups
+
+    # drop duplicate (2d, G) pairs, then count the distinct G-groups per 2d allele
     mg = df.drop_duplicates(["2d", "G"])["2d"].value_counts()
+    # keep only the entries of mg with count > 1 (2d alleles in multiple G-groups)
+    # reset_index() moves the 2d names out of the index into the "index" column,
+    # and that column is then converted to a list
     multiple_g_list = mg[mg > 1].reset_index()["index"].to_list()
 
     # Keep only the alleles that have more than 1 mapping
@@ -218,6 +256,12 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
     exon_group = df_exon.set_index("A")["exon"].to_dict()
 
     # save
+    db.save_dict(
+        db_connection,
+        table_name="p_not_g",
+        dictionary=p_not_g,
+        columns=("allele", "lgx"),
+    )
     db.save_dict(
         db_connection,
         table_name="dup_g",
@@ -260,7 +304,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
     db.save_dict(
         db_connection,
         table_name="p_group",
-        dictionary=exon_group,
+        dictionary=p_group,
         columns=("allele", "p"),
     )
 
@@ -273,6 +317,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version, ping):
         lgx_group=lgx_group,
         exon_group=exon_group,
         p_group=p_group,
+        p_not_g=p_not_g,
     )
 
 
diff --git a/pyard/misc.py b/pyard/misc.py
index 976f775..58a0229 100644
--- a/pyard/misc.py
+++ b/pyard/misc.py
@@ -37,3 +37,27 @@ def get_2field_allele(a: str) -> str:
 
 def number_of_fields(allele: str) -> int:
     return len(allele.split(":"))
+
+# computes a valid G name based on the ambiguity string
+def get_G_name(a: str) -> str:
+    a = a.split('/')[0]
+    last_char = a[-1]
+    if last_char in PandG_chars + expression_chars:
+        a = a[:-1]
+    if len(a.split(':')) ==2:
+      return ':'.join([a,"01"])
+    else:
+      return ':'.join(a.split(':')[0:3]) + "G"
+      
+# computes a valid P name based on the ambiguity string
+def get_P_name(a: str) -> str:
+    a = a.split('/')[0]
+    last_char = a[-1]
+    if last_char in PandG_chars + expression_chars:
+        a = a[:-1]
+    return ':'.join(a.split(':')[0:2]) + "P"
+
+
+def number_of_fields(allele: str) -> int:
+    return len(allele.split(":"))
+
diff --git a/pyard/pyard.py b/pyard/pyard.py
index 71d1ff3..c86afee 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -102,7 +102,7 @@ def __init__(
         # Load MAC codes
         dr.generate_mac_codes(self.db_connection, False)
         # Load ARS mappings
-        self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version, self._config['ping'])
+        self.ars_mappings = dr.generate_ars_mapping(self.db_connection, imgt_version)
         # Load Alleles and XX Codes
         (
             self.valid_alleles,
@@ -141,7 +141,7 @@ def __del__(self):
         self.db_connection.close()
 
     @functools.lru_cache(maxsize=max_cache_size)
-    def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES) -> str:
+    def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) -> str:
         """
         Does ARS reduction with allele and ARS type
 
@@ -173,6 +173,15 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES) -> str:
         if allele.endswith(("P", "G")):
             if redux_type in ["lg", "lgx", "G"]:
                 allele = allele[:-1]
+        if self._config["ping"] and reping:
+            if redux_type in ("lg", "lgx", "U2"):
+                if allele in self.ars_mappings.p_not_g:
+                    # return a joined
+                    return self.ars_mappings.p_not_g[allele]
+                else: 
+                    return self.redux(allele, redux_type, False)
+          
+            
         if redux_type == "G" and allele in self.ars_mappings.g_group:
             if allele in self.ars_mappings.dup_g:
                 return self.ars_mappings.dup_g[allele]

From 8f173f802beb3dcd2c7a8f8d2e45e1520fd2467a Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Tue, 27 Sep 2022 08:34:50 -0500
Subject: [PATCH 3/7] updated test to fix its behavior

---
 pyard/misc.py                 | 2 +-
 tests/features/allele.feature | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyard/misc.py b/pyard/misc.py
index 58a0229..2313c7a 100644
--- a/pyard/misc.py
+++ b/pyard/misc.py
@@ -45,7 +45,7 @@ def get_G_name(a: str) -> str:
     if last_char in PandG_chars + expression_chars:
         a = a[:-1]
     if len(a.split(':')) ==2:
-      return ':'.join([a,"01"])
+      return ':'.join([a,"01"]) + "G"
     else:
       return ':'.join(a.split(':')[0:3]) + "G"
       
diff --git a/tests/features/allele.feature b/tests/features/allele.feature
index 21d0564..732b369 100644
--- a/tests/features/allele.feature
+++ b/tests/features/allele.feature
@@ -21,5 +21,7 @@ Feature: Alleles
 
       | DRB1*14:06:01  | lgx   | DRB1*14:06        |
       | DRB1*14:06:01  | lg    | DRB1*14:06g       |
-      | C*02:02        | lg    | C*02:02g/C*02:10g |
-      | C*02:02        | lgx   | C*02:02/C*02:10   |
+      | C*02:02        | lg    | C*02:02g          |
+      | C*02:02        | lgx   | C*02:02           |
+      | C*02:10        | lg    | C*02:02g          |
+      | C*02:10        | lgx   | C*02:02           |

From b8b56e195ac344698da41c4cb4c85b8f8b9b84d0 Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Wed, 28 Sep 2022 09:54:59 -0500
Subject: [PATCH 4/7] remove lambdas, add and remove comments

---
 pyard/data_repository.py | 4 ++--
 pyard/misc.py            | 1 +
 pyard/pyard.py           | 1 -
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyard/data_repository.py b/pyard/data_repository.py
index 8b2638b..45dd160 100644
--- a/pyard/data_repository.py
+++ b/pyard/data_repository.py
@@ -124,7 +124,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     df = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
 
     # the G-group is named for its first allele
-    df["G"] = df["A"].apply(lambda a: get_G_name(a)) 
+    df["G"] = df["A"].apply(get_G_name) 
 
     # load the hla_nom_p.txt
     ars_P_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt'
@@ -132,7 +132,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     df_P = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";").dropna()
 
     # the P-group is named for its first allele
-    df_P["P"] = df_P["A"].apply(lambda a: get_P_name(a)) 
+    df_P["P"] = df_P["A"].apply(get_P_name) 
 
     # convert slash delimited string to a list
     df_P["A"] = df_P["A"].apply(lambda a: a.split("/"))
diff --git a/pyard/misc.py b/pyard/misc.py
index 2313c7a..0725892 100644
--- a/pyard/misc.py
+++ b/pyard/misc.py
@@ -1,5 +1,6 @@
 # List of expression characters
 expression_chars = ['N', 'Q', 'L', 'S']
+# List of P and G characters
 PandG_chars = ['P', 'G']
 
 
diff --git a/pyard/pyard.py b/pyard/pyard.py
index c86afee..8d05845 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -176,7 +176,6 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) ->
         if self._config["ping"] and reping:
             if redux_type in ("lg", "lgx", "U2"):
                 if allele in self.ars_mappings.p_not_g:
-                    # return a joined
                     return self.ars_mappings.p_not_g[allele]
                 else: 
                     return self.redux(allele, redux_type, False)

From 0c756706943486bf32d9357d8cde157813bbd2d2 Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Wed, 28 Sep 2022 13:59:26 -0500
Subject: [PATCH 5/7] fixed test expected results to new correct answer

---
 tests/test_pyard.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_pyard.py b/tests/test_pyard.py
index c011667..ff2847b 100644
--- a/tests/test_pyard.py
+++ b/tests/test_pyard.py
@@ -156,4 +156,4 @@ def test_allele_duplicated(self):
         # https://github.com/nmdp-bioinformatics/py-ard/issues/135
         allele_code = "C*02:ACMGS"
         allele_code_rx = self.ard.redux_gl(allele_code, "lgx")
-        self.assertEqual(allele_code_rx, "C*02:02/C*02:10")
+        self.assertEqual(allele_code_rx, "C*02:02")

From 1a5c148414f7b5d822f22d2d40347e1888122b8f Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Thu, 29 Sep 2022 14:12:47 -0500
Subject: [PATCH 6/7] ping tests

---
 tests/environment.py          |  6 ++++++
 tests/features/allele.feature | 17 ++++++++++++++++-
 tests/steps/redux_allele.py   |  6 ++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/tests/environment.py b/tests/environment.py
index 4b93784..f778cd1 100644
--- a/tests/environment.py
+++ b/tests/environment.py
@@ -3,3 +3,9 @@
 
 def before_all(context):
     context.ard = ARD("3440", data_dir="/tmp/py-ard")
+
+    # an ard with ping set to True
+    my_config = {
+        "ping": True,
+    }
+    context.ard_ping = ARD("3440", data_dir="/tmp/py-ard", config=my_config)
diff --git a/tests/features/allele.feature b/tests/features/allele.feature
index 732b369..2e78a79 100644
--- a/tests/features/allele.feature
+++ b/tests/features/allele.feature
@@ -1,6 +1,20 @@
 Feature: Alleles
 
-  Scenario Outline:
+  Scenario Outline: allele reduction with ping
+
+    Given the allele as <Allele>
+    When reducing on the <Level> level with ping
+    Then the reduced allele is found to be <Redux Allele>
+
+    Examples:
+      | Allele         | Level | Redux Allele      |
+      | C*02:02        | lg    | C*02:02g          |
+      | C*02:02        | lgx   | C*02:02           |
+      | C*02:10        | lg    | C*02:02g          |
+      | C*02:10        | lgx   | C*02:02           |
+      | C*06:17        | lgx   | C*06:02           |
+
+  Scenario Outline: allele reduction
 
     Given the allele as <Allele>
     When reducing on the <Level> level
@@ -25,3 +39,4 @@ Feature: Alleles
       | C*02:02        | lgx   | C*02:02           |
       | C*02:10        | lg    | C*02:02g          |
       | C*02:10        | lgx   | C*02:02           |
+      | C*06:17        | lgx   | C*06:17           |
diff --git a/tests/steps/redux_allele.py b/tests/steps/redux_allele.py
index f7da113..d0d15e0 100644
--- a/tests/steps/redux_allele.py
+++ b/tests/steps/redux_allele.py
@@ -15,6 +15,12 @@ def step_impl(context, level):
     context.redux_allele = context.ard.redux(context.allele, level)
 
 
+@when("reducing on the {level} level with ping")
+def step_impl(context, level):
+    context.level = level
+    context.redux_allele = context.ard_ping.redux(context.allele, level)
+
+
 @when("reducing on the {level} level (ambiguous)")
 def step_impl(context, level):
     context.level = level

From ffebb519c170d60e9f7f42f75caf98333500f9b9 Mon Sep 17 00:00:00 2001
From: Martin Maiers <mmaiers@nmdp.org>
Date: Thu, 29 Sep 2022 14:19:38 -0500
Subject: [PATCH 7/7] lint

---
 pyard/data_repository.py | 25 ++++++++++++-------------
 pyard/misc.py            | 23 ++++++++++++-----------
 pyard/pyard.py           |  5 ++---
 3 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/pyard/data_repository.py b/pyard/data_repository.py
index 45dd160..c8a80ea 100644
--- a/pyard/data_repository.py
+++ b/pyard/data_repository.py
@@ -23,7 +23,6 @@
 from collections import namedtuple
 import functools
 import sqlite3
-
 import pandas as pd
 
 from . import db
@@ -118,34 +117,35 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
             p_not_g=p_not_g,
         )
 
-
     # load the hla_nom_g.txt
-    ars_G_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt'
+    ars_G_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt"
     df = pd.read_csv(ars_G_url, skiprows=6, names=["Locus", "A", "G"], sep=";").dropna()
 
     # the G-group is named for its first allele
-    df["G"] = df["A"].apply(get_G_name) 
+    df["G"] = df["A"].apply(get_G_name)
 
     # load the hla_nom_p.txt
-    ars_P_url = f'{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt'
+    ars_P_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt"
     # example: C*;06:06:01:01/06:06:01:02/06:271;06:06P
-    df_P = pd.read_csv(ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";").dropna()
+    df_P = pd.read_csv(
+        ars_P_url, skiprows=6, names=["Locus", "A", "P"], sep=";"
+    ).dropna()
 
     # the P-group is named for its first allele
-    df_P["P"] = df_P["A"].apply(get_P_name) 
+    df_P["P"] = df_P["A"].apply(get_P_name)
 
     # convert slash delimited string to a list
     df_P["A"] = df_P["A"].apply(lambda a: a.split("/"))
     df_P = df_P.explode("A")
     # C* 06:06:01:01/06:06:01:02/06:271 06:06P
-    df_P['A'] = df_P['Locus'] + df_P['A']
-    df_P['P'] = df_P['Locus'] + df_P['P']
+    df_P["A"] = df_P["Locus"] + df_P["A"]
+    df_P["P"] = df_P["Locus"] + df_P["P"]
     # C* 06:06:01:01 06:06P
     # C* 06:06:01:02 06:06P
     # C* 06:271 06:06P
-    p_group = df_P.set_index('A')['P'].to_dict()
+    p_group = df_P.set_index("A")["P"].to_dict()
     df_P["2d"] = df_P["A"].apply(get_2field_allele)
-    # lgx has the P-group name without the P for comparison 
+    # lgx has the P-group name without the P for comparison
     df_P["lgx"] = df_P["P"].apply(get_2field_allele)
 
     # convert slash delimited string to a list
@@ -162,7 +162,7 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     df["lg"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]) + "g")
     df["lgx"] = df["G"].apply(lambda a: ":".join(a.split(":")[0:2]))
 
-    # compare df_P["2d"] with df["2d"] to find 2-field alleles in the 
+    # compare df_P["2d"] with df["2d"] to find 2-field alleles in the
     # P-group that aren't in the G-group
     PnotinG = set(df_P["2d"]) - set(df["2d"])
 
@@ -172,7 +172,6 @@ def generate_ars_mapping(db_connection: sqlite3.Connection, imgt_version):
     # dictionary which will define the table
     p_not_g = df_PnotG.set_index("A")["lgx"].to_dict()
 
-
     # multiple Gs
     # goal: identify 2-field alleles that are in multiple G-groups
 
diff --git a/pyard/misc.py b/pyard/misc.py
index 0725892..4d0addb 100644
--- a/pyard/misc.py
+++ b/pyard/misc.py
@@ -1,7 +1,7 @@
 # List of expression characters
-expression_chars = ['N', 'Q', 'L', 'S']
+expression_chars = ["N", "Q", "L", "S"]
 # List of P and G characters
-PandG_chars = ['P', 'G']
+PandG_chars = ["P", "G"]
 
 
 def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str:
@@ -25,7 +25,7 @@ def get_3field_allele(a: str) -> str:
     last_char = a[-1]
     if last_char in PandG_chars:
         a = a[:-1]
-    
+
     return get_n_field_allele(a, 3)
 
 
@@ -39,26 +39,27 @@ def get_2field_allele(a: str) -> str:
 def number_of_fields(allele: str) -> int:
     return len(allele.split(":"))
 
+
 # computes a valid G name based on the ambiguity string
 def get_G_name(a: str) -> str:
-    a = a.split('/')[0]
+    a = a.split("/")[0]
     last_char = a[-1]
     if last_char in PandG_chars + expression_chars:
         a = a[:-1]
-    if len(a.split(':')) ==2:
-      return ':'.join([a,"01"]) + "G"
+    if len(a.split(":")) == 2:
+        return ":".join([a, "01"]) + "G"
     else:
-      return ':'.join(a.split(':')[0:3]) + "G"
-      
+        return ":".join(a.split(":")[0:3]) + "G"
+
+
 # computes a valid P name based on the ambiguity string
 def get_P_name(a: str) -> str:
-    a = a.split('/')[0]
+    a = a.split("/")[0]
     last_char = a[-1]
     if last_char in PandG_chars + expression_chars:
         a = a[:-1]
-    return ':'.join(a.split(':')[0:2]) + "P"
+    return ":".join(a.split(":")[0:2]) + "P"
 
 
 def number_of_fields(allele: str) -> int:
     return len(allele.split(":"))
-
diff --git a/pyard/pyard.py b/pyard/pyard.py
index 8d05845..1381806 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -177,10 +177,9 @@ def redux(self, allele: str, redux_type: VALID_REDUCTION_TYPES, reping=True) ->
             if redux_type in ("lg", "lgx", "U2"):
                 if allele in self.ars_mappings.p_not_g:
                     return self.ars_mappings.p_not_g[allele]
-                else: 
+                else:
                     return self.redux(allele, redux_type, False)
-          
-            
+
         if redux_type == "G" and allele in self.ars_mappings.g_group:
             if allele in self.ars_mappings.dup_g:
                 return self.ars_mappings.dup_g[allele]