From 7414a86189d070de7d3d44aad8caef7c26b58dab Mon Sep 17 00:00:00 2001 From: Laura Luebbert Date: Wed, 28 Jun 2023 18:39:45 -0700 Subject: [PATCH] more mino edits --- tests/fixtures/test_elm.json | 74 ++---------------------------------- tests/test_elm.py | 58 ++++++++++++---------------- 2 files changed, 28 insertions(+), 104 deletions(-) diff --git a/tests/fixtures/test_elm.json b/tests/fixtures/test_elm.json index 2566380b..9392f1d8 100644 --- a/tests/fixtures/test_elm.json +++ b/tests/fixtures/test_elm.json @@ -54,76 +54,10 @@ "test2": { "type": "assert_equal", "args": { - "sequence": "DIEFRVLH" + "sequence": "DIEFRVLH", + "json": true }, - "expected_result": [ - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "EFRVL", - "1", - "7", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "EFRVL", - "3", - "7", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "DEG_Nend_UBRbox_2", - "ELME000352", - "N-degron", - "The N-end rule pathway regulates protein stability by targeting proteins for ubiquitin-dependent proteasomal degradation. Polyubiquitylation of N-end rule substrates depends on their recognition by N-recognins, specific E3 ubiquitin ligases that use their conserved UBR-box and N-box domains to bind specific N-terminal protein motifs, called N-degrons, in their target proteins. N-degrons are defined by a destabilizing N-terminal residue. Type I destabilizing residues can either occur as primary destabilizing residues, which are positively charged amino acids directly recognized by N-recognins, or as secondary and tertiary destabilizing amino acids, which can be conjugated to a primary destabilizing residue. N-degrons containing type I destabilizing residues are specifically bound by the UBR-box of N-recognins. In contrast, type II destabilizing residues, which comprise bulky hydrophobic amino acids, initiate protein degradation by binding to the N-box of N-recognins.", - "This class of N-degrons is defined by a negatively charged type I secondary destabilizing Asp or Glu residue in the N-terminal position that is required to be arginylated for recognition by the UBR-box of N-recognins (Tasaki,2012). Asp- or Glu-containing pre-N-degrons can be generated by internal cleavage of a protein. Generation by Met excision has not been investigated yet as the known N-terminal Met-aminopeptidases that catalyze Met excision show no activity towards larger amino acids like Glu or Asp (Varshavsky,2011). It is important to note that the ELM prediction tool will only return internal N-degrons if the sequence of the cleavage product is entered for analysis.Once the secondary destabilizing Asp or Glu residue is exposed at the N-terminus of the protein, it is targeted by ATE1-encoded arginyl transferases (R-transferases) that transfer Arg from an Arg-t-RNA to the N-terminal amino group of the pre-N-degron. This N-terminal arginylation of the protein results in generation of a functional N-degron (Balzi,1990; Sriram,2011). In Mammals, six isoforms of R-transferase have been detected, differing in cellular location, tissue distribution and activity (Tasaki,2007). Their specificity depends on the N-terminal acidic residue in the substrate and is not affected by adjacent amino acids (Rai,2005; Hu,2005). The active N-degron generated after arginylation specifically binds to the UBR-box of N-recognins, initiating the degradation of the protein. The UBR-box is a highly conserved region whose tertiary structure is stabilized by two zinc fingers, which form a negatively charged binding pocket that rigidly binds the positively charged N-terminal amino acid. In addition, the UBR-box forms electrostatic interactions and hydrogen bonds with the free alpha amino group, the side chain of the acidic residue in the second position and the backbone of the first three residues (Choi,2010; Tasaki,2012) (3NIK; 3NIL).", - "DEG_Nend_Nbox_1 DEG_Nend_UBRbox_1 DEG_Nend_UBRbox_2 DEG_Nend_UBRbox_3 DEG_Nend_UBRbox_4", - "^M{0,1}([ED]).", - "0.0002537", - "Eukaryota", - "zf-UBR (PF02207) Putative zinc finger in N-recognin (UBR box) (Stochiometry: 1 : 1)", - "DI", - "1", - "2", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ] - ] + "expected_result": [] }, "test3": { "type": "assert_equal", @@ -5662,6 +5596,6 @@ "args": { "sequence": "banana" }, - "expected_result": "KeyError" + "expected_result": "None" } } \ No newline at end of file diff --git a/tests/test_elm.py b/tests/test_elm.py index d55f4dfe..d546023a 100644 --- a/tests/test_elm.py +++ b/tests/test_elm.py @@ -3,66 +3,56 @@ import json import time from gget.gget_elm import elm -from bs4 import BeautifulSoup # Load dictionary containing arguments and expected results with open("./tests/fixtures/test_elm.json") as json_file: elm_dict = json.load(json_file) # Sleep time (in seconds) between tests to prevent surpassing the server rate limit -sleep_time = 65 +sleep_time = 120 + class Testelm(unittest.TestCase): def test_elm_aa_seq(self): test = "test1" expected_result = elm_dict[test]["expected_result"] - time.sleep(sleep_time + 5) - result_to_test = elm(**elm_dict[test]["args"]) - # If result is a DataFrame, convert to list - if isinstance(result_to_test, pd.DataFrame): - #replace \xa0 with a space. - result_to_test.replace(u'\xa0',u' ', regex=True, inplace=True) - # cast all values to str add astype - result_to_test = result_to_test.astype(str).values.tolist() - + time.sleep(sleep_time) + result_to_test = elm(**elm_dict[test]["args"]) + + # replace \xa0 with a space. + result_to_test.replace("\xa0", " ", regex=True, inplace=True) + # cast all values to str add astype + result_to_test = result_to_test.astype(str).values.tolist() + self.assertListEqual(result_to_test, expected_result) def test_elm_aa_seq_2(self): test = "test2" expected_result = elm_dict[test]["expected_result"] - time.sleep(sleep_time + 5) - result_to_test = elm(**elm_dict[test]["args"]) - # If result is a DataFrame, convert to list - if isinstance(result_to_test, pd.DataFrame): - #replace \xa0 with a space. - result_to_test.replace(u'\xa0',u' ', regex=True, inplace=True) - # cast all values to str add astype - result_to_test = result_to_test.astype(str).values.tolist() - + time.sleep(sleep_time) - self.assertListEqual(result_to_test, expected_result) + result_to_test = elm(**elm_dict[test]["args"]) + self.assertEqual(result_to_test, expected_result) def test_elm_uniprot_id(self): test = "test3" expected_result = elm_dict[test]["expected_result"] - time.sleep(sleep_time * 3 + 5) + + time.sleep(sleep_time * 3) result_to_test = elm(**elm_dict[test]["args"]) - # If result is a DataFrame, convert to list - if isinstance(result_to_test, pd.DataFrame): - #replace \xa0 with a space. - result_to_test.replace(u'\xa0',u' ', regex=True, inplace=True) - # cast all values to str add astype - result_to_test = result_to_test.astype(str).values.tolist() - - time.sleep(sleep_time * 3 + 5) + + # replace \xa0 with a space. + result_to_test.replace("\xa0", " ", regex=True, inplace=True) + # cast all values to str add astype + result_to_test = result_to_test.astype(str).values.tolist() + self.assertListEqual(result_to_test, expected_result) def test_elm_bad_aa_seq(self): - time.sleep(sleep_time) test = "test5" - with self.assertRaises(ValueError): - elm(**elm_dict[test]["args"]) - time.sleep(sleep_time * 3 + 5) + time.sleep(sleep_time) + result_to_test = elm(**elm_dict[test]["args"]) + self.assertIsNone(result_to_test, "Bad AA sequence result is not None.")