From b7f17ea1bcc704dc3bdcaecd7286b5fedb300151 Mon Sep 17 00:00:00 2001 From: Alexey Strokach Date: Wed, 15 Apr 2015 20:32:50 -0400 Subject: [PATCH] Fix Provean supporting set creation failing when the protein starts with a non-canonical amino acid (e.g. 'X'). --- elaspic/domain_alignment.py | 17 ++++++++++++++--- elaspic/helper_functions.py | 1 + tests/test_database.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/elaspic/domain_alignment.py b/elaspic/domain_alignment.py index 67ffdda..0774ca4 100755 --- a/elaspic/domain_alignment.py +++ b/elaspic/domain_alignment.py @@ -160,8 +160,11 @@ def build_provean_supporting_set( """ """ # Get the required parameters - first_aa = uniprot_sequence[0] - domain_mutation = '{0}1{0}'.format(first_aa) + any_position = 0 + while uniprot_sequence[any_position] not in hf.canonical_amino_acids: + any_position += 1 + first_aa = uniprot_sequence[any_position] + domain_mutation = '{0}{1}{0}'.format(first_aa, any_position+1) uniprot_seqrecord = SeqRecord( seq=Seq(uniprot_sequence), id=str(uniprot_id), description=uniprot_name) @@ -182,10 +185,18 @@ def build_provean_supporting_set( logger.error(error_message) raise errors.ProveanError(error_message) + provean_supset_length = None for line in result.split('\n'): if 'Number of supporting sequences used:' in line: provean_supset_length = int(line.split()[-1]) - + if provean_supset_length is None: + logger.error('Provean supporting set length could not be estimated. 
This is an error!') + logger.error('Provean result: {}'.format(result)) + logger.error('Provean error_message: {}'.format(error_message)) + logger.error('Provean return_code: {}'.format(return_code)) + logger.error('Uniprot sequence: {}'.format(uniprot_sequence)) + logger.error('First amino acid: {}'.format(first_aa)) + logger.error('Domain mutation: {}'.format(domain_mutation)) return provean_supset_filename, provean_supset_length diff --git a/elaspic/helper_functions.py b/elaspic/helper_functions.py index 960a0cb..862b017 100755 --- a/elaspic/helper_functions.py +++ b/elaspic/helper_functions.py @@ -24,6 +24,7 @@ #%% +canonical_amino_acids = 'ARNDCEQGHILKMFPSTWYV' uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' logger = None diff --git a/tests/test_database.py b/tests/test_database.py index 28986db..52b55a9 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -55,7 +55,7 @@ psql_command_template = r""" \ copy {db_schema}.{table_name} \ from '{organism_folder}/{table_name}.tsv' \ -with csv delimiter E'\t' null '\N' escape '\\' +with csv delimiter E'\t' null '\N' escape '\\\\' """ def _format_configs(configs, table_name):