From 9f1c49e127758613af859004bea55abf7cf7f6bf Mon Sep 17 00:00:00 2001 From: krassowski Date: Sun, 7 Mar 2021 16:23:28 +0000 Subject: [PATCH] Replace ambiguous names, document divide_muts_by_sites --- website/views/network.py | 33 ++++++++++++++++++++++----------- website/views/pathway.py | 10 +++++++--- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/website/views/network.py b/website/views/network.py index 6f3fac157..0e626abb9 100644 --- a/website/views/network.py +++ b/website/views/network.py @@ -45,6 +45,7 @@ def __init__(self, protein, **kwargs): def divide_muts_by_sites(mutations, sites): + """Aggregates mutations that are within +/-7 positions from sites by site""" from collections import defaultdict muts_by_site = defaultdict(list) @@ -54,19 +55,29 @@ def divide_muts_by_sites(mutations, sites): sites.sort(key=lambda site: site.position) mutations.sort(key=lambda mutation: mutation.position) - m = 0 + mutation_index = 0 for site in sites: - l = site.position - 7 - p = site.position + 7 - while mutations[m].position < l: - m += 1 - if m == len(mutations): + site_flank_start = site.position - 7 + site_flank_end = site.position + 7 + # while the mutation that we look at is far before the site we look at + while mutations[mutation_index].position < site_flank_start: + # we can just look at the next mutation + mutation_index += 1 + # unless we already checked all mutations + if mutation_index == len(mutations): + # and in that case our work is done; this is the case because + # sites and mutations are sorted by positions (see above), + # so it is guaranteed that all mutations are before all the remaining sites return muts_by_site - ms = m - while mutations[ms].position <= p: - muts_by_site[site].append(mutations[ms]) - ms += 1 - if ms == len(mutations): + # we now look at mutations which are either within the range of the site, + # or maybe are behind it (we do not know yet) + candidate_mut_index = mutation_index + # while the candidate mutation is 
in range of the site flanks: + while mutations[candidate_mut_index].position <= site_flank_end: + # append it to the mutations associated with the site that we currently analyze + muts_by_site[site].append(mutations[candidate_mut_index]) + candidate_mut_index += 1 + if candidate_mut_index == len(mutations): break return muts_by_site diff --git a/website/views/pathway.py b/website/views/pathway.py index 3d26bdddb..6f8ad101b 100644 --- a/website/views/pathway.py +++ b/website/views/pathway.py @@ -87,10 +87,14 @@ def index(self): for gene_list in gene_lists: matched = False - for l in matched_lists: - if gene_list.mutation_source_name == l['mutation_source'] and gene_list.site_type == l['site_type']: + for candidate_match in matched_lists: + if ( + gene_list.mutation_source_name == candidate_match['mutation_source'] + and + gene_list.site_type == candidate_match['site_type'] + ): matched = True - l['gene_list'] = gene_list + candidate_match['gene_list'] = gene_list if not matched: matched_lists.append({ 'pathways_list': None,