Fix issues with user-uploaded mutations (#147)

* Fix issues with user-uploaded mutations; backport of 0c77dd6.
  By better separating the ORM from the user-uploaded content, the two mechanisms should no longer interfere with each other and cause problems.
* Fix get_relationship method
* Ignore user-uploaded mutations in statistics
* Fix to previous commit
* Filter out user mutations in source fields
* Final portion of backporting - search view
* Amend chromosome.py
* Try to use newer nodejs for travis to pass
* Do not attempt to install npm
* Improve sequence view
* Improve filters
reimandlab · Jul 12, 2018 · 6cebf94 · 6cebf94
1 parent a69ca0d
commit 6cebf94
Show file tree

Hide file tree

Showing 12 changed files with 127 additions and 80 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -20,7 +20,9 @@ before_install:
 
 
 install:
-  - sudo apt-get install -y pigz npm
+  - curl -sL https://deb.nodesource.com/setup_8.x | sudo -E bash -
+  - sudo apt-get install -y nodejs
+  - sudo apt-get install -y pigz
   - sudo npm config set strict-ssl false
   - bash setup.sh
   - cd website

diff --git a/website/genomic_mappings.py b/website/genomic_mappings.py
@@ -34,6 +34,7 @@ def get_genomic_muts(self, chrom, dna_pos, dna_ref, dna_alt):
         Returns:
             list of items where each item contains Mutation object and additional metadata
         """
+        from search.mutation_result import SearchResult
 
         from models import Protein, Mutation
         from database import get_or_create
@@ -46,22 +47,28 @@ def get_genomic_muts(self, chrom, dna_pos, dna_ref, dna_alt):
         ]
 
         # this could be speed up by: itemgetters, accumulative queries and so on
-        for item in items:
+        results = []
 
+        for item in items:
             protein = Protein.query.get(item['protein_id'])
-            item['protein'] = protein
-
             mutation, created = get_or_create(
                 Mutation,
                 protein=protein,
                 protein_id=protein.id,  # TODO: should use either protein or protein_id
                 position=item['pos'],
                 alt=item['alt']
             )
-            item['mutation'] = mutation
-            item['type'] = 'genomic'
+            results.append(
+                SearchResult(
+                    protein=protein,
+                    mutation=mutation,
+                    is_mutation_novel=created,
+                    type='genomic',
+                    **item
+                )
+            )
 
-        return items
+        return results
 
     def iterate_known_muts(self):
         from models import Mutation

diff --git a/website/models/bio.py b/website/models/bio.py
@@ -1067,21 +1067,17 @@ def mutation_id(cls):
         return db.Column(db.Integer, db.ForeignKey('mutation.id'))
 
 
-class UserUploadedMutation(MutationDetails, BioModel):
+class UserUploadedMutation(MutationDetails):
 
     name = 'user'
     display_name = 'User\'s mutations'
 
     value_type = 'count'
 
-    def __init__(self, **kwargs):
-        self.count = kwargs.pop('count', 0)
-        super().__init__(**kwargs)
-
-    # having count not mapped with SQLAlchemy prevents useless attempts
-    # to update recodrs which are not stored in database at all:
-    # count = db.Column(db.Integer, default=0)
-    query = db.Column(db.Text)
+    def __init__(self, count, query, mutation):
+        self.count = count
+        self.query = query
+        self.mutation = mutation
 
     def get_value(self, filter=lambda x: x):
         return self.count
@@ -1568,6 +1564,7 @@ class Mutation(BioModel):
             mutation_details_relationship(model)
         )
         for model in source_specific_data
+        if model != UserUploadedMutation
     }
 
     vars().update(source_data_relationships)
@@ -1582,10 +1579,17 @@ def get_source_model(cls, name):
     def get_relationship(cls, mutation_class, class_relation_map={}):
         if not class_relation_map:
             for model in cls.source_specific_data:
-                class_relation_map[model] = getattr(cls, 'meta_' + model.name)
+                if model != UserUploadedMutation:
+                    class_relation_map[model] = getattr(cls, 'meta_' + model.name)
         return class_relation_map[mutation_class]
 
     source_fields = OrderedDict(
+        (model.name, 'meta_' + model.name)
+        for model in source_specific_data
+        if model != MIMPMutation and model != UserUploadedMutation
+    )
+
+    visible_fields = OrderedDict(
         (model.name, 'meta_' + model.name)
         for model in source_specific_data
         if model != MIMPMutation

diff --git a/website/models/cms.py b/website/models/cms.py
@@ -132,7 +132,6 @@ def data(self):
         if not hasattr(self, '_data'):
             try:
                 self._data = self._load_from_file()
-                self._bind_to_session()
             except FileNotFoundError:
                 # None if associated file was deleted.
                 # Be aware of this line when debugging.
@@ -145,12 +144,6 @@ def data(self, data):
         uri = self._save_to_file(data, self.uri)
         self.uri = uri
 
-        # to be refactored after November 3, when all datasets
-        # will already have the following properties defined:
-        if data:
-            self.query_count = self.query_size
-            self.results_count = self.mutations_count
-
     def remove(self, commit=True):
         """Performs hard-delete of dataset.
 
@@ -248,7 +241,7 @@ def mutations(self):
         results = self.data.results
         for results in results.values():
             for result in results:
-                mutations.append(result['mutation'])
+                mutations.append(result.mutation)
         return mutations
 
     @property
@@ -258,25 +251,8 @@ def mutations_count(self):
         return self.results_count
 
     def get_mutation_details(self, protein, pos, alt):
-        for mutation in self.mutations:
-            if (
-                mutation.protein == protein and
-                mutation.position == pos and
-                mutation.alt == alt
-            ):
-                return mutation.meta_user
-
-    def _bind_to_session(self):
-        results = self.data.results
-        proteins = {}
-        for name, results in results.items():
-            for result in results:
-                protein = result['protein']
-                if protein.refseq not in proteins:
-                    proteins[protein.refseq] = db.session.merge(result['protein'])
-
-                result['protein'] = proteins[protein.refseq]
-                result['mutation'] = db.session.merge(result['mutation'])
+        protein_results = self.data.results_by_refseq[protein.refseq]
+        return protein_results[pos, alt].meta_user
 
 
 class User(CMSModel):

diff --git a/website/search/mutation_result.py b/website/search/mutation_result.py
@@ -0,0 +1,46 @@
+from models import Protein, Mutation
+
+
+class SearchResult:
+
+    def __init__(self, protein, mutation, is_mutation_novel, type, **kwargs):
+        self.protein = protein
+        self.mutation = mutation
+        self.is_mutation_novel = is_mutation_novel
+        self.type = type
+        self.meta_user = None
+        self.__dict__.update(kwargs)
+
+    def __getstate__(self):
+        state = self.__dict__.copy()
+
+        state['protein_refseq'] = self.protein.refseq
+        del state['protein']
+
+        state['mutation_kwargs'] = {
+            'position': self.mutation.position,
+            'alt': self.mutation.alt
+        }
+        del state['mutation']
+
+        state['meta_user'].mutation = None
+
+        return state
+
+    def __setstate__(self, state):
+
+        state['protein'] = Protein.query.filter_by(
+            refseq=state['protein_refseq']
+        ).one()
+        del state['protein_refseq']
+
+        state['mutation'] = Mutation.query.filter_by(
+            protein=state['protein'],
+            **state['mutation_kwargs']
+        ).one()
+        del state['mutation_kwargs']
+
+        state['meta_user'].mutation = state['mutation']
+        state['mutation'].meta_user = state['meta_user']
+
+        self.__dict__.update(state)
diff --git a/website/stats.py b/website/stats.py
@@ -150,6 +150,8 @@ def count_by_source(self, sources):
     def __init__(self):
 
         for model in Mutation.source_specific_data:
+            if model == models.UserUploadedMutation:
+                continue
             # dirty trick: 1KGenomes is not a valid name in Python
             name = 'mutations_' + model.name.replace('1', 'T')
 

diff --git a/website/views/_commons.py b/website/views/_commons.py
@@ -5,6 +5,7 @@
 from helpers.bioinf import decode_raw_mutation
 from models import Mutation, Drug, Gene
 from models import Protein
+from search.mutation_result import SearchResult
 
 
 def iterate_affected_isoforms(gene_name, ref, pos, alt):
@@ -76,14 +77,15 @@ def get_protein_muts(gene_name, mut):
         )
 
         items.append(
-            {
-                'protein': isoform,
-                'ref': ref,
-                'alt': alt,
-                'pos': pos,
-                'mutation': mutation,
-                'type': 'proteomic'
-            }
+            SearchResult(
+                protein=isoform,
+                mutation=mutation,
+                is_mutation_novel=created,
+                type='proteomic',
+                ref=ref,
+                alt=alt,
+                pos=pos,
+            )
         )
     return items
 

diff --git a/website/views/chromosome.py b/website/views/chromosome.py
@@ -98,7 +98,7 @@ def mutation(self, chrom, dna_pos, dna_ref, dna_alt):
         items = bdb.get_genomic_muts(chrom, dna_pos, dna_ref, dna_alt)
 
         raw_mutations = filter_manager.apply([
-            item['mutation'] for
+            item.mutation for
             item in items
         ])
 

diff --git a/website/views/filters.py b/website/views/filters.py
@@ -94,6 +94,8 @@ def source_filter_to_sqlalchemy(source_filter, target):
 
 
 def source_to_sa_filter(source_name, target=Mutation):
+    if source_name == 'user':
+        return True
     field_name = Mutation.source_fields[source_name]
     field = getattr(target, field_name)
     return has_or_any(field)
@@ -148,7 +150,7 @@ def common_filters(
     return [
         Filter(
             Mutation, 'sources', comparators=['in'],
-            choices=list(Mutation.source_fields.keys()),
+            choices=list(Mutation.visible_fields.keys()),
             default=default_source, nullable=source_nullable,
             as_sqlalchemy=source_filter_to_sqlalchemy
         ),

diff --git a/website/views/mutation.py b/website/views/mutation.py
@@ -3,8 +3,6 @@
 from flask_login import current_user
 from models import Protein
 from models import Mutation
-#from models import Domain
-#from models import Site
 
 
 class MutationView(FlaskView):
@@ -38,12 +36,11 @@ def show(self, refseq, position, alt):
 
         for source in mutation.source_fields:
             model = mutation.get_source_model(source)
-            if source != 'user':
-                datasets.append({
-                    'filter': 'Mutation.sources:in:' + source,
-                    'name': model.display_name,
-                    'mutation_present': sources_with_mutation.get(source, False)
-                })
+            datasets.append({
+                'filter': 'Mutation.sources:in:' + source,
+                'name': model.display_name,
+                'mutation_present': sources_with_mutation.get(source, False)
+            })
 
         user_datasets = []
 

diff --git a/website/views/search.py b/website/views/search.py
@@ -1,6 +1,6 @@
 import json
 from collections import defaultdict
-from operator import itemgetter
+from operator import attrgetter
 from urllib.parse import unquote
 
 from flask import make_response, redirect, abort
@@ -137,6 +137,7 @@ def __init__(self, vcf_file=None, text_query=None, filter_manager=None):
         """
         self.query = ''
         self.results = {}
+        self.results_by_refseq = defaultdict(dict)
         self.without_mutations = []
         self.badly_formatted = []
         self.hidden_results_cnt = 0
@@ -154,7 +155,7 @@ def __init__(self, vcf_file=None, text_query=None, filter_manager=None):
             def data_filter(elements):
                 return filter_manager.apply(
                     elements,
-                    itemgetter=itemgetter('mutation')
+                    itemgetter=attrgetter('mutation')
                 )
         else:
             def data_filter(elements):
@@ -196,14 +197,20 @@ def add_mutation_items(self, items, query_line):
             return False
 
         if query_line in self.results:
-            for item in self.results[query_line]:
-                item['mutation'].meta_user.count += 1
+            for result in self.results[query_line]:
+                result.meta_user.count += 1
+                mutation = result.mutation
+                self.results_by_refseq[mutation.protein.refseq][mutation.position, mutation.alt] = result
         else:
-            for item in items:
-                item['mutation'].meta_user = UserUploadedMutation(
+            for result in items:
+                mutation = result.mutation
+                result.meta_user = UserUploadedMutation(
                     count=1,
-                    query=query_line
+                    query=query_line,
+                    mutation=result.mutation
                 )
+                mutation.meta_user = result.meta_user
+                self.results_by_refseq[mutation.protein.refseq][mutation.position, mutation.alt] = result
             self.results[query_line] = items
 
     def parse_vcf(self, vcf_file):
@@ -532,9 +539,6 @@ def mutations(self):
                     'success'
                 )
 
-            for items in mutation_search.results.values():
-                for item in items:
-                    db.session.add(item['mutation'])
             celery_task.forget()
         else:
             mutation_search = MutationSearch()
@@ -961,13 +965,18 @@ def match_aa_mutation(gene, mut, query):
     return prepare_items(items, query, 'aminoacid mutation')
 
 
-def prepare_items(items, query, value_type):
-    for item in items:
-        item['protein'] = item['protein'].to_json()
-        item['mutation'] = item['mutation'].to_json()
-        item['input'] = query
-        item['type'] = value_type
-    return items
+def prepare_items(results, query, value_type):
+    return [
+        {
+            'protein': result.protein.to_json(),
+            'mutation': result.mutation.to_json(),
+            'input': query,
+            'type': value_type,
+            'pos': result.pos,
+            'alt': result.alt
+        }
+        for result in results
+    ]
 
 
 def autocomplete_mutation(query, limit=None):