Skip to content

Commit

Permalink
Fix issues with user-uploaded mutations (#147)
Browse files Browse the repository at this point in the history
* Fix issues with user-uploaded mutations; backport of 0c77dd6

By better separating the ORM from the user-uploaded content,
the two mechanisms should no longer interfere with each other and cause problems.

* Fix get_relationship method
* Ignore user uploaded mutations in statistics
* Fix to previous commit
* Filter out user mutations in source fields
* Final portion of backporting - search view
* Amend chromosome.py
* Try to use newer nodejs for travis to pass
* Do not attempt to install npm
* Improve sequence view
* Improve filters
  • Loading branch information
krassowski committed Jul 12, 2018
1 parent a69ca0d commit 6cebf94
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 80 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ before_install:


install:
- sudo apt-get install -y pigz npm
- curl -sL https://deb.nodesource.com/setup_8.x | sudo -E bash -
- sudo apt-get install -y nodejs
- sudo apt-get install -y pigz
- sudo npm config set strict-ssl false
- bash setup.sh
- cd website
Expand Down
19 changes: 13 additions & 6 deletions website/genomic_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def get_genomic_muts(self, chrom, dna_pos, dna_ref, dna_alt):
Returns:
list of items where each item contains Mutation object and additional metadata
"""
from search.mutation_result import SearchResult

from models import Protein, Mutation
from database import get_or_create
Expand All @@ -46,22 +47,28 @@ def get_genomic_muts(self, chrom, dna_pos, dna_ref, dna_alt):
]

# this could be sped up by: itemgetters, accumulative queries and so on
for item in items:
results = []

for item in items:
protein = Protein.query.get(item['protein_id'])
item['protein'] = protein

mutation, created = get_or_create(
Mutation,
protein=protein,
protein_id=protein.id, # TODO: should use either protein or protein_id
position=item['pos'],
alt=item['alt']
)
item['mutation'] = mutation
item['type'] = 'genomic'
results.append(
SearchResult(
protein=protein,
mutation=mutation,
is_mutation_novel=created,
type='genomic',
**item
)
)

return items
return results

def iterate_known_muts(self):
from models import Mutation
Expand Down
24 changes: 14 additions & 10 deletions website/models/bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1067,21 +1067,17 @@ def mutation_id(cls):
return db.Column(db.Integer, db.ForeignKey('mutation.id'))


class UserUploadedMutation(MutationDetails, BioModel):
class UserUploadedMutation(MutationDetails):

name = 'user'
display_name = 'User\'s mutations'

value_type = 'count'

def __init__(self, **kwargs):
self.count = kwargs.pop('count', 0)
super().__init__(**kwargs)

# having count not mapped with SQLAlchemy prevents useless attempts
# to update records which are not stored in database at all:
# count = db.Column(db.Integer, default=0)
query = db.Column(db.Text)
def __init__(self, count, query, mutation):
self.count = count
self.query = query
self.mutation = mutation

def get_value(self, filter=lambda x: x):
return self.count
Expand Down Expand Up @@ -1568,6 +1564,7 @@ class Mutation(BioModel):
mutation_details_relationship(model)
)
for model in source_specific_data
if model != UserUploadedMutation
}

vars().update(source_data_relationships)
Expand All @@ -1582,10 +1579,17 @@ def get_source_model(cls, name):
def get_relationship(cls, mutation_class, class_relation_map={}):
if not class_relation_map:
for model in cls.source_specific_data:
class_relation_map[model] = getattr(cls, 'meta_' + model.name)
if model != UserUploadedMutation:
class_relation_map[model] = getattr(cls, 'meta_' + model.name)
return class_relation_map[mutation_class]

source_fields = OrderedDict(
(model.name, 'meta_' + model.name)
for model in source_specific_data
if model != MIMPMutation and model != UserUploadedMutation
)

visible_fields = OrderedDict(
(model.name, 'meta_' + model.name)
for model in source_specific_data
if model != MIMPMutation
Expand Down
30 changes: 3 additions & 27 deletions website/models/cms.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ def data(self):
if not hasattr(self, '_data'):
try:
self._data = self._load_from_file()
self._bind_to_session()
except FileNotFoundError:
# None if associated file was deleted.
# Be aware of this line when debugging.
Expand All @@ -145,12 +144,6 @@ def data(self, data):
uri = self._save_to_file(data, self.uri)
self.uri = uri

# to be refactored after November 3, when all datasets
# will already have the following properties defined:
if data:
self.query_count = self.query_size
self.results_count = self.mutations_count

def remove(self, commit=True):
"""Performs hard-delete of dataset.
Expand Down Expand Up @@ -248,7 +241,7 @@ def mutations(self):
results = self.data.results
for results in results.values():
for result in results:
mutations.append(result['mutation'])
mutations.append(result.mutation)
return mutations

@property
Expand All @@ -258,25 +251,8 @@ def mutations_count(self):
return self.results_count

def get_mutation_details(self, protein, pos, alt):
for mutation in self.mutations:
if (
mutation.protein == protein and
mutation.position == pos and
mutation.alt == alt
):
return mutation.meta_user

def _bind_to_session(self):
results = self.data.results
proteins = {}
for name, results in results.items():
for result in results:
protein = result['protein']
if protein.refseq not in proteins:
proteins[protein.refseq] = db.session.merge(result['protein'])

result['protein'] = proteins[protein.refseq]
result['mutation'] = db.session.merge(result['mutation'])
protein_results = self.data.results_by_refseq[protein.refseq]
return protein_results[pos, alt].meta_user


class User(CMSModel):
Expand Down
46 changes: 46 additions & 0 deletions website/search/mutation_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from models import Protein, Mutation


class SearchResult:
    """One hit of a mutation search: a mutation in a protein plus metadata.

    The object can be pickled without dragging the `protein` and `mutation`
    objects along: on pickling they are replaced with lightweight lookup
    keys (protein refseq, mutation position and alt) and on unpickling they
    are fetched again with `Protein.query` / `Mutation.query`.

    Attributes:
        protein: the protein (isoform) the mutation belongs to.
        mutation: the mutation object of this hit.
        is_mutation_novel: flag passed by the caller (the `created` value
            returned by `get_or_create` at the visible call sites).
        type: kind of the result, e.g. 'genomic' or 'proteomic'.
        meta_user: user-upload metadata attached later by the search code;
            `None` until it is assigned.

    Any additional keyword arguments (e.g. `ref`, `alt`, `pos`) are stored
    as plain instance attributes.
    """

    def __init__(self, protein, mutation, is_mutation_novel, type, **kwargs):
        self.protein = protein
        self.mutation = mutation
        self.is_mutation_novel = is_mutation_novel
        self.type = type
        self.meta_user = None
        # extra metadata becomes attributes; it may override the defaults above
        self.__dict__.update(kwargs)

    def __getstate__(self):
        """Return a picklable state with protein/mutation replaced by keys.

        The live object is left untouched: `meta_user` is shallow-copied
        before its back-reference to the mutation is cleared, and a missing
        (`None`) `meta_user` is passed through as is.
        """
        from copy import copy

        state = self.__dict__.copy()

        state['protein_refseq'] = self.protein.refseq
        del state['protein']

        state['mutation_kwargs'] = {
            'position': self.mutation.position,
            'alt': self.mutation.alt
        }
        del state['mutation']

        meta_user = state['meta_user']
        if meta_user is not None:
            # `state` is only a shallow copy of __dict__, so clearing the
            # reference in place would also detach the mutation from the
            # object that is still in use; work on a copy instead.
            detached = copy(meta_user)
            detached.mutation = None
            state['meta_user'] = detached

        return state

    def __setstate__(self, state):
        """Restore the object, re-fetching protein and mutation by their keys.

        NOTE(review): both lookups use `.one()`, which presumably raises when
        there is no matching row - e.g. for a novel mutation that was never
        stored. Confirm that this is the intended behaviour.
        """
        state['protein'] = Protein.query.filter_by(
            refseq=state['protein_refseq']
        ).one()
        del state['protein_refseq']

        state['mutation'] = Mutation.query.filter_by(
            protein=state['protein'],
            **state['mutation_kwargs']
        ).one()
        del state['mutation_kwargs']

        meta_user = state['meta_user']
        if meta_user is not None:
            # re-link the user metadata and the mutation in both directions
            meta_user.mutation = state['mutation']
            state['mutation'].meta_user = meta_user

        self.__dict__.update(state)
2 changes: 2 additions & 0 deletions website/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ def count_by_source(self, sources):
def __init__(self):

for model in Mutation.source_specific_data:
if model == models.UserUploadedMutation:
continue
# dirty trick: 1KGenomes is not a valid name in Python
name = 'mutations_' + model.name.replace('1', 'T')

Expand Down
18 changes: 10 additions & 8 deletions website/views/_commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from helpers.bioinf import decode_raw_mutation
from models import Mutation, Drug, Gene
from models import Protein
from search.mutation_result import SearchResult


def iterate_affected_isoforms(gene_name, ref, pos, alt):
Expand Down Expand Up @@ -76,14 +77,15 @@ def get_protein_muts(gene_name, mut):
)

items.append(
{
'protein': isoform,
'ref': ref,
'alt': alt,
'pos': pos,
'mutation': mutation,
'type': 'proteomic'
}
SearchResult(
protein=isoform,
mutation=mutation,
is_mutation_novel=created,
type='proteomic',
ref=ref,
alt=alt,
pos=pos,
)
)
return items

Expand Down
2 changes: 1 addition & 1 deletion website/views/chromosome.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def mutation(self, chrom, dna_pos, dna_ref, dna_alt):
items = bdb.get_genomic_muts(chrom, dna_pos, dna_ref, dna_alt)

raw_mutations = filter_manager.apply([
item['mutation'] for
item.mutation for
item in items
])

Expand Down
4 changes: 3 additions & 1 deletion website/views/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ def source_filter_to_sqlalchemy(source_filter, target):


def source_to_sa_filter(source_name, target=Mutation):
    """Build the filter expression selecting mutations present in a source.

    The 'user' source has no entry in `Mutation.source_fields`, so it is
    short-circuited to `True` (i.e. it does not restrict anything).
    For any other source the corresponding field of `target` is looked up
    by name and wrapped with `has_or_any`.
    """
    if source_name == 'user':
        return True
    source_field = getattr(target, Mutation.source_fields[source_name])
    return has_or_any(source_field)
Expand Down Expand Up @@ -148,7 +150,7 @@ def common_filters(
return [
Filter(
Mutation, 'sources', comparators=['in'],
choices=list(Mutation.source_fields.keys()),
choices=list(Mutation.visible_fields.keys()),
default=default_source, nullable=source_nullable,
as_sqlalchemy=source_filter_to_sqlalchemy
),
Expand Down
13 changes: 5 additions & 8 deletions website/views/mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from flask_login import current_user
from models import Protein
from models import Mutation
#from models import Domain
#from models import Site


class MutationView(FlaskView):
Expand Down Expand Up @@ -38,12 +36,11 @@ def show(self, refseq, position, alt):

for source in mutation.source_fields:
model = mutation.get_source_model(source)
if source != 'user':
datasets.append({
'filter': 'Mutation.sources:in:' + source,
'name': model.display_name,
'mutation_present': sources_with_mutation.get(source, False)
})
datasets.append({
'filter': 'Mutation.sources:in:' + source,
'name': model.display_name,
'mutation_present': sources_with_mutation.get(source, False)
})

user_datasets = []

Expand Down
43 changes: 26 additions & 17 deletions website/views/search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from collections import defaultdict
from operator import itemgetter
from operator import attrgetter
from urllib.parse import unquote

from flask import make_response, redirect, abort
Expand Down Expand Up @@ -137,6 +137,7 @@ def __init__(self, vcf_file=None, text_query=None, filter_manager=None):
"""
self.query = ''
self.results = {}
self.results_by_refseq = defaultdict(dict)
self.without_mutations = []
self.badly_formatted = []
self.hidden_results_cnt = 0
Expand All @@ -154,7 +155,7 @@ def __init__(self, vcf_file=None, text_query=None, filter_manager=None):
def data_filter(elements):
return filter_manager.apply(
elements,
itemgetter=itemgetter('mutation')
itemgetter=attrgetter('mutation')
)
else:
def data_filter(elements):
Expand Down Expand Up @@ -196,14 +197,20 @@ def add_mutation_items(self, items, query_line):
return False

if query_line in self.results:
for item in self.results[query_line]:
item['mutation'].meta_user.count += 1
for result in self.results[query_line]:
result.meta_user.count += 1
mutation = result.mutation
self.results_by_refseq[mutation.protein.refseq][mutation.position, mutation.alt] = result
else:
for item in items:
item['mutation'].meta_user = UserUploadedMutation(
for result in items:
mutation = result.mutation
result.meta_user = UserUploadedMutation(
count=1,
query=query_line
query=query_line,
mutation=result.mutation
)
mutation.meta_user = result.meta_user
self.results_by_refseq[mutation.protein.refseq][mutation.position, mutation.alt] = result
self.results[query_line] = items

def parse_vcf(self, vcf_file):
Expand Down Expand Up @@ -532,9 +539,6 @@ def mutations(self):
'success'
)

for items in mutation_search.results.values():
for item in items:
db.session.add(item['mutation'])
celery_task.forget()
else:
mutation_search = MutationSearch()
Expand Down Expand Up @@ -961,13 +965,18 @@ def match_aa_mutation(gene, mut, query):
return prepare_items(items, query, 'aminoacid mutation')


def prepare_items(items, query, value_type):
for item in items:
item['protein'] = item['protein'].to_json()
item['mutation'] = item['mutation'].to_json()
item['input'] = query
item['type'] = value_type
return items
def prepare_items(results, query, value_type):
    """Convert search results into a list of JSON-serializable dicts.

    Args:
        results: iterable of result objects exposing `protein`, `mutation`
            (both with a `to_json()` method), `pos` and `alt`.
        query: the text the user searched for; stored under 'input'.
        value_type: label of the match kind; stored under 'type'.

    Returns:
        A list with one dict per result, in the order of `results`.
    """
    prepared = []
    for hit in results:
        entry = {}
        entry['protein'] = hit.protein.to_json()
        entry['mutation'] = hit.mutation.to_json()
        entry['input'] = query
        entry['type'] = value_type
        entry['pos'] = hit.pos
        entry['alt'] = hit.alt
        prepared.append(entry)
    return prepared


def autocomplete_mutation(query, limit=None):
Expand Down

0 comments on commit 6cebf94

Please sign in to comment.