-
Notifications
You must be signed in to change notification settings - Fork 24
/
models.py
982 lines (817 loc) · 33.6 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
'''
Created on May 10, 2012
@author: nils
'''
from datetime import datetime
import logging
from django.conf import settings
from django.db import models
from django.db.models.signals import pre_delete
from django.dispatch import receiver
from celery.result import AsyncResult
from django_extensions.db.fields import UUIDField
import requests
from requests.exceptions import HTTPError
import core
from core.utils import delete_analysis_index, skip_if_test_run
import data_set_manager
from file_store.models import FileStoreItem
"""
TODO: Refactor import data_set_manager. Importing
data_set_manager.tasks.generate_auxiliary_file()
results in a circular import error (see comments on PR #1590)
"""
logger = logging.getLogger(__name__)
"""
General:
- xyz_term = accession number of an ontology term
- xyz_source = reference to the ontology where xyz_term originates (defined in
the investigation)
"""
class NodeCollection(models.Model):
"""Base class for Investigation and Study
"""
uuid = UUIDField(unique=True, auto=True)
identifier = models.TextField(blank=True, null=True)
title = models.TextField(blank=True, null=True)
description = models.TextField(blank=True, null=True)
submission_date = models.DateField(blank=True, null=True)
release_date = models.DateField(blank=True, null=True)
def __init__(self, *args, **kwargs):
# change dates from empty string to None (to pass validation)
if "submission_date" in kwargs:
if kwargs["submission_date"] == "":
kwargs["submission_date"] = None
else:
kwargs["submission_date"] = self.normalize_date(
kwargs["submission_date"]
)
if "release_date" in kwargs:
if kwargs["release_date"] == "":
kwargs["release_date"] = None
else:
kwargs["release_date"] = self.normalize_date(
kwargs["release_date"]
)
super(NodeCollection, self).__init__(*args, **kwargs)
def __unicode__(self):
return (
unicode(self.identifier) +
(
": " +
unicode(self.title) if unicode(self.title) != "" else ""
) +
": " +
unicode(self.id)
)
def normalize_date(self, dateString):
"""Normalizes date strings in dd/mm/yyyy format to yyyy-mm-dd.
Returns normalized date string if in expected unnormalized format or
unnormalized date string.
"""
logger.info("Converting date " + str(dateString) + " ...")
try:
# try reformatting incorrect date format used by Nature Scientific
# Data
return str(
datetime.strftime(
datetime.strptime(
dateString,
"%d/%m/%Y"
),
"%Y-%m-%d"
)
)
except ValueError:
# ignore - date either in correct format or in format not supported
# (will cause a validation error handled separately)
logger.info("Failed to convert date " + str(dateString))
return dateString
class Publication(models.Model):
"""Investigation or Study Publication (ISA-Tab Spec 4.1.2.2, 4.1.3.3)"""
collection = models.ForeignKey(NodeCollection)
title = models.TextField(blank=True, null=True)
authors = models.TextField(blank=True, null=True)
pubmed_id = models.TextField(blank=True, null=True)
doi = models.TextField(blank=True, null=True)
status = models.TextField(blank=True, null=True)
# TODO: do we really want to store ontology information for this?
status_accession = models.TextField(blank=True, null=True)
status_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.authors) + ": " + unicode(self.title)
class Contact(models.Model):
"""Investigation or Study Contact (ISA-Tab Spec 4.1.2.3, 4.1.3.7)"""
collection = models.ForeignKey(NodeCollection)
last_name = models.TextField(blank=True, null=True)
first_name = models.TextField(blank=True, null=True)
middle_initials = models.TextField(blank=True, null=True)
email = models.EmailField(blank=True, null=True)
phone = models.TextField(blank=True, null=True)
fax = models.TextField(blank=True, null=True)
address = models.TextField(blank=True, null=True)
affiliation = models.TextField(blank=True, null=True)
# TODO: split on semicolon
roles = models.TextField(blank=True, null=True)
# TODO: do we really want to store ontology information for this?
roles_accession = models.TextField(blank=True, null=True)
roles_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return (
unicode(self.first_name) +
" " +
unicode(self.last_name) +
" (" +
unicode(self.email) +
")"
)
class Investigation(NodeCollection):
isarchive_file = UUIDField(blank=True, null=True, auto=False)
pre_isarchive_file = UUIDField(blank=True, null=True, auto=False)
"""easily retrieves the proper NodeCollection fields"""
def is_isa_tab_based(self):
return bool(self.isarchive_file)
def get_file_store_item(self):
"""Get an Investigation's corresponding FileStoreItem instance"""
file_store_item_uuid = (
self.isarchive_file if self.is_isa_tab_based()
else self.pre_isarchive_file
)
try:
return FileStoreItem.objects.get(uuid=file_store_item_uuid)
except (FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as e:
logger.error(
"Couldn't fetch Investigation: %s's FileStoreItem from UUID: "
"%s %s", self, file_store_item_uuid, e
)
def get_identifier(self):
if (self.identifier is None) or (self.identifier.strip() == ""):
# if there's no investigation identifier, then there's only 1 study
study = self.study_set.all()[0]
return study.identifier
return self.identifier
def get_title(self):
if self.title == '' or self.title is None:
study = self.study_set.all()[0]
return study.title
return self.title
def get_description(self):
if self.description is None or self.description == '':
study = self.study_set.all()[0]
return study.description
return self.description
def get_study_count(self):
return self.study_set.count()
def get_assay_count(self):
studies = self.study_set.all()
assay_count = 0
for study in studies:
assay_count += study.assay_set.count()
return assay_count
@receiver(pre_delete, sender=Investigation)
def _investigation_delete(sender, instance, **kwargs):
"""
Removes an Investigation's associated FileStoreItem upon deletion being
triggered.
Having these extra checks is favored within a signal so that this logic
is picked up on bulk deletes as well.
See: https://docs.djangoproject.com/en/1.8/topics/db/models/
#overriding-model-methods
"""
instance.get_file_store_item().delete()
class Ontology(models.Model):
"""Ontology Source Reference (ISA-Tab Spec 4.1.1)"""
investigation = models.ForeignKey(Investigation)
name = models.TextField(blank=True, null=True)
file_name = models.TextField(blank=True, null=True)
version = models.TextField(blank=True, null=True)
description = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.name) + " (" + unicode(self.file_name) + ")"
class Study(NodeCollection):
investigation = models.ForeignKey(Investigation)
# TODO: should we support an archive file here? (see ISA-Tab Spec 4.1.3.2)
file_name = models.TextField()
def get_dataset(self):
investigation_uuid = self.investigation.uuid
try:
data_set = core.models.InvestigationLink.objects.filter(
investigation__uuid=investigation_uuid
).order_by("version").reverse()[0].data_set
except (AttributeError, IndexError) as e:
raise RuntimeError(
"Couldn't fetch DataSet for Investigation {}: {}".format(
investigation_uuid, e
)
)
return data_set
def __unicode__(self):
return unicode(self.identifier) + ": " + unicode(self.title)
@receiver(pre_delete, sender=Study)
def _study_delete(sender, instance, **kwargs):
"""
Removes a Study's related objects upon deletion being triggered.
Having these extra checks is favored within a signal so that this logic
is picked up on bulk deletes as well.
See: https://docs.djangoproject.com/en/1.8/topics/db/models/
#overriding-model-methods
"""
Node.objects.filter(study=instance).delete()
class Design(models.Model):
"""Study Design Descriptor (ISA-Tab Spec 4.1.3.2)"""
study = models.ForeignKey(Study)
type = models.TextField(blank=True, null=True)
type_accession = models.TextField(blank=True, null=True)
type_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.type)
class Factor(models.Model):
"""Study Factor (ISA-Tab Spec 4.1.3.4)"""
study = models.ForeignKey(Study)
name = models.TextField(blank=True, null=True)
type = models.TextField(blank=True, null=True)
type_accession = models.TextField(blank=True, null=True)
type_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.name) + ": " + unicode(self.type)
class Assay(models.Model):
"""Study Assay (ISA-Tab Spec 4.1.3.5)"""
uuid = UUIDField(unique=True, auto=True)
study = models.ForeignKey(Study)
measurement = models.TextField(blank=True, null=True)
measurement_accession = models.TextField(blank=True, null=True)
measurement_source = models.TextField(blank=True, null=True)
technology = models.TextField(blank=True, null=True)
technology_accession = models.TextField(blank=True, null=True)
technology_source = models.TextField(blank=True, null=True)
platform = models.TextField(blank=True, null=True)
file_name = models.TextField()
def __unicode__(self):
retstr = ""
if self.measurement:
retstr += "Measurement: %s; " % unicode(self.measurement)
if self.technology:
retstr += "Technology: %s; " % unicode(self.technology)
if self.platform:
retstr += "Platform: %s; " % unicode(self.platform)
retstr += "File: %s" % unicode(self.file_name)
return retstr
class Protocol(models.Model):
"""Study Protocol (ISA-Tab Spec 4.1.3.6)"""
study = models.ForeignKey(Study)
uuid = UUIDField(unique=True, auto=True)
# workflow_uuid can be used to associate the protocol with a workflow
# TODO: should this be the analysis uuid? (problem: technically an analysis
# is the execution of a protocol)
workflow_uuid = UUIDField(unique=True, auto=True)
version = models.TextField(blank=True, null=True)
name = models.TextField(blank=True, null=True)
name_accession = models.TextField(blank=True, null=True)
name_source = models.TextField(blank=True, null=True)
type = models.TextField(blank=True, null=True)
type_accession = models.TextField(blank=True, null=True)
type_source = models.TextField(blank=True, null=True)
description = models.TextField(blank=True, null=True)
uri = models.TextField(blank=True, null=True)
# protocol parameters: via FK
# protocol components: via FK
def __unicode__(self):
return unicode(self.name) + ": " + unicode(self.type)
class Meta:
ordering = ['id']
class ProtocolParameter(models.Model):
study = models.ForeignKey(Study)
protocol = models.ForeignKey(Protocol)
name = models.TextField(blank=True, null=True)
name_accession = models.TextField(blank=True, null=True)
name_source = models.TextField(blank=True, null=True)
class ProtocolComponent(models.Model):
study = models.ForeignKey(Study)
protocol = models.ForeignKey(Protocol)
name = models.TextField(blank=True, null=True)
type = models.TextField(blank=True, null=True)
type_accession = models.TextField(blank=True, null=True)
type_source = models.TextField(blank=True, null=True)
class Node(models.Model):
# allowed node types
SOURCE = "Source Name"
SAMPLE = "Sample Name"
EXTRACT = "Extract Name"
LABELED_EXTRACT = "Labeled Extract Name"
SCAN = "Scan Name"
NORMALIZATION = "Normalization Name"
DATA_TRANSFORMATION = "Data Transformation Name"
ASSAY = "Assay Name"
HYBRIDIZATION_ASSAY = "Hybridization Assay Name"
GEL_ELECTROPHORESIS_ASSAY = "Gel Electrophoresis Assay Name"
NMR_ASSAY = "NMR Assay Name"
MS_ASSAY = "MS Assay Name"
ARRAY_DESIGN_FILE = "Array Design File"
ARRAY_DESIGN_FILE_REF = "Array Design File REF"
IMAGE_FILE = "Image File"
RAW_DATA_FILE = "Raw Data File"
DERIVED_DATA_FILE = "Derived Data File"
ARRAY_DATA_FILE = "Array Data File"
DERIVED_ARRAY_DATA_FILE = "Derived Array Data File"
ARRAY_DATA_MATRIX_FILE = "Array Data Matrix File"
DERIVED_ARRAY_DATA_MATRIX_FILE = "Derived Array Data Matrix File"
SPOT_PICKING_FILE = "Spot Picking File"
RAW_SPECTRAL_DATA_FILE = "Raw Spectral Data File"
DERIVED_SPECTRAL_DATA_FILE = "Derived Spectral Data File"
PEPTIDE_ASSIGNMENT_FILE = "Peptide Assignment File"
PROTEIN_ASSIGNMENT_FILE = "Protein Assignment File"
PTM_ASSIGNMENT_FILE = "Post Translational Modification Assignment File"
FREE_INDUCTION_DECAY_DATA_FILE = "Free Induction Decay Data File"
ACQUISITION_PARAMETER_DATA_FILE = "Aquisition Parameter Data File"
METABOLITE_ASSIGNMENT_FILE = "Metabolite Assignment File"
ASSAYS = {
ASSAY,
HYBRIDIZATION_ASSAY,
MS_ASSAY,
NMR_ASSAY,
GEL_ELECTROPHORESIS_ASSAY
}
FILES = {
ARRAY_DESIGN_FILE,
ARRAY_DESIGN_FILE_REF,
IMAGE_FILE,
RAW_DATA_FILE,
DERIVED_DATA_FILE,
ARRAY_DATA_FILE,
DERIVED_ARRAY_DATA_FILE,
ARRAY_DATA_MATRIX_FILE,
DERIVED_ARRAY_DATA_MATRIX_FILE,
SPOT_PICKING_FILE,
RAW_SPECTRAL_DATA_FILE,
DERIVED_SPECTRAL_DATA_FILE,
PEPTIDE_ASSIGNMENT_FILE,
PROTEIN_ASSIGNMENT_FILE,
PTM_ASSIGNMENT_FILE,
FREE_INDUCTION_DECAY_DATA_FILE,
ACQUISITION_PARAMETER_DATA_FILE,
METABOLITE_ASSIGNMENT_FILE
}
INDEXED_FILES = {
RAW_DATA_FILE, DERIVED_DATA_FILE,
ARRAY_DATA_FILE, DERIVED_ARRAY_DATA_FILE,
ARRAY_DATA_MATRIX_FILE, DERIVED_ARRAY_DATA_MATRIX_FILE
}
TYPES = ASSAYS | FILES | {
SOURCE, SAMPLE, EXTRACT, LABELED_EXTRACT, SCAN, NORMALIZATION,
DATA_TRANSFORMATION}
uuid = UUIDField(unique=True, auto=True)
study = models.ForeignKey(Study, db_index=True)
assay = models.ForeignKey(Assay, db_index=True, blank=True, null=True)
children = models.ManyToManyField(
"self", symmetrical=False, related_name="parents_set")
parents = models.ManyToManyField(
"self", symmetrical=False, related_name="children_set")
type = models.TextField(db_index=True)
name = models.TextField(db_index=True)
# only used for nodes representing files
file_uuid = UUIDField(default=None, blank=True, null=True, auto=False)
# Refinery internal "attributes" (exported as comment attributes)
genome_build = models.TextField(db_index=True, null=True)
species = models.IntegerField(db_index=True, null=True)
is_annotation = models.BooleanField(default=False)
analysis_uuid = UUIDField(default=None, blank=True, null=True, auto=False)
is_auxiliary_node = models.BooleanField(default=False)
subanalysis = models.IntegerField(null=True, blank=False)
workflow_output = models.CharField(null=True, blank=False, max_length=500)
def __unicode__(self):
return unicode(self.type) + ": " + unicode(self.name) + " (" +\
unicode(self.parents.count()) + " parents, " +\
unicode(self.children.count()) + " children " + "| " +\
"species: " + unicode(self.species) +\
", genome build: " + unicode(self.genome_build) + ")"
def add_child(self, node):
if node is None:
return None
self.children.add(node)
self.save()
node.parents.add(self)
node.save()
return self
def get_analysis(self):
try:
return core.models.Analysis.objects.get(uuid=self.analysis_uuid)
except core.models.Analysis.DoesNotExist:
return None
except core.models.Analysis.MultipleObjectsReturned as e:
logger.error("Multiple Analyses found for Node with UUID: %s %s",
self.uuid, e)
return None
def _get_derived_node_types(self):
"""
This finds all node types which are "derived"
:returns: a list of all node types which are "derived"
"""
derived_node_types = []
for item in self.FILES:
if "derived" in item.lower():
derived_node_types.append(item)
return derived_node_types
def is_derived(self):
"""
This is a helper method to aid in the checking of if a Node has
been analyzed further when we delete an Analysis.
:returns True if the node in question's type exists in the list of
Node types which are "derived"
"""
return self.type in self._get_derived_node_types()
def is_orphan(self):
return self.parents.count() == 0
def get_analysis_node_connections(self):
return core.models.AnalysisNodeConnection.objects.filter(node=self)
def get_file_store_item(self):
"""
Returns the FileStoreItem associated with a given Node or None if
there isn't one
"""
try:
return FileStoreItem.objects.get(
uuid=self.file_uuid)
except (FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as e:
logger.error(e)
return None
def _create_and_associate_auxiliary_node(self, filestore_item_uuid):
"""
Tries to create and associate an auxiliary Node with a parent
node.
If said auxiliary Node already exists, we just do a get()
and do not re-add it as a child
"""
node = Node.objects.get_or_create(
study=self.study,
assay=self.assay,
name="auxiliary Node for: {}".format(self.name),
is_auxiliary_node=True,
file_uuid=filestore_item_uuid
)
# get_or_create() returns a tuple:
# (<Node_object>, Boolean: <created>)
# So, if this Node is newly created, we will associate it as a
# child to its parent, otherwise nothing happens
# https://docs.djangoproject.com/en/1.10/ref/models/querysets/#get-or-create
node_object = node[0]
is_newly_created_node = node[1]
if is_newly_created_node:
self.add_child(node_object)
return node_object
def get_children(self):
"""
Return a list of child Node's uuids for a given Node
"""
return [child.uuid for child in self.children.all()]
def get_parents(self):
"""
Return a list of parent Node's uuids for a given Node
"""
return [parent.uuid for parent in self.parents.all()]
def get_auxiliary_nodes(self):
"""
Return a list of uuids of auxiliary Nodes for a Given Node
"""
child_nodes = self.get_children()
aux_nodes = []
for uuid in child_nodes:
try:
node = Node.objects.get(uuid=uuid)
if node.is_auxiliary_node:
aux_nodes.append(node.uuid)
except (Node.DoesNotExist, Node.MultipleObjectsReturned) as e:
logger.error(e)
return aux_nodes
def get_relative_file_store_item_url(self):
"""
Return relative path to a Node's FileStoreItem's
datafile if one exists, otherwise return None
"""
try:
file_store_item = FileStoreItem.objects.get(
uuid=self.file_uuid
)
return file_store_item.get_datafile_url()
except (FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as e:
logger.error(e)
return None
def run_generate_auxiliary_node_task(self):
"""
This method is initiated after a task_success signal is returned
from the `import_file` task.
Here we check if the imported FileStoreItem returned from the
`import_file` task is in need of the creation of some auxiliary
File/Node. If this is the case, we create auxiliary Node and
FileStoreItem objects, and then proceed to run the
generate_auxiliary_file task, and associate said task's id with the
newly created FileStoreItems `import_task_id` field so that we can
monitor the task state.
"""
# Check if the Django setting to generate auxiliary file has been
# set to run when FileStoreItems are imported into Refinery
logger.debug("Checking if some auxiliary Node should be generated")
file_store_item = self.get_file_store_item()
# Check if we pass the logic to generate aux. Files/Nodes
if (file_store_item and file_store_item.filetype and
file_store_item.filetype.used_for_visualization and
file_store_item.is_local() and
settings.REFINERY_AUXILIARY_FILE_GENERATION ==
"on_file_import"):
datafile_path = file_store_item.get_absolute_path()
# Create an empty FileStoreItem (we do the datafile association
# within the generate_auxiliary_file task
auxiliary_file_store_item = FileStoreItem.objects.create()
auxiliary_node = self._create_and_associate_auxiliary_node(
auxiliary_file_store_item.uuid)
result = data_set_manager.tasks.generate_auxiliary_file.delay(
auxiliary_node, datafile_path, file_store_item)
auxiliary_file_store_item.import_task_id = result.task_id
auxiliary_file_store_item.save()
def get_auxiliary_file_generation_task_state(self):
"""
Return the generate_auxiliary_file task state for a given auxiliary
Node.
Return None if a regular Node
"""
if self.is_auxiliary_node:
return AsyncResult(self.get_file_store_item().import_task_id).state
else:
return None
@receiver(pre_delete, sender=Node)
def _node_delete(sender, instance, *args, **kwargs):
"""
Removes a Node's related objects upon deletion being triggered.
Having these extra checks is favored within a signal so that this logic
is picked up on bulk deletes as well.
See: https://docs.djangoproject.com/en/1.8/topics/db/models/
#overriding-model-methods
"""
# remove a Node's FileStoreItem upon deletion, if one exists
file_store_item = instance.get_file_store_item()
if file_store_item is not None:
file_store_item.delete()
delete_analysis_index(instance)
class Attribute(models.Model):
# allowed attribute types
MATERIAL_TYPE = "Material Type"
CHARACTERISTICS = "Characteristics"
FACTOR_VALUE = "Factor Value"
LABEL = "Label"
COMMENT = "Comment"
TYPES = {MATERIAL_TYPE, CHARACTERISTICS, FACTOR_VALUE, LABEL, COMMENT}
ALL_FIELDS = [
"id",
"type",
"subtype",
"value",
"value_unit",
"value_accession",
"value_source",
"node"
]
NON_ONTOLOGY_FIELDS = [
"id",
"type",
"subtype",
"value",
"value_unit",
"node"
]
node = models.ForeignKey(Node, db_index=True)
type = models.TextField(db_index=True)
# subtype further qualifies the attribute type, e.g. type = factor value
# and subtype = age
subtype = models.TextField(blank=True, null=True, db_index=True)
value = models.TextField(blank=True, null=True, db_index=True)
value_unit = models.TextField(blank=True, null=True)
# if value_unit is not null value is numeric and value_accession and
# value_source refer to value_unit (rather than value)
value_accession = models.TextField(blank=True, null=True)
value_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return (
unicode(self.type) + (
"" if self.subtype is None else " (" + unicode(
self.subtype
) + ")"
) +
" = " +
unicode(self.value)
)
# non-ISA Tab
class AttributeDefinition(models.Model):
study = models.ForeignKey(Study, db_index=True)
assay = models.ForeignKey(Assay, db_index=True, blank=True, null=True)
type = models.TextField(db_index=True)
subtype = models.TextField(blank=True, null=True, db_index=True)
# attribute value to match on
value = models.TextField(blank=True, null=True, db_index=True)
# definition of the attribute value
definition = models.TextField(blank=True, null=True, db_index=True)
value_accession = models.TextField(blank=True, null=True)
value_source = models.TextField(blank=True, null=True)
# non-ISA Tab
class AttributeOrder(models.Model):
study = models.ForeignKey(Study, db_index=True)
assay = models.ForeignKey(Assay, db_index=True, blank=True, null=True)
solr_field = models.TextField(db_index=True)
# position of the attribute in the facet list and table
rank = models.IntegerField(blank=True, null=True)
# should this attribute be exposed to the user? if false the attribute will
# never be shown to non-owner users
is_exposed = models.BooleanField(default=True)
# should this attribute be used as a facet?
is_facet = models.BooleanField(default=True)
# should be shown in the table by default?
is_active = models.BooleanField(default=True)
# is this an internal attribute? (retrieved by solr by never exposed to any
# user)
is_internal = models.BooleanField(default=False)
def __unicode__(self):
return unicode(
self.solr_field +
" [facet = " +
str(self.is_facet) +
" exp = " +
str(self.is_exposed) +
" act = " +
str(self.is_active) +
" int = " + str(self.is_internal) + "] = "
) + str(self.rank)
class AnnotatedNodeRegistry(models.Model):
study = models.ForeignKey(Study)
assay = models.ForeignKey(Assay, blank=True, null=True)
node_type = models.TextField()
creation_date = models.DateTimeField(auto_now_add=True)
modification_date = models.DateTimeField(auto_now=True)
class AnnotatedNode(models.Model):
node = models.ForeignKey(Node, db_index=True)
attribute = models.ForeignKey(Attribute)
study = models.ForeignKey(Study)
assay = models.ForeignKey(Assay, blank=True, null=True)
node_uuid = UUIDField()
node_file_uuid = UUIDField(blank=True, null=True)
node_type = models.TextField()
node_name = models.TextField()
attribute_type = models.TextField()
# subtype further qualifies the attribute type, e.g. type = factor value
# and subtype = age
attribute_subtype = models.TextField(blank=True, null=True)
attribute_value = models.TextField(blank=True, null=True)
attribute_value_unit = models.TextField(blank=True, null=True)
# genome information
node_species = models.IntegerField(null=True)
node_genome_build = models.TextField(null=True)
node_analysis_uuid = UUIDField(
default=None,
blank=True,
null=True,
auto=False
)
node_subanalysis = models.IntegerField(null=True, blank=False)
node_workflow_output = models.CharField(
null=True,
blank=False,
max_length=100
)
# other information
is_annotation = models.BooleanField(default=False)
def __unicode__(self):
return unicode(self.node_uuid)
def _is_internal_attribute(attribute):
return attribute in ["uuid",
"study_uuid",
"assay_uuid",
"file_uuid",
"type",
"is_annotation",
"species",
"genome_build",
"name",
"analysis_uuid",
"subanalysis",
"output_type"]
def _is_active_attribute(attribute):
return not _is_internal_attribute(attribute)
def _is_exposed_attribute(attribute):
return True # TODO: Could we get rid of this?
def _is_ignored_attribute(attribute):
"""Ignore Django internal Solr fields"""
return attribute in ["django_ct", "django_id", "id"]
def _query_solr(study, assay, attribute=None):
types = ' OR '.join(
'"{0}"'.format(type) for type in Node.FILES
)
url = '{base_url}data_set_manager/select'.format(
base_url=settings.REFINERY_SOLR_BASE_URL
)
params = {
'fq': 'study_uuid:{study_uuid} AND '
'assay_uuid: {assay_uuid} AND '
'is_annotation:false AND '
'type:({types})'.format(
study_uuid=study.uuid,
assay_uuid=assay.uuid,
types=types
),
'q': 'django_ct:data_set_manager.node',
'rows': 1,
'start': 0,
'wt': 'json'
}
if attribute is not None:
params.update({
'facet': 'true',
'facet.field': attribute,
'facet.sort': 'count',
'facet.limit': '-1'
})
# This log tends to be massive and spams the log file. Turn on only when
# needed.
# logger.debug('Query parameters: %s', params)
headers = {'Accept': 'application/json'}
try:
response = requests.get(url, params=params, headers=headers)
response.raise_for_status()
except HTTPError as e:
logger.error(e)
raise
results = response.json()
# This log tends to be massive and spams the log file. Turn on only when
# needed.
# logger.debug('Query results: %s', results)
if results['response']['numFound'] == 0:
raise ValueError('No results.')
return results
def _is_facet_attribute(attribute, study, assay):
"""Tests if a an attribute should be used as a facet by default.
:param attribute: The name of the attribute.
:type attribute: string
:returns: True if the ratio between items in the data set and the number of
facet attribute values is smaller than
settings.DEFAULT_FACET_ATTRIBUTE_VALUES_RATIO, false otherwise.
"""
ratio = 0.5
results = _query_solr(attribute=attribute, study=study, assay=assay)
items = results['response']['numFound']
attribute_values = len(
results['facet_counts']['facet_fields'][attribute]
) / 2
return (attribute_values / items) < ratio
@skip_if_test_run
def initialize_attribute_order(study, assay):
"""Initializes the AttributeOrder table after all nodes for the given study
and assay have been indexed by Solr.
:param study: Study object to query for in AnnotatedNode.
:type study: Study
:param assay: Assay object to query for in AnnotatedNode.
:type assay: Assay
:returns: Number of attributes that were indexed.
"""
results = _query_solr(study=study, assay=assay)
attribute_order_objects = []
for key in results['response']['docs'][0]:
is_facet = _is_facet_attribute(key, study, assay)
is_exposed = _is_exposed_attribute(key)
is_internal = _is_internal_attribute(key)
is_active = _is_active_attribute(key)
if not _is_ignored_attribute(key):
attribute_order_objects.append(
AttributeOrder(
study=study,
assay=assay,
solr_field=key,
rank=0,
is_facet=is_facet,
is_exposed=is_exposed,
is_internal=is_internal,
is_active=is_active
)
)
# insert AttributeOrder objects into database
AttributeOrder.objects.bulk_create(attribute_order_objects)
return len(attribute_order_objects)
class ProtocolReference(models.Model):
node = models.ForeignKey(Node)
protocol = models.ForeignKey(Protocol)
performer = models.TextField(blank=True, null=True)
# performer_uuid can be used to associate the execution with a user account
performer_uuid = UUIDField(blank=True, null=True)
date = models.DateField(blank=True, null=True)
comment = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.protocol) + " (reference)"
class ProtocolReferenceParameter(models.Model):
protocol_reference = models.ForeignKey(ProtocolReference)
name = models.TextField(blank=True, null=True)
value = models.TextField(blank=True, null=True)
value_unit = models.TextField(blank=True, null=True)
# if value_unit is not null value is numeric and value_accession and
# value_source refer to value_unit (rather than value)
value_accession = models.TextField(blank=True, null=True)
value_source = models.TextField(blank=True, null=True)
def __unicode__(self):
return unicode(self.name) + " = " + unicode(self.value)