Merge pull request #282 from mskcc/release/1.4.0
Release/1.4.0
allanbolipata committed May 13, 2020
2 parents afab536 + 2969db9 commit c06d505
Showing 53 changed files with 1,530 additions and 805 deletions.
3 changes: 3 additions & 0 deletions Makefile
@@ -341,6 +341,9 @@ runserver: check-env
MIGRATION_ARGS?=
migrate: check-env
python manage.py migrate $(MIGRATION_ARGS)
shell: check-env
python manage.py shell_plus --notebook


dumpdata: check-env
python manage.py dumpdata
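For context: `shell_plus` is provided by `django-extensions` (added to `INSTALLED_APPS` in settings.py below), and the `--notebook` flag additionally requires a Jupyter installation. A hypothetical session inside the notebook launched by `make shell`, relying on shell_plus auto-imports:

```python
# shell_plus auto-imports every installed model, so no import lines are needed.
# These queries are illustrative only, not part of the commit:
Job.objects.filter(status=JobStatus.FAILED).count()
Operator.objects.filter(active=True).values_list("class_name", flat=True)
```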
2 changes: 1 addition & 1 deletion beagle/__init__.py
@@ -1 +1 @@
VERSION="1.3.0"
__version__="1.4.0"
7 changes: 5 additions & 2 deletions beagle/settings.py
@@ -28,7 +28,7 @@
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = ['silo', 'localhost']
ALLOWED_HOSTS = os.environ.get('BEAGLE_ALLOWED_HOSTS', 'localhost').split(',')

CORS_ORIGIN_ALLOW_ALL = True
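The host whitelist is now environment-driven rather than hard-coded. A minimal sketch of how the new line parses its input (the variable value is hypothetical); note that `split(',')` does not strip whitespace, so the value must be comma-separated without spaces:

```python
import os

os.environ["BEAGLE_ALLOWED_HOSTS"] = "silo,beagle.example.org"  # hypothetical deployment value
allowed_hosts = os.environ.get("BEAGLE_ALLOWED_HOSTS", "localhost").split(",")
print(allowed_hosts)  # ['silo', 'beagle.example.org']; when unset, falls back to ['localhost']
```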

@@ -50,10 +50,11 @@
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'django_extensions',
'rest_framework',
'corsheaders',
'drf_multiple_model',
'rest_framework_swagger'
'drf_yasg'
]


@@ -250,6 +251,8 @@
# https://docs.djangoproject.com/en/2.2/howto/static-files/

STATIC_URL = '/static/'
LOGIN_URL='/admin/login/'
LOGOUT_URL='/admin/logout/'

RABIX_URL = os.environ.get('BEAGLE_RABIX_URL')
RABIX_PATH = os.environ.get('BEAGLE_RABIX_PATH')
22 changes: 15 additions & 7 deletions beagle/urls.py
@@ -22,16 +22,24 @@
TokenVerifyView,
)
from core.views import BeagleTokenObtainPairView


from rest_framework_swagger.views import get_swagger_view


schema_view = get_swagger_view(title='Beagle API')
from drf_yasg.views import get_schema_view
from drf_yasg import openapi
from rest_framework import permissions
from beagle import __version__

schema_view = get_schema_view(
openapi.Info(
title="Beagle API",
default_version=__version__
),
public=True,
permission_classes=(permissions.AllowAny,),
)


urlpatterns = [
url(r'^$', schema_view),
url(r'^$', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
url(r'^swagger(?P<format>\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'),
path('v0/fs/', include('file_system.urls')),
path('v0/run/', include('runner.urls')),
path('v0/etl/', include('beagle_etl.urls')),
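With drf-yasg, `schema_view.with_ui('swagger', cache_timeout=0)` serves the interactive UI at `/`, while `without_ui` exposes the raw schema at `/swagger.json` and `/swagger.yaml`; the `LOGIN_URL`/`LOGOUT_URL` settings added above presumably back the UI's session-auth login links. A quick way to exercise the new routes against a dev server (the host and port are assumptions):

```python
import requests

# Assumes `python manage.py runserver` is listening on localhost:8000.
schema = requests.get("http://localhost:8000/swagger.json").json()
print(schema["info"]["title"])    # "Beagle API"
print(schema["info"]["version"])  # the value of beagle.__version__, e.g. "1.4.0"
```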
35 changes: 32 additions & 3 deletions beagle_etl/admin.py
@@ -1,4 +1,6 @@
import os
from django.contrib import admin
from django.contrib.admin import ModelAdmin, SimpleListFilter
from django.utils.safestring import mark_safe
from .models import Job, JobStatus, Operator
from lib.admin import pretty_python_exception, pretty_json
@@ -18,9 +20,26 @@ def restart(modeladmin, request, queryset):

restart.short_description = "Restart"

class RecipeFilter(SimpleListFilter):
    title = 'Recipe'
    parameter_name = 'recipe'

    def lookups(self, request, model_admin):
        options = set()
        for o in Operator.objects.values("recipes"):
            for recipe in o["recipes"]:
                options.add((recipe, recipe))
        return options

    def queryset(self, request, queryset):
        if self.value():
            return queryset.filter(args__request_metadata__recipe=self.value())
        return queryset


class JobAdmin(admin.ModelAdmin):
class JobAdmin(ModelAdmin):
    list_display = ('id', 'run', 'retry_count',
    list_display = ('id', 'get_short_run', 'retry_count',
pretty_json('args'),
'children',
'status',
@@ -31,8 +50,18 @@ class JobAdmin(admin.ModelAdmin):
readonly_fields = ('message',)
actions = (restart,)
ordering = ('-created_date',)
list_filter = (RecipeFilter, )

def get_short_run(self, obj):
if obj.run:
(_, run) = os.path.splitext(obj.run)
return mark_safe("<span title='%s'>%s</span>" % (obj.run, run[1:]))
else:
return '--'

get_short_run.short_description = 'Run'

class OperatorAdmin(admin.ModelAdmin):
class OperatorAdmin(ModelAdmin):
list_display = ('id', 'class_name', 'recipes', 'active')
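`RecipeFilter` above follows Django's `SimpleListFilter` contract: `lookups()` supplies the sidebar choices (the distinct recipes across all operators) and `queryset()` applies the selection by walking the `args` JSONField. The filter's lookup is equivalent to this standalone query (the recipe value is hypothetical, and the nested-key lookup needs a JSON-capable database such as PostgreSQL):

```python
# Same lookup the admin filter performs, written as a plain ORM call:
Job.objects.filter(args__request_metadata__recipe="IMPACT468")
```

`get_short_run`, meanwhile, trims the dotted run identifier for the list column while keeping the full value available as a hover tooltip via the `title` attribute.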


41 changes: 23 additions & 18 deletions beagle_etl/jobs/lims_etl_jobs.py
@@ -5,11 +5,12 @@
from django.conf import settings
from django.db.models import Prefetch
from notifier.models import JobGroup
from notifier.events import ETLSetRecipeEvent, OperatorRequestEvent, SetCIReviewEvent
from notifier.events import ETLSetRecipeEvent, OperatorRequestEvent, SetCIReviewEvent, SetLabelEvent
from notifier.tasks import send_notification, notifier_start
from beagle_etl.models import JobStatus, Job, Operator
from file_system.serializers import UpdateFileSerializer
from file_system.exceptions import MetadataValidationException
from file_system.repository.file_repository import FileRepository
from file_system.models import File, FileGroup, FileMetadata, FileType
from file_system.metadata.validator import MetadataValidator, METADATA_SCHEMA
from beagle_etl.exceptions import FailedToFetchFilesException, FailedToSubmitToOperatorException
@@ -74,12 +75,7 @@ def request_callback(request_id, job_group=None):
except JobGroup.DoesNotExist:
logger.debug("[RequestCallback] JobGroup not set")
job_group_id = str(jg.id) if jg else None
queryset = File.objects.prefetch_related(Prefetch('filemetadata_set',
queryset=FileMetadata.objects.select_related('file').order_by(
'-created_date'))).order_by('file_name').filter(
filemetadata__metadata__requestId=request_id)
ret_str = 'filemetadata__metadata__recipe'
recipes = queryset.values_list(ret_str, flat=True).order_by(ret_str).distinct(ret_str)
recipes = FileRepository.filter(metadata={'requestId': request_id}, ret='recipe')
if not recipes:
raise FailedToSubmitToOperatorException(
"Not enough metadata to choose the operator for requestId:%s" % request_id)
@@ -98,6 +94,8 @@ def request_callback(request_id, job_group=None):
logger.info("Submitting request_id %s to %s operator" % (request_id, operator.class_name))
e = OperatorRequestEvent(job_group_id, "Operator %s inactive" % operator.class_name).to_dict()
send_notification.delay(e)
error_label = SetLabelEvent(job_group_id, 'operator_inactive').to_dict()
send_notification.delay(error_label)
ci_review_e = SetCIReviewEvent(job_group_id).to_dict()
send_notification.delay(ci_review_e)
raise FailedToSubmitToOperatorException("Operator %s not active: %s" % operator.class_name)
@@ -235,7 +233,7 @@ def create_pooled_normal(filepath, file_group_id):
- flowCellId = HCYYWBBXY
- [A|B] might be the flowcell bay the flowcell is placed into
"""
if File.objects.filter(path=filepath):
if FileRepository.filter(path=filepath):
logger.info("Pooled normal already created filepath")
file_group_obj = FileGroup.objects.get(id=file_group_id)
file_type_obj = FileType.objects.filter(name='fastq').first()
@@ -322,22 +320,24 @@ def fetch_sample_metadata(sample_id, igocomplete, request_id, request_metadata):
invalid_number_of_fastq = True
failed_runs.append(run['runId'])
else:
file_search = File.objects.filter(path=fastqs[0]).first()
file_search = FileRepository.filter(path=fastqs[0]).first()
if not file_search:
create_file(fastqs[0], request_id, settings.IMPORT_FILE_GROUP, 'fastq', igocomplete, data, library, run,
request_metadata, R1_or_R2(fastqs[0]))
else:
logger.error("File %s already created with id:%s" % (file_search.path, str(file_search.id)))
logger.error(
"File %s already created with id:%s" % (file_search.file.path, str(file_search.file.id)))
conflict = True
conflict_files.append((file_search.path, str(file_search.id)))
file_search = File.objects.filter(path=fastqs[1]).first()
conflict_files.append((file_search.file.path, str(file_search.file.id)))
file_search = FileRepository.filter(path=fastqs[1]).first()
if not file_search:
create_file(fastqs[1], request_id, settings.IMPORT_FILE_GROUP, 'fastq', igocomplete, data, library, run,
request_metadata, R1_or_R2(fastqs[1]))
else:
logger.error("File %s already created with id:%s" % (file_search.path, str(file_search.id)))
logger.error(
"File %s already created with id:%s" % (file_search.file.path, str(file_search.file.id)))
conflict = True
conflict_files.append((file_search.path, str(file_search.id)))
conflict_files.append((file_search.file.path, str(file_search.file.id)))
if conflict:
raise FailedToFetchFilesException(
"Files %s already exists" % ' '.join(['%s with id: %s' % (cf[0], cf[1]) for cf in conflict_files]))
@@ -398,9 +398,11 @@ def create_file(path, request_id, file_group_id, file_type, igocomplete, data, l
sample_id = metadata.pop('igoId', None)
patient_id = metadata.pop('cmoPatientId', None)
sample_class = metadata.pop('cmoSampleClass', None)
specimen_type = metadata.pop('specimenType', None)
metadata['specimenType'] = specimen_type
metadata['requestId'] = request_id
metadata['sampleName'] = sample_name
metadata['cmoSampleName'] = format_sample_name(sample_name)
metadata['cmoSampleName'] = format_sample_name(sample_name, specimen_type)
metadata['externalSampleId'] = external_sample_name
metadata['sampleId'] = sample_id
metadata['patientId'] = patient_id
@@ -511,15 +513,17 @@ def update_sample_metadata(sample_id, igocomplete, request_id, request_metadata)
missing_fastq = True
failed_runs.append(run['runId'])
else:
file_search = File.objects.filter(path=fastqs[0]).first()
f = FileRepository.filter(path=fastqs[0]).first()
file_search = f.file
if file_search:
update_file_metadata(fastqs[0], request_id, igocomplete, data, library, run, request_metadata,
R1_or_R2(fastqs[0]))
else:
logger.error("File %s missing" % file_search.path)
missing = True
missing_files.append((file_search.path, str(file_search.id)))
file_search = File.objects.filter(path=fastqs[1]).first()
f = FileRepository.filter(path=fastqs[1]).first()
file_search = f.file
if file_search:
update_file_metadata(fastqs[1], request_id, igocomplete, data, library, run, request_metadata,
R1_or_R2(fastqs[1]))
@@ -557,7 +561,8 @@ def update_file_metadata(path, request_id, igocomplete, data, library, run, requ
metadata[k] = v
for k, v in request_metadata.items():
metadata[k] = v
file_search = File.objects.filter(path=path).first()
f = FileRepository.filter(path=path).first()
file_search = f.file
if not file_search:
raise FailedToFetchFilesException("Failed to find file %s." % (path))
data = {
28 changes: 2 additions & 26 deletions beagle_etl/tasks.py
@@ -4,14 +4,11 @@
import traceback
from celery import shared_task
from django.db import transaction
from django.db.models import Prefetch
from beagle_etl.models import JobStatus, Job
from beagle_etl.jobs.lims_etl_jobs import TYPES
from file_system.models import File, FileMetadata
from notifier.tasks import send_notification
from notifier.helper import generate_sample_data_content
from notifier.events import ETLImportEvent, ETLJobsLinksEvent, SetCIReviewEvent, UploadAttachmentEvent
# TODO: Consider moving `format_sample_name` to some other place
from runner.operator.roslin_operator.bin.make_sample import format_sample_name


logger = logging.getLogger(__name__)
@@ -167,28 +164,7 @@ def _generate_ticket_decription(self):
send_notification.delay(etl_e)

def _generate_sample_data_file(self):
result = "SAMPLE_ID\tPATIENT_ID\tCOLLAB_ID\tSAMPLE_TYPE\tGENE_PANEL\tONCOTREE_CODE\tSAMPLE_CLASS\tSPECIMEN_PRESERVATION_TYPE\tSEX\tTISSUE_SITE\tIGO_ID\n"
ret_str = 'filemetadata__metadata__sampleId'
samples = File.objects.prefetch_related(Prefetch('filemetadata_set',
queryset=FileMetadata.objects.select_related('file').order_by(
'-created_date'))).filter(
filemetadata__metadata__requestId=self.job.args['request_id']).order_by(ret_str).distinct(ret_str).all()
for sample in samples:
metadata = sample.filemetadata_set.first().metadata
print(metadata)
result += '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
metadata.get('cmoSampleName', format_sample_name(metadata['sampleName'])),
metadata['patientId'],
metadata['investigatorSampleId'],
metadata['sampleClass'],
metadata['recipe'],
metadata['oncoTreeCode'],
metadata['specimenType'],
metadata['preservation'],
metadata['sex'],
metadata['tissueLocation'],
metadata['sampleId']
)
result = generate_sample_data_content(self.job.args['request_id'])
e = UploadAttachmentEvent(str(self.job.job_group.id),
'%s_sample_data_clinical.txt' % self.job.args['request_id'],
result).to_dict()
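The inlined TSV assembly (including its stray debug `print`) moves into `notifier.helper.generate_sample_data_content`, which also removes the cross-package `format_sample_name` import flagged by the TODO. What remains is the usual event flow, sketched here outside the class (the `job` instance stands in for `self.job`):

```python
# Sketch of the remaining flow, under the assumption job mirrors self.job:
content = generate_sample_data_content(job.args["request_id"])
event = UploadAttachmentEvent(str(job.job_group.id),
                              "%s_sample_data_clinical.txt" % job.args["request_id"],
                              content).to_dict()
send_notification.delay(event)
```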
17 changes: 9 additions & 8 deletions beagle_etl/tests/jobs/test_lims_etl_jobs.py
@@ -73,10 +73,10 @@ def test_fetch_samples1(self):
self.assertTrue(len(files) == 22)
self.assertTrue(len(files_metadata) == 22)

import_files = File.objects.filter(file_group = settings.IMPORT_FILE_GROUP)
import_files_metadata = FileMetadata.objects.filter(file__in = [ i.id for i in import_files ])
pooled_normal_files = File.objects.filter(file_group = settings.POOLED_NORMAL_FILE_GROUP)
pooled_normal_files_metadata = FileMetadata.objects.filter(file__in = [ i.id for i in pooled_normal_files ])
import_files = File.objects.filter(file_group=settings.IMPORT_FILE_GROUP)
import_files_metadata = FileMetadata.objects.filter(file__in=[i.id for i in import_files])
pooled_normal_files = File.objects.filter(file_group=settings.POOLED_NORMAL_FILE_GROUP)
pooled_normal_files_metadata = FileMetadata.objects.filter(file__in=[i.id for i in pooled_normal_files])
self.assertTrue(len(import_files) == 10)
self.assertTrue(len(import_files_metadata) == 10)
self.assertTrue(len(pooled_normal_files) == 12)
@@ -114,8 +114,8 @@ def test_create_pooled_normal1(self):
self.assertTrue(len(files) == 1)
self.assertTrue(len(files_metadata) == 1)

imported_file = File.objects.get(path = filepath)
imported_file_metadata = FileMetadata.objects.get(file = imported_file)
imported_file = File.objects.get(path=filepath)
imported_file_metadata = FileMetadata.objects.get(file=imported_file)
self.assertTrue(imported_file_metadata.metadata['preservation'] == 'FFPE')
self.assertTrue(imported_file_metadata.metadata['recipe'] == 'IMPACT468')
self.assertTrue(imported_file_metadata.metadata['runId'] == 'JAX_0397')
@@ -128,12 +128,13 @@ def test_create_pooled_normal2(self):
filepath = "/ifs/archive/GCL/hiseq/FASTQ/PITT_0439_BHFTCNBBXY/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_IMPACT468_CTAACTCG/FROZENPOOLEDNORMAL_IGO_IMPACT468_CTAACTCG_S7_R2_001.fastq.gz"
file_group_id = str(settings.POOLED_NORMAL_FILE_GROUP)
create_pooled_normal(filepath, file_group_id)
imported_file = File.objects.get(path = filepath)
imported_file_metadata = FileMetadata.objects.get(file = imported_file)
imported_file = File.objects.get(path=filepath)
imported_file_metadata = FileMetadata.objects.get(file=imported_file)
self.assertTrue(imported_file_metadata.metadata['preservation'] == 'FROZEN')
self.assertTrue(imported_file_metadata.metadata['recipe'] == 'IMPACT468')
self.assertTrue(imported_file_metadata.metadata['runId'] == 'PITT_0439')


class TestGetRunID(TestCase):
def test_true(self):
self.assertTrue(True)
2 changes: 1 addition & 1 deletion file_system/admin.py
@@ -5,7 +5,7 @@


class FileAdmin(admin.ModelAdmin):
list_display = ('id', 'file_name', 'size')
list_display = ('id', 'file_name', 'file_group', 'size')
search_fields = ['file_name']


8 changes: 8 additions & 0 deletions file_system/exceptions.py
@@ -1,2 +1,10 @@
class MetadataValidationException(Exception):
pass


class FileNotFoundException(Exception):
pass


class InvalidQueryException(Exception):
pass
1 change: 1 addition & 0 deletions file_system/repository/__init__.py
@@ -0,0 +1 @@
from .file_repository import FileRepository
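The new package `__init__` re-exports the repository, so call sites can use the shorter package-level import; both forms below resolve to the same class:

```python
from file_system.repository import FileRepository                 # new short form
from file_system.repository.file_repository import FileRepository  # equivalent long form
```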
(Diff truncated: the remaining 42 of the 53 changed files are not shown.)
