diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 9f08abfc..5dfd8c8f 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -14,6 +14,7 @@ def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_ self._transaction = transaction self._incoming_file_name = transaction.getIncoming().getName() + self._search_service = transaction.getSearchService() self._project_code = project_code self._sample_code = sample_code @@ -43,7 +44,17 @@ def fetchOpenBisSampleCode(self): raise SampleCodeError(self._sample_code, "The sample code seems to be invalid, the checksum could not be confirmed.") return self._project_code, self._sample_code - + + def searchOpenBisSample(self, sample_code): + #find specific sample + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) + foundSamples = self._search_service.searchForSamples(sc) + if len(foundSamples) == 0: + raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") + sample = foundSamples[0] + return sample + def _isValidSampleCode(self, sample_code): try: id = sample_code[0:9] @@ -143,4 +154,14 @@ def __init__(self, sample_code, message): def test(self): pass +class SampleNotFoundError(Exception): + + def __init__(self, sample_code, message): + self.sample_code = sample_code + self.message = message + super().__init__(self.message) + + def test(self): + pass + diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 6243f8ef..6c3b7987 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -11,6 +11,7 @@ #import sample_tracking_helper_qbic as thelper import checksum +import datetime import re import os import urllib @@ -55,13 +56,17 @@ # and delete the data! ##### +INCOMING_DATE_FORMAT = '%d.%m.%Y' +OPENBIS_DATE_FORMAT = '%Y-%m-%d' -def createNewImagingExperiment(tr, space, project, properties): +def mapDateString(date_string): + return datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT).strftime(OPENBIS_DATE_FORMAT) + +def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" - MODALITY_CODE = "Q_BMI_MODALITY" search_service = tr.getSearchService() + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "IMAGING_DATE":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} - existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: existing_ids.append(exp.getExperimentIdentifier()) @@ -71,52 +76,60 @@ def createNewImagingExperiment(tr, space, project, properties): i += 1 exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) - exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) - for key in properties.keys(): - exp.setPropertyValue(key, properties[key]) - return exp - -def createNewImagingRun(tr, base_sample, exp, omero_link, run_offset): + img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + existing_ids.append(exp_id) + for incoming_label in experiment_property_map: + if incoming_label in properties: + key = experiment_property_map[incoming_label] + value = properties[incoming_label] + if key == "Q_MEASUREMENT_FINISH_DATE": + value = mapDateString(value) + img_exp.setPropertyValue(key, value) + return img_exp + +def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, properties): IMG_RUN_PREFIX = "IMG" IMG_RUN_TYPE = "Q_BMI_GENERIC_IMAGING_RUN" - IMG_RUN_OMERO_PROPERTY_CODE = "Q_ADDITIONAL_INFO" - # TODO: can we use a prefix for imaging samples? - # otherwise creating new samples will be more complex - # on the other hand, replicates need to be numbered if we use IMG, e.g IMG1QABCD001AB - # IMG2QABCD001AB etc. - # talk to GG and LK + IMG_RUN_OMERO_PROPERTY_CODE = "Q_OMERO_IDS" + sample_property_map = {}#no specific properties from the metadata file yet + run = 0 exists = True new_sample_id = None + # respect samples already in openbis while exists: run += 1 new_sample_id = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run) + base_sample.getCode() exists = tr.getSampleForUpdate(new_sample_id) + # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) - img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) + img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_link) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, str(omero_image_ids)) + for incoming_label in sample_property_map: + if incoming_label in properties: + key = sample_property_map[incoming_label] + value = properties[incoming_label] + img_run.setPropertyValue(key, value) return img_run -#TODO Luis -def callOmeroWithFilePath(file_path, sample_barcode): - list_of_omero_ids = ["1","2","3"] - return list_of_omero_ids - def getFileFromLine(line): return line.split("\t")[0] def isSameExperimentMetadata(props1, props2): """dependent on metadata dictionaries of two different files (data model), decide if new openBIS experiment needs to be created - might be replaced by specific metadata properties, once we know more """ - # initilization of tsv parser, always results in new experiment - if not props1 or not props2: - return False - else: - return True - + relevantPropertyNames = ["IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + for label in relevantPropertyNames: + if label in props1 and label in props2: + if props1[label] != props2[label]: + return False + if label in props1 and not label in props2: + return False + if label in props2 and not label in props1: + return False + return True def registerImageInOpenBIS(transaction): search_service = transaction.getSearchService() @@ -179,6 +192,7 @@ def printPropertyMap(property_map): def process(transaction): + print "start transaction" """The main entry point. openBIS calls this method, when an incoming transaction is registered. @@ -190,23 +204,53 @@ def process(transaction): # Get the incoming path of the transaction incomingPath = transaction.getIncoming().getAbsolutePath() + print incomingPath + # 1. Initialize the image registration process registrationProcess = irp.ImageRegistrationProcess(transaction) + + print "started reg process" # 2. We want to get the openBIS sample code from the incoming data # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + print project_code + print sample_code + + #find specific sample + tissueSample = registrationProcess.searchOpenBisSample(sample_code) + space = tissueSample.getSpace() + + print tissueSample + print space # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. + + print "calling omero" omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) + print omero_dataset_id + # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) + print metadataFile + property_names = getPropertyNames(metadataFile) - + + print "property names:" + print property_names + + #keep track of number of images for openBIS ID + image_number = 0 + #Initialize openBIS imaging experiment + imagingExperiment = None + previousProps = {} + existing_experiment_ids = [] + + print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample @@ -223,6 +267,10 @@ def process(transaction): omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_failed = len(omero_image_ids) < 1 + if omero_failed: + raise ValueError("Omero did not return expected image ids.") + # 5. Additional metadata is provided in an own metadata TSV file. # We extract the metadata from this file. #registrationProcess.extractMetadataFromTSV() @@ -242,27 +290,20 @@ def process(transaction): # imaging experiment itself, such as modality, imaged tissue and more. # We also want to connect this data with the previously created, corresponding OMERO image id t # hat represents the result of this experiment in OMERO. - #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) - - # 7. Last but not least we create the open science file format for images which is - # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. - #registrationProcess.triggerOMETiffConversion() - - #################### + #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) # I did it myyy wayyyy - # TODO decide if new experiment is needed based on some pre-defined criteria. + # we decide if new experiment is needed based on some pre-defined criteria. # Normally, the most important criterium is collision of experiment type properties # between samples. E.g. two different imaging modalities need two experiments. - #fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) - #previousProps = properties - #if(not fileBelongsToExistingExperiment): - # exp = createNewImagingExperiment(transaction, space, project_code, properties) - #imagingSample = createNewImagingRun(transaction, sa, exp, list_of_omero_ids, offset)# maybe there are sample properties, too! - # register the actual data - #IMAGING_DATASET_CODE = Q_BMI_GENERIC_IMAGING_DATA # I guess - #dataset = transaction.createNewDataSet(IMAGING_DATASET_CODE) - #dataset.setSample(imagingSample) - #transaction.moveFile(imageFile, dataset) - # increment id offset for next sample in this loop - not sure anymore if this is needed - \ No newline at end of file + fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) + previousProps = properties + if(not fileBelongsToExistingExperiment): + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties, existing_experiment_ids) + imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) + # increment id offset for next sample in this loop + image_number += 1 + + # 7. Last but not least we create the open science file format for images which is + # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. + #registrationProcess.triggerOMETiffConversion() \ No newline at end of file