From ecfd8219eb834fff94f74c5c2e2ed1b82d720c14 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 2 Mar 2021 13:30:58 +0100 Subject: [PATCH 1/9] add functionality to create openbis objects with metadata --- .../image_registration_process.py | 22 ++++- .../register-omero-metadata/register-omero.py | 95 +++++++++++-------- 2 files changed, 74 insertions(+), 43 deletions(-) diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 9f08abfc..7287f0d8 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -43,7 +43,17 @@ def fetchOpenBisSampleCode(self): raise SampleCodeError(self._sample_code, "The sample code seems to be invalid, the checksum could not be confirmed.") return self._project_code, self._sample_code - + + def searchOpenBisSample(sample_code): + #find specific sample + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) + foundSamples = search_service.searchForSamples(sc) + if len(foundSamples) == 0: + raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") + sample = foundSamples[0] + return sample + def _isValidSampleCode(self, sample_code): try: id = sample_code[0:9] @@ -143,4 +153,14 @@ def __init__(self, sample_code, message): def test(self): pass +class SampleNotFoundError(Exception): + + def __init__(self, sample_code, message): + self.sample_code = sample_code + self.message = message + super().__init__(self.message) + + def test(self): + pass + diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 6243f8ef..488c17e0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -58,8 +58,8 @@ def createNewImagingExperiment(tr, space, project, properties): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" - MODALITY_CODE = "Q_BMI_MODALITY" search_service = tr.getSearchService() + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) @@ -71,32 +71,39 @@ def createNewImagingExperiment(tr, space, project, properties): i += 1 exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) - exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) - for key in properties.keys(): - exp.setPropertyValue(key, properties[key]) - return exp - -def createNewImagingRun(tr, base_sample, exp, omero_link, run_offset): + img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + for incoming_label in experiment_property_map: + if incoming_label in properties: + key = experiment_property_map[incoming_label] + value = properties[incoming_label] + img_exp.setPropertyValue(key, value) + return img_exp + +def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, properties): IMG_RUN_PREFIX = "IMG" IMG_RUN_TYPE = "Q_BMI_GENERIC_IMAGING_RUN" - IMG_RUN_OMERO_PROPERTY_CODE = "Q_ADDITIONAL_INFO" - # TODO: can we use a prefix for imaging samples? - # otherwise creating new samples will be more complex - # on the other hand, replicates need to be numbered if we use IMG, e.g IMG1QABCD001AB - # IMG2QABCD001AB etc. - # talk to GG and LK + IMG_RUN_OMERO_PROPERTY_CODE = "Q_OMERO_IDS" + sample_property_map = {}#no specific properties from the metadata file yet + run = 0 exists = True new_sample_id = None + # respect samples already in openbis while exists: run += 1 new_sample_id = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run) + base_sample.getCode() exists = tr.getSampleForUpdate(new_sample_id) + # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_link) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) + for incoming_label in sample_property_map: + if incoming_label in properties: + key = sample_property_map[incoming_label] + value = properties[incoming_label] + img_run.setPropertyValue(key, value) return img_run #TODO Luis @@ -109,14 +116,17 @@ def getFileFromLine(line): def isSameExperimentMetadata(props1, props2): """dependent on metadata dictionaries of two different files (data model), decide if new openBIS experiment needs to be created - might be replaced by specific metadata properties, once we know more """ - # initilization of tsv parser, always results in new experiment - if not props1 or not props2: - return False - else: - return True - + relevantPropertyNames = ["IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + for label in relevantPropertyNames: + if label in props1 and label in props2: + if props1[label] != props2[label]: + return False + if label in props1 and not label in props2: + return False + if label in props2 and not label in props1: + return False + return True def registerImageInOpenBIS(transaction): search_service = transaction.getSearchService() @@ -197,6 +207,9 @@ def process(transaction): # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + #find specific sample + tissueSample = registrationProcess.searchOpenBisSample(sample_code) + space = tissueSample.getSpace() # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. @@ -206,7 +219,12 @@ def process(transaction): metadataFile = findMetaDataFile(incomingPath) property_names = getPropertyNames(metadataFile) - + + #keep track of number of images for openBIS ID + image_number = 0 + #Initialize openBIS imaging experiment + imagingExperiment = None + previousProps = {} # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample @@ -242,27 +260,20 @@ def process(transaction): # imaging experiment itself, such as modality, imaged tissue and more. # We also want to connect this data with the previously created, corresponding OMERO image id t # hat represents the result of this experiment in OMERO. - #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) - - # 7. Last but not least we create the open science file format for images which is - # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. - #registrationProcess.triggerOMETiffConversion() - - #################### + #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) # I did it myyy wayyyy - # TODO decide if new experiment is needed based on some pre-defined criteria. + # we decide if new experiment is needed based on some pre-defined criteria. # Normally, the most important criterium is collision of experiment type properties # between samples. E.g. two different imaging modalities need two experiments. - #fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) - #previousProps = properties - #if(not fileBelongsToExistingExperiment): - # exp = createNewImagingExperiment(transaction, space, project_code, properties) - #imagingSample = createNewImagingRun(transaction, sa, exp, list_of_omero_ids, offset)# maybe there are sample properties, too! - # register the actual data - #IMAGING_DATASET_CODE = Q_BMI_GENERIC_IMAGING_DATA # I guess - #dataset = transaction.createNewDataSet(IMAGING_DATASET_CODE) - #dataset.setSample(imagingSample) - #transaction.moveFile(imageFile, dataset) - # increment id offset for next sample in this loop - not sure anymore if this is needed - \ No newline at end of file + fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) + previousProps = properties + if(not fileBelongsToExistingExperiment): + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties) + imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) + # increment id offset for next sample in this loop + image_number += 1 + + # 7. Last but not least we create the open science file format for images which is + # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. + #registrationProcess.triggerOMETiffConversion() \ No newline at end of file From d94cf9e7266a3ffade0635a9b737999a8e8fffb6 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 9 Mar 2021 14:27:10 +0100 Subject: [PATCH 2/9] add provisional omero failure handling --- drop-boxes/register-omero-metadata/register-omero.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 488c17e0..c36c107a 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -241,6 +241,10 @@ def process(transaction): omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_failed = len(omero_image_ids) < 1 + if omero_failed: + raise ValueError("Omero did not return expected image ids.") + # 5. Additional metadata is provided in an own metadata TSV file. # We extract the metadata from this file. #registrationProcess.extractMetadataFromTSV() From 65d4164afcb8bbc168b8a4e9c1f65946b06f432d Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Mon, 15 Mar 2021 17:58:51 +0100 Subject: [PATCH 3/9] remove old method, add logging --- .../register-omero-metadata/register-omero.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index c36c107a..5ae20073 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -106,11 +106,6 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope img_run.setPropertyValue(key, value) return img_run -#TODO Luis -def callOmeroWithFilePath(file_path, sample_barcode): - list_of_omero_ids = ["1","2","3"] - return list_of_omero_ids - def getFileFromLine(line): return line.split("\t")[0] @@ -189,6 +184,7 @@ def printPropertyMap(property_map): def process(transaction): + print "start transaction" """The main entry point. openBIS calls this method, when an incoming transaction is registered. @@ -200,31 +196,52 @@ def process(transaction): # Get the incoming path of the transaction incomingPath = transaction.getIncoming().getAbsolutePath() + print incomingPath + # 1. Initialize the image registration process registrationProcess = irp.ImageRegistrationProcess(transaction) + + print "started reg process" # 2. We want to get the openBIS sample code from the incoming data # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + print project_code + print sample_code + #find specific sample tissueSample = registrationProcess.searchOpenBisSample(sample_code) space = tissueSample.getSpace() + + print tissueSample + print space # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. + + print "calling omero" omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) + print omero_dataset_id + # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) + print metadataFile + property_names = getPropertyNames(metadataFile) + print "property names:" + print property_names + #keep track of number of images for openBIS ID image_number = 0 #Initialize openBIS imaging experiment imagingExperiment = None previousProps = {} + + print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample From 896f66a01337b9e696604fe69d23af7eb742b3db Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Mon, 15 Mar 2021 18:27:08 +0100 Subject: [PATCH 4/9] fix searching for samples --- .../register-omero-metadata/image_registration_process.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 7287f0d8..5dfd8c8f 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -14,6 +14,7 @@ def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_ self._transaction = transaction self._incoming_file_name = transaction.getIncoming().getName() + self._search_service = transaction.getSearchService() self._project_code = project_code self._sample_code = sample_code @@ -44,11 +45,11 @@ def fetchOpenBisSampleCode(self): return self._project_code, self._sample_code - def searchOpenBisSample(sample_code): + def searchOpenBisSample(self, sample_code): #find specific sample sc = SearchCriteria() sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) - foundSamples = search_service.searchForSamples(sc) + foundSamples = self._search_service.searchForSamples(sc) if len(foundSamples) == 0: raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") sample = foundSamples[0] From 60a33d43c2e1e6a24545c80c5db8121fe0bff307 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:00:07 +0100 Subject: [PATCH 5/9] several fixes --- drop-boxes/register-omero-metadata/register-omero.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 5ae20073..2877a60f 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -56,12 +56,11 @@ ##### -def createNewImagingExperiment(tr, space, project, properties): +def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" search_service = tr.getSearchService() experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} - existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: existing_ids.append(exp.getExperimentIdentifier()) @@ -72,6 +71,7 @@ def createNewImagingExperiment(tr, space, project, properties): exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + existing_ids.append(exp_id) for incoming_label in experiment_property_map: if incoming_label in properties: key = experiment_property_map[incoming_label] @@ -96,7 +96,7 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) - img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) + img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) for incoming_label in sample_property_map: @@ -240,6 +240,7 @@ def process(transaction): #Initialize openBIS imaging experiment imagingExperiment = None previousProps = {} + existing_experiment_ids = [] print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata @@ -257,6 +258,7 @@ def process(transaction): # in OMERO. We pass the omero dataset id and trigger the image registration process in OMERO. omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_image_ids = [420,42] omero_failed = len(omero_image_ids) < 1 if omero_failed: @@ -290,7 +292,7 @@ def process(transaction): fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) previousProps = properties if(not fileBelongsToExistingExperiment): - imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties) + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties, existing_experiment_ids) imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) # increment id offset for next sample in this loop image_number += 1 From ca3cd7839883e336ab0171e2f71c3b31a7bc2156 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:01:39 +0100 Subject: [PATCH 6/9] several fixes --- drop-boxes/register-omero-metadata/register-omero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 2877a60f..252e6c5b 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -98,7 +98,7 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, str(omero_image_ids)) for incoming_label in sample_property_map: if incoming_label in properties: key = sample_property_map[incoming_label] From 97b952692a235720c8387929fedffa487c41277f Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:26:11 +0100 Subject: [PATCH 7/9] change metadata mapping for measurement dae --- drop-boxes/register-omero-metadata/register-omero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 252e6c5b..7eec8ce0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -59,7 +59,7 @@ def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" search_service = tr.getSearchService() - experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "IMAGING_DATE":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: From 3e29e0abfe43f70a850d5711dfeccb3a65c82fb9 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:39:14 +0100 Subject: [PATCH 8/9] add date mapping --- drop-boxes/register-omero-metadata/register-omero.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 7eec8ce0..2c0c18b0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -11,6 +11,7 @@ #import sample_tracking_helper_qbic as thelper import checksum +import datetime import re import os import urllib @@ -55,6 +56,11 @@ # and delete the data! ##### +INCOMING_DATE_FORMAT = '%d.%m.%Y' +OPENBIS_DATE_FORMAT = '%Y-%m-%d' + +def mapDateString(date_string): + return datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT).strftime(OPENBIS_DATE_FORMAT) def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" @@ -76,6 +82,8 @@ def createNewImagingExperiment(tr, space, project, properties, existing_ids): if incoming_label in properties: key = experiment_property_map[incoming_label] value = properties[incoming_label] + if key == "Q_MEASUREMENT_FINISH_DATE": + value = mapDateString(value) img_exp.setPropertyValue(key, value) return img_exp From 63b3f00f08587e2218179ad6632198b755fea243 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:52:06 +0100 Subject: [PATCH 9/9] remove test data --- drop-boxes/register-omero-metadata/register-omero.py | 1 - 1 file changed, 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 2c0c18b0..6c3b7987 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -266,7 +266,6 @@ def process(transaction): # in OMERO. We pass the omero dataset id and trigger the image registration process in OMERO. omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) - omero_image_ids = [420,42] omero_failed = len(omero_image_ids) < 1 if omero_failed: