Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion drop-boxes/register-omero-metadata/image_registration_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_

self._transaction = transaction
self._incoming_file_name = transaction.getIncoming().getName()
self._search_service = transaction.getSearchService()

self._project_code = project_code
self._sample_code = sample_code
Expand Down Expand Up @@ -43,7 +44,17 @@ def fetchOpenBisSampleCode(self):
raise SampleCodeError(self._sample_code, "The sample code seems to be invalid, the checksum could not be confirmed.")

return self._project_code, self._sample_code


def searchOpenBisSample(self, sample_code):
    """Return the first openBIS sample whose code matches *sample_code*.

    The lookup is an attribute match on the sample CODE, executed through
    the search service stored on this instance.

    Raises:
        SampleNotFoundError: if the search returns no sample.
    """
    # Build the CODE match clause, then attach it to fresh search criteria.
    code_clause = SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, sample_code)
    criteria = SearchCriteria()
    criteria.addMatchClause(code_clause)

    matches = self._search_service.searchForSamples(criteria)
    if len(matches) > 0:
        # Only the first hit is used, even if several are returned.
        return matches[0]
    raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.")

def _isValidSampleCode(self, sample_code):
try:
id = sample_code[0:9]
Expand Down Expand Up @@ -143,4 +154,14 @@ def __init__(self, sample_code, message):
def test(self):
pass

class SampleNotFoundError(Exception):
    """Error raised when a sample code cannot be found in openBIS.

    Instances expose two attributes:
      sample_code -- the sample code that was searched for
      message     -- the human-readable error description (also used as
                     the exception's string representation)
    """

    def __init__(self, sample_code, message):
        # Use the explicit two-argument form of super(): the zero-argument
        # form exists only in Python 3, and raising this error under a
        # Python 2 / Jython interpreter would otherwise fail with a
        # TypeError instead of reporting the missing sample.
        super(SampleNotFoundError, self).__init__(message)
        self.sample_code = sample_code
        self.message = message

    def test(self):
        # No-op placeholder kept for interface compatibility.
        pass


141 changes: 91 additions & 50 deletions drop-boxes/register-omero-metadata/register-omero.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#import sample_tracking_helper_qbic as thelper

import checksum
import datetime
import re
import os
import urllib
Expand Down Expand Up @@ -55,13 +56,17 @@
# and delete the data!
#####

# Date layout of incoming metadata values (day.month.year); used as the
# strptime pattern in mapDateString.
INCOMING_DATE_FORMAT = '%d.%m.%Y'
# Date layout handed to openBIS (year-month-day); used as the strftime
# pattern in mapDateString.
OPENBIS_DATE_FORMAT = '%Y-%m-%d'

def createNewImagingExperiment(tr, space, project, properties):
def mapDateString(date_string):
    """Convert a date string from INCOMING_DATE_FORMAT to OPENBIS_DATE_FORMAT.

    The string is parsed with datetime.strptime, so a value that does not
    match INCOMING_DATE_FORMAT raises ValueError.
    """
    parsed_date = datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT)
    return parsed_date.strftime(OPENBIS_DATE_FORMAT)

def createNewImagingExperiment(tr, space, project, properties, existing_ids):
IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING"
MODALITY_CODE = "Q_BMI_MODALITY"
search_service = tr.getSearchService()
experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "IMAGING_DATE":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"}

existing_ids = []
existing_exps = search_service.listExperiments("/" + space + "/" + project)
for exp in existing_exps:
existing_ids.append(exp.getExperimentIdentifier())
Expand All @@ -71,52 +76,60 @@ def createNewImagingExperiment(tr, space, project, properties):
i += 1
exp_num = len(existing_exps) + i
exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num)
exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE)
for key in properties.keys():
exp.setPropertyValue(key, properties[key])
return exp

def createNewImagingRun(tr, base_sample, exp, omero_link, run_offset):
img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE)
existing_ids.append(exp_id)
for incoming_label in experiment_property_map:
if incoming_label in properties:
key = experiment_property_map[incoming_label]
value = properties[incoming_label]
if key == "Q_MEASUREMENT_FINISH_DATE":
value = mapDateString(value)
img_exp.setPropertyValue(key, value)
return img_exp

def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, properties):
    """Create a new imaging-run sample as a child of *base_sample*.

    Args:
        tr: registration transaction; must provide getSampleForUpdate()
            and createNewSample().
        base_sample: the sample the images were taken from; becomes the
            parent of the new imaging run.
        exp: experiment the new imaging run is attached to.
        omero_image_ids: OMERO image identifiers; stored as str(...) in
            the Q_OMERO_IDS property.
        run_offset: number of imaging runs already created in this ETL
            call for the same base sample, added to the run number.
        properties: metadata dictionary of the current file; only labels
            listed in sample_property_map are copied (currently none).

    Returns:
        The newly created imaging-run sample.
    """
    IMG_RUN_PREFIX = "IMG"
    IMG_RUN_TYPE = "Q_BMI_GENERIC_IMAGING_RUN"
    IMG_RUN_OMERO_PROPERTY_CODE = "Q_OMERO_IDS"
    sample_property_map = {}  # no specific properties from the metadata file yet

    # Identifiers look like /<space>/IMG<n><base code>, e.g. IMG1QABCD001AB,
    # IMG2QABCD001AB, ... for replicates of the same base sample.
    id_prefix = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX
    base_code = base_sample.getCode()

    run = 0
    exists = True
    # respect samples already in openbis
    while exists:
        run += 1
        exists = tr.getSampleForUpdate(id_prefix + str(run) + base_code)

    # add additional offset for samples registered in this call of the ETL
    # script, but before this sample
    new_sample_id_with_offset = id_prefix + str(run + run_offset) + base_code
    img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE)
    # The identifier getter has to be called; passing the bound method
    # itself would not yield a parent identifier.
    img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()])
    img_run.setExperiment(exp)
    img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, str(omero_image_ids))
    for incoming_label in sample_property_map:
        if incoming_label in properties:
            key = sample_property_map[incoming_label]
            value = properties[incoming_label]
            img_run.setPropertyValue(key, value)
    return img_run

#TODO Luis
def callOmeroWithFilePath(file_path, sample_barcode):
    """Placeholder for the OMERO call.

    Both arguments are currently ignored; a fixed list of three ID strings
    is returned.
    """
    return ["1", "2", "3"]

def getFileFromLine(line):
    """Return the first tab-separated column of *line*.

    A line without any tab character is returned unchanged.
    """
    columns = line.split("\t")
    first_column = columns[0]
    return first_column

def isSameExperimentMetadata(props1, props2):
    """Decide whether two files' metadata belong to the same openBIS experiment.

    Depends on the metadata dictionaries of two different files (data
    model). Might be replaced by specific metadata properties once we know
    more.

    Args:
        props1: metadata dictionary of the previous file (may be empty).
        props2: metadata dictionary of the current file (may be empty).

    Returns:
        False if either dictionary is empty, or if any relevant property is
        present in only one dictionary or has different values in the two;
        True otherwise.
    """
    # initialization of tsv parser, always results in new experiment
    if not props1 or not props2:
        return False

    # No early "return True" here: the relevant properties below must be
    # compared before the two files can be assigned to one experiment.
    relevantPropertyNames = ["IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"]
    for label in relevantPropertyNames:
        in_first = label in props1
        in_second = label in props2
        # Property present on one side only -> different experiment.
        if in_first != in_second:
            return False
        # Property present on both sides but with different values.
        if in_first and props1[label] != props2[label]:
            return False
    return True

def registerImageInOpenBIS(transaction):
search_service = transaction.getSearchService()
Expand Down Expand Up @@ -179,6 +192,7 @@ def printPropertyMap(property_map):


def process(transaction):
print "start transaction"
"""The main entry point.

openBIS calls this method, when an incoming transaction is registered.
Expand All @@ -190,23 +204,53 @@ def process(transaction):
# Get the incoming path of the transaction
incomingPath = transaction.getIncoming().getAbsolutePath()

print incomingPath

# 1. Initialize the image registration process
registrationProcess = irp.ImageRegistrationProcess(transaction)

print "started reg process"

# 2. We want to get the openBIS sample code from the incoming data
# This tells us from which biological sample the image data was acquired.
project_code, sample_code = registrationProcess.fetchOpenBisSampleCode()

print project_code
print sample_code

#find specific sample
tissueSample = registrationProcess.searchOpenBisSample(sample_code)
space = tissueSample.getSpace()

print tissueSample
print space
# 3. We now request the associated omero dataset id for the openBIS sample code.
# Each dataset in OMERO contains the associated openBIS biological sample id, which
# happened during the experimental design registration with the projectwizard.

print "calling omero"
omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code)

print omero_dataset_id

# Find and parse metadata file content
metadataFile = findMetaDataFile(incomingPath)

print metadataFile

property_names = getPropertyNames(metadataFile)


print "property names:"
print property_names

#keep track of number of images for openBIS ID
image_number = 0
#Initialize openBIS imaging experiment
imagingExperiment = None
previousProps = {}
existing_experiment_ids = []

print "start reading metadata file"
# Iterate over the metadata entries containing all pre-specified imaging metadata
for line in metadataFile[1:]: # (Exclude header)
# Get modality and other metadata from tsv here for one sample
Expand All @@ -223,6 +267,10 @@ def process(transaction):
omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id)
print "Created OMERO image identifiers:\t" + str(omero_image_ids)

omero_failed = len(omero_image_ids) < 1
if omero_failed:
raise ValueError("Omero did not return expected image ids.")

# 5. Additional metadata is provided in an own metadata TSV file.
# We extract the metadata from this file.
#registrationProcess.extractMetadataFromTSV()
Expand All @@ -242,27 +290,20 @@ def process(transaction):
# imaging experiment itself, such as modality, imaged tissue and more.
# We also want to connect this data with the previously created, corresponding OMERO image id
# that represents the result of this experiment in OMERO.
#registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids)

# 7. Last but not least we create the open science file format for images which is
# OMERO-Tiff and store it in OMERO next to the proprietary vendor format.
#registrationProcess.triggerOMETiffConversion()

####################
#registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) # I did it myyy wayyyy

# TODO decide if new experiment is needed based on some pre-defined criteria.
# we decide if new experiment is needed based on some pre-defined criteria.
# Normally, the most important criterion is collision of experiment type properties
# between samples. E.g. two different imaging modalities need two experiments.

#fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties)
#previousProps = properties
#if(not fileBelongsToExistingExperiment):
# exp = createNewImagingExperiment(transaction, space, project_code, properties)
#imagingSample = createNewImagingRun(transaction, sa, exp, list_of_omero_ids, offset)# maybe there are sample properties, too!
# register the actual data
#IMAGING_DATASET_CODE = Q_BMI_GENERIC_IMAGING_DATA # I guess
#dataset = transaction.createNewDataSet(IMAGING_DATASET_CODE)
#dataset.setSample(imagingSample)
#transaction.moveFile(imageFile, dataset)
# increment id offset for next sample in this loop - not sure anymore if this is needed

fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties)
previousProps = properties
if(not fileBelongsToExistingExperiment):
imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties, existing_experiment_ids)
imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties)
# increment id offset for next sample in this loop
image_number += 1

# 7. Last but not least we create the open science file format for images which is
# OMERO-Tiff and store it in OMERO next to the proprietary vendor format.
#registrationProcess.triggerOMETiffConversion()