From f70d27f7eaa4c7c74582512f0f170162fc2ae4eb Mon Sep 17 00:00:00 2001 From: Sven F Date: Fri, 22 Jan 2021 16:00:28 +0100 Subject: [PATCH 01/13] Sync development with master (#61) --- CHANGELOG.md | 16 +++++++++-- README.md | 2 +- .../register-IdXML-dropbox.py | 15 +++++++++- .../register-archived-ms-raw/register-raw.py | 15 +++++++++- .../register-archived-ms-raw/register-raw.pyc | Bin 3074 -> 0 bytes .../register-bam-dropbox/register-bam.py | 15 +++++++++- .../etl_msconvert.py | 15 ++++++++-- .../register-fasta-dropbox/register-fasta.py | 15 +++++++++- .../register-fastq-with-metadata-dropbox.py | 15 +++++++++- .../register-fastq-dropbox.py | 16 ++++++++++- .../register-fXML-dropbox.py | 15 +++++++++- .../register-hlatyping.py | 15 +++++++++- .../register-imgag-dropbox/register-imgag.py | 27 +++++++++++++----- .../register-immunmonitoring.py | 15 +++++++++- .../register-cel-dropbox.py | 15 +++++++++- .../register-mtb-data-dropbox.py | 21 ++++++++++---- .../register-mzml-dropbox.py | 15 +++++++++- .../register-nanopore.py | 20 +++++++++++-- .../register-nmr-dropbox/register-nmr.py | 15 +++++++++- .../register-peptidedata.py | 15 +++++++++- .../register-qcml-dropbox.py | 15 +++++++++- .../register-qpcr-dropbox.py | 16 ++++++++++- .../register-vcf-dropbox/register-vcf.py | 16 ++++++++++- .../register-16staxonomic-dropbox.py | 10 ++----- .../register-epitopeprediction.py | 14 ++++----- .../register-hlatyping.py | 11 ++----- ...register-individualizedproteome-dropbox.py | 14 +++------ .../register-ligandomicsid-dropbox.py | 11 ++----- .../register-ligandomicsqc-dropbox.py | 10 ++----- .../register-mapping-dropbox.py | 12 ++------ .../register-wf-maqc/register-wf-maqc.py | 12 ++------ .../register-maxquant-dropbox.py | 12 ++------ .../register-mergengsdata.py | 11 ++----- .../register-wf-msqc/register-wf-msqc.py | 12 ++------ .../register-wf-ngsqc/register-wf-ngsqc.py | 10 ++----- drop-boxes/register-wf-peakpicking/script.py | 12 +++----- .../register-wf-peptideid.py | 14 ++------- .../register-wf-qedda/register-qedda.py | 9 ++---- .../register-wf-rnaexpranalysis.py | 11 ++----- .../register-shrna-dropbox.py | 12 ++------ .../register-variantannotation.py | 11 ++----- .../register-wf-variantcalling.py | 13 +++------ .../register-wiff-data-dropbox.py | 15 +++++++++- .../checksum-maintenance/plugin.properties | 2 +- .../update-experiment-metadata/update.py | 25 +--------------- 45 files changed, 382 insertions(+), 225 deletions(-) delete mode 100644 drop-boxes/register-archived-ms-raw/register-raw.pyc diff --git a/CHANGELOG.md b/CHANGELOG.md index ecfb7338..ea336184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,20 @@ # Changelog -## Currently in development -* New maintenance task: update missing checksum one, after dss start. +## 1.6.0 2021-01-22 + +* Fix for workflow result registration: fetch sample by identifier instead of search for robustness against indexing problems +* Retry sample tracking updates twice and log failures that occur + +## 1.5.0 2020-11-03 + +* New maintenance task: update missing checksum once, after dss starts. +* Fix for nanopore registration: rename folders for pooling case +* Fix for experiment update: force identifier into a string to support v3 API objects + +## 1.4.1 2020-11-03 + +* Imgag dropbox: raise an exception, if files of unknown type are part of the transaction ## 1.4.0 diff --git a/README.md b/README.md index 83afc705..a34ef4d3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/qbicsoftware/omero-portlet) +![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/qbicsoftware/etl-scripts) ![Python Language](https://img.shields.io/badge/language-python-blue.svg) ![License](https://img.shields.io/github/license/qbicsoftware/etl-scripts) [![DOI](https://zenodo.org/badge/45912621.svg)](https://zenodo.org/badge/latestdoi/45912621) diff --git a/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py b/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py index 8fae5fe3..40f8449f 100644 --- a/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py +++ b/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-archived-ms-raw/register-raw.py b/drop-boxes/register-archived-ms-raw/register-raw.py index 9e89bf74..52a7f480 100644 --- a/drop-boxes/register-archived-ms-raw/register-raw.py +++ b/drop-boxes/register-archived-ms-raw/register-raw.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -104,4 +105,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-archived-ms-raw/register-raw.pyc b/drop-boxes/register-archived-ms-raw/register-raw.pyc deleted file mode 100644 index c3375c219b936d41ff02951ec9ab2bb1586c7941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3074 zcmbVO-E$h(6+a6CAtVGswu$SsA#R(|Lj;MNnKYhEry2>>o($Mtkey(AnAPqzELwK) z-itnxr7y`JkhlCP`IqvZ_qM-t7Lbzkr4`z<_k7%Q&-dNz&!ysDzkJ?zY4MlA`|o)4 zj{qJ~AJ8H%LxDwoi>>MVwWxL z?rpLI&jE_XPGr=>;xzU{Q`g2ZYNA4;>%*vSqKEZ=MpG4@`nsuAe6C{M^c>@86UAzf zQkp?DY+_$*X43)I4RdLV{=f0p(KQ^<97PkgANb)}^R041=i`r>r+(Pnv%oWJ0LOly zQfy;^oftpRDQO#|u5w~`bQt?a#lDl$cUJW7>~zV}?|}O3)x|fT%wK-A|Jmzr{xW~* z?0elHcck1iJ)4*ukYgv%D&-Y{`W_z^xcf1>we=c2h{0BKCGj40Es%c*jW@;J(xNn~ZMQsy3@ILp!C0E@HV<3utiWY1Yn+}1a(Y=5X6r-7@&lyo^)4UcCC?Fr*VOjH4dAX zrMvSSnGH&EjK|;e!ds*IJTI#pR_hd&$P>KgtyAqEkJyx=s4I}!lm#?w(0Gf{af!y; zvM6(Tvu0R;_DTvg-eEbmw@67*&L3Ar!R89z6?{t^zC{W2LmU&BPxNkcoM6KuYwzCl&)bBLh`qNZLh$Aejek!GLM`DNN*7p6{R6>z z@e!KeBd^5Byhcfl#&t69r#v9{$wNFLn?Y)T|B;n0H~xtd6d2+KrgTvS)!9LIiR@O- z{%UaeINcH$DoJ8`m>kZ z_?QK+JPZO%g$MI`6xUY`%7;H2w7Y|L>!5pLx7w{v4-JlqC9owu+U`nMi=2JZF{R}u z`{2dk=%9CCZr!9jZ*`8I*n{3zPg~NYEY*9|vfEGg{tP*D2Vkg-EsY*K#*f0oC^YKQ z7&Zs2rZxN!CY#b^mUg00p%+D{<4n@^ zA|OJ4J)u^Jt^5zxZIsOt}@k|LfCvV#dV}4 zECHg%6EXJX{xW%pI#XO;p5Xxr71!Jw>iHu11+RBCRT9$xIpEl86N_=FvmJGDtw#=C z(NojWjM#%jIx!Ud+8r$w1ojOjocWcKBV4atWkfT?+RD+hSb0pgxg8e4Gf!h&%(1z8 z?gTT1Su1BQEZ~q$qH`rTE8hZ=JQF*ic3iGGDFt^?cv}$g7X`K|s55mX`3g})HiuHS zOyg*bD@RIuD*hcto-$l5X+Gtf097Qtqg}(g-$b*}OD)sV0O`(OM@BA8Em`Ig)Q+5} zE~k|IgLTDi9`DY5g9Sy2pB~YPOmwTU*w~S~atacM~ z8*!SHNSl3-+)+bAR!#I}!}Yb-PzIkW{Ds#@KQ=I&qPXEq{l@tN+3$|zw6SyUH2lbh z4l=g^qId`~2V=E?AvK9Y&20I)d+^^5T6|A7hAKqz+=d>Zki7Jyj~kweWCGdTd2%3> zPVt3k-vvR&mi+}**Xfbf|MvOB{qgxE@@9eh8~2pvmdztA6{IBnaZ2#Zl`|WK{{Yj* B(J%l2 diff --git a/drop-boxes/register-bam-dropbox/register-bam.py b/drop-boxes/register-bam-dropbox/register-bam.py index 186c5004..20a21ad5 100644 --- a/drop-boxes/register-bam-dropbox/register-bam.py +++ b/drop-boxes/register-bam-dropbox/register-bam.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -136,4 +137,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py index dcb2d9f0..13dd049f 100644 --- a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py +++ b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py @@ -484,8 +484,19 @@ def __str__(self): return self.value def handleSampleTracking(barcode): - #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(barcode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(barcode) + break + except: + print "Updating location for sample "+barcode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def createRawDataSet(transaction, incomingPath, sample, format, time_stamp): rawDataSet = transaction.createNewDataSet("Q_MS_RAW_DATA") diff --git a/drop-boxes/register-fasta-dropbox/register-fasta.py b/drop-boxes/register-fasta-dropbox/register-fasta.py index aaab99a4..2fc5e52a 100644 --- a/drop-boxes/register-fasta-dropbox/register-fasta.py +++ b/drop-boxes/register-fasta-dropbox/register-fasta.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -152,4 +153,16 @@ def process(transaction): transaction.moveFile(new_folder, dataSet) # Updates the sample location of the extract sample - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py b/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py index 6fae6beb..885f1c91 100644 --- a/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py +++ b/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -167,4 +168,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py b/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py index 34d5601f..d89ffec2 100644 --- a/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py +++ b/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -128,4 +129,17 @@ def process(transaction): nameFile.close() transaction.moveFile(incomingPath, dataSet) - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + # sample tracking + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py b/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py index 6c71ba7e..46425e0f 100644 --- a/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py +++ b/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py index b6fa1ad6..1f213fb6 100644 --- a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py +++ b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -149,4 +150,16 @@ def process(transaction): transaction.moveFile(resultPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-imgag-dropbox/register-imgag.py b/drop-boxes/register-imgag-dropbox/register-imgag.py index a97ee8e8..6aa33d3f 100644 --- a/drop-boxes/register-imgag-dropbox/register-imgag.py +++ b/drop-boxes/register-imgag-dropbox/register-imgag.py @@ -596,13 +596,14 @@ def process(transaction): print rawFile if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"): vcfs.append(rawFile) - if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"): + elif rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"): fastqs.append(rawFile) - if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"): + elif rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"): gsvars.append(rawFile) - if rawFile.endswith("tsv") or rawFile.endswith("tsv.gz"): + elif rawFile.endswith("tsv") or rawFile.endswith("tsv.gz"): tsvs.append(rawFile) - + else: + raise Exception(rawFile + " is of an unsupported format") #if rawFiles[0].endswith("vcf") or rawFiles[0].endswith("vcf.gz"): # datasetSample = find_and_register_vcf(transaction, jsonContent) @@ -663,7 +664,19 @@ def process(transaction): transaction.moveFile(vcfFolder, vcfDataSet) else: - find_and_register_ngs_without_metadata(transaction, parentCodes) + find_and_register_ngs_without_metadata(transaction, parentCodes) for code in parentCodes: - #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + #sample tracking section + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + break + except: + print "Updating location for sample "+code+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py b/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py index f4242682..c5757c59 100644 --- a/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py +++ b/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py b/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py index 0c3260db..1da34e40 100644 --- a/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py +++ b/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -192,4 +193,16 @@ def process(transaction): os.remove(os.path.realpath(os.path.join(incomingPath,f))) #sample tracking section for code in trackingCodes: - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + break + except: + print "Updating location for sample "+code+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py b/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py index 43372890..08092b6c 100644 --- a/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py +++ b/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py @@ -164,11 +164,22 @@ ############################################################################# def update_sample_location_to_qbic(sampleId): - """Calls the sample status service and updates the - location to QBiC and the status 'DATA AT QBiC'. - """ - # Update sample location - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(sampleId) + """Calls the sample status service and updates the + location to QBiC and the status 'DATA AT QBiC'. + """ + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(sampleId) + break + except: + print("Updating location for sample " + sampleId + " failed on attempt "+str(attempt+1)) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def process(transaction): diff --git a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py index cd4511ce..a9ae0f71 100644 --- a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py +++ b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py @@ -9,6 +9,7 @@ import checksum import re import os +import time import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File from org.apache.commons.io import FileUtils @@ -107,4 +108,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise \ No newline at end of file diff --git a/drop-boxes/register-nanopore-dropbox/register-nanopore.py b/drop-boxes/register-nanopore-dropbox/register-nanopore.py index bc6b2a0c..059c4fdd 100644 --- a/drop-boxes/register-nanopore-dropbox/register-nanopore.py +++ b/drop-boxes/register-nanopore-dropbox/register-nanopore.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import shutil from datetime import datetime @@ -226,12 +227,15 @@ def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, cu # moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, suffix): name = dataObject.getName() + # if pooled data, folder is named using barcode and needs to be adapted + if not "_" in name: + name = name + "_" + suffix relativePath = dataObject.getRelativePath() # the source path of the currently handled data object (e.g. fast5_fail folder) sourcePath = os.path.join(os.path.dirname(currentPath), relativePath) checksumFile = createChecksumFileForFolder(incomingPath, sourcePath) # destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled - destination = os.path.join(destinationPath, name + "_" + suffix) + destination = os.path.join(destinationPath, name) os.rename(sourcePath, destination) def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSample, openbisExperiment, currentPath, absLogPath): @@ -288,7 +292,19 @@ def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSam transaction.moveFile(absLogPath, logDataSet) # Updates the sample location of the measured sample - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentSampleCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentSampleCode) + break + except: + print "Updating location for sample "+parentSampleCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def process(transaction): """Main ETL routine entry point""" diff --git a/drop-boxes/register-nmr-dropbox/register-nmr.py b/drop-boxes/register-nmr-dropbox/register-nmr.py index 262ed122..48192c59 100644 --- a/drop-boxes/register-nmr-dropbox/register-nmr.py +++ b/drop-boxes/register-nmr-dropbox/register-nmr.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import string import os @@ -145,4 +146,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py b/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py index ed881290..5c0867c5 100644 --- a/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py +++ b/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py @@ -9,6 +9,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -86,4 +87,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py b/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py index 7f55f406..f784f479 100644 --- a/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py +++ b/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -104,4 +105,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py b/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py index daea723e..c72e9971 100644 --- a/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py +++ b/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -103,4 +104,17 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise + diff --git a/drop-boxes/register-vcf-dropbox/register-vcf.py b/drop-boxes/register-vcf-dropbox/register-vcf.py index d3c3f21e..5f8aeed3 100644 --- a/drop-boxes/register-vcf-dropbox/register-vcf.py +++ b/drop-boxes/register-vcf-dropbox/register-vcf.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -163,4 +164,17 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise + diff --git a/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py b/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py index 182a5d7b..ac7ef035 100644 --- a/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py +++ b/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py @@ -56,18 +56,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py b/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py index d8404ba4..8e28dfea 100644 --- a/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py +++ b/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py @@ -34,18 +34,14 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode + if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + sample = transaction.getSampleForUpdate(sample_id) + + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-hlatyping/register-hlatyping.py b/drop-boxes/register-wf-hlatyping/register-hlatyping.py index 3d1b0036..c2e5c1aa 100644 --- a/drop-boxes/register-wf-hlatyping/register-hlatyping.py +++ b/drop-boxes/register-wf-hlatyping/register-hlatyping.py @@ -57,18 +57,13 @@ def process(transaction): project = pPattern.findall(name)[0] experiment_id = ePattern.findall(name)[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py b/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py index 8d61a2f0..f7fbf1dd 100644 --- a/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py +++ b/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode + if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + sample = transaction.getSampleForUpdate(sample_id) + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py b/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py index 98dc78b1..d854ce65 100644 --- a/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py +++ b/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py @@ -41,19 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/" + space + "/" + sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py b/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py index 016b9b08..9ffc8937 100644 --- a/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py +++ b/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py @@ -41,18 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-mapping/register-mapping-dropbox.py b/drop-boxes/register-wf-mapping/register-mapping-dropbox.py index dd87826f..4ca24383 100644 --- a/drop-boxes/register-wf-mapping/register-mapping-dropbox.py +++ b/drop-boxes/register-wf-mapping/register-mapping-dropbox.py @@ -41,22 +41,16 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) additionalInfo = sample.getPropertyValue("Q_ADDITIONAL_INFO") - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-maqc/register-wf-maqc.py b/drop-boxes/register-wf-maqc/register-wf-maqc.py index b6a97400..efe32e30 100644 --- a/drop-boxes/register-wf-maqc/register-wf-maqc.py +++ b/drop-boxes/register-wf-maqc/register-wf-maqc.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() if len(parents) > 6: first = parents[0].split("/")[-1] parentInfos = first+"_and_"+str(len(parents)-1)+"others" diff --git a/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py b/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py index 8c1eff51..9317bde9 100644 --- a/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py +++ b/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py b/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py index 39f10e78..394a44b7 100644 --- a/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py +++ b/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py @@ -34,18 +34,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-msqc/register-wf-msqc.py b/drop-boxes/register-wf-msqc/register-wf-msqc.py index 76ffa18a..ed0e9055 100644 --- a/drop-boxes/register-wf-msqc/register-wf-msqc.py +++ b/drop-boxes/register-wf-msqc/register-wf-msqc.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py b/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py index 6957113a..1fb9643e 100644 --- a/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py +++ b/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py @@ -41,18 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) diff --git a/drop-boxes/register-wf-peakpicking/script.py b/drop-boxes/register-wf-peakpicking/script.py index 03e555ed..ec234c1f 100644 --- a/drop-boxes/register-wf-peakpicking/script.py +++ b/drop-boxes/register-wf-peakpicking/script.py @@ -54,16 +54,12 @@ def process(transaction): #Register logs wfSampleCode = nameSplit[-1] + + sample_id = "/"+space+"/"+wfSampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, wfSampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - wfSample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + wfSample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) @@ -95,4 +91,4 @@ def process(transaction): sa = transaction.getSampleForUpdate(sampleID) dataSetRes = transaction.createNewDataSet('Q_MS_MZML_DATA') dataSetRes.setSample(sa) - transaction.moveFile(mzmlPath, dataSetRes) \ No newline at end of file + transaction.moveFile(mzmlPath, dataSetRes) diff --git a/drop-boxes/register-wf-peptideid/register-wf-peptideid.py b/drop-boxes/register-wf-peptideid/register-wf-peptideid.py index 0bcc3675..60f932bd 100644 --- a/drop-boxes/register-wf-peptideid/register-wf-peptideid.py +++ b/drop-boxes/register-wf-peptideid/register-wf-peptideid.py @@ -41,22 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - #sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.TYPE, "Q_WF_MS_PEPTIDEID_RUN")) - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - #sample = transaction.createNewSample("/"+space+"/"+sample_id + str(len(foundSamples)+1), "Q_WF_MS_PEPTIDEID_RUN") - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-qedda/register-qedda.py b/drop-boxes/register-wf-qedda/register-qedda.py index d5eee34f..5ba864ac 100644 --- a/drop-boxes/register-wf-qedda/register-qedda.py +++ b/drop-boxes/register-wf-qedda/register-qedda.py @@ -34,16 +34,11 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - sample = foundSamples[0] - sample = transaction.getSampleForUpdate(sample.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) diff --git a/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py b/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py index ee4bb2b9..2551720a 100644 --- a/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py +++ b/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py @@ -42,18 +42,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-shrna/register-shrna-dropbox.py b/drop-boxes/register-wf-shrna/register-shrna-dropbox.py index a8d499ea..fde5f461 100644 --- a/drop-boxes/register-wf-shrna/register-shrna-dropbox.py +++ b/drop-boxes/register-wf-shrna/register-shrna-dropbox.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-variantannotation/register-variantannotation.py b/drop-boxes/register-wf-variantannotation/register-variantannotation.py index 55d13d0c..17d8188a 100644 --- a/drop-boxes/register-wf-variantannotation/register-variantannotation.py +++ b/drop-boxes/register-wf-variantannotation/register-variantannotation.py @@ -34,18 +34,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() #parentcodes = [] #for parent in parents: # parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py b/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py index 2481ff8c..557da3e0 100644 --- a/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py +++ b/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py @@ -42,18 +42,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) @@ -89,4 +84,4 @@ def process(transaction): #if os.path.isdir(incomingPath+"/result"): # transaction.moveFile(incomingPath+"/result", dataSetRes) #else: - # transaction.moveFile(incomingPath, dataSetRes) \ No newline at end of file + # transaction.moveFile(incomingPath, dataSetRes) diff --git a/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py b/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py index 1a1ff5b0..57830bb5 100644 --- a/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py +++ b/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py @@ -1,6 +1,7 @@ from __future__ import print_function import os +import time import re import sys @@ -107,7 +108,19 @@ def register_wiff_pairs(transaction, wiff_pairs, qbic_id): transaction.moveFile(registration_dir, data_set) # Update sample location - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(qbic_id) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(qbic_id) + break + except: + print("Updating location for sample "+qbic_id+" failed on attempt "+str(attempt+1)) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def space_and_project(transaction, qbiccode): diff --git a/maintenance-tasks/checksum-maintenance/plugin.properties b/maintenance-tasks/checksum-maintenance/plugin.properties index 69e8ac15..25de8e99 100644 --- a/maintenance-tasks/checksum-maintenance/plugin.properties +++ b/maintenance-tasks/checksum-maintenance/plugin.properties @@ -1,3 +1,3 @@ # Updates missing checksums class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseChecksumCalculationTask -execute-only-once = true \ No newline at end of file +execute-only-once = true diff --git a/reporting-plugins/update-experiment-metadata/update.py b/reporting-plugins/update-experiment-metadata/update.py index 5c826561..d39ba2a3 100644 --- a/reporting-plugins/update-experiment-metadata/update.py +++ b/reporting-plugins/update-experiment-metadata/update.py @@ -1,5 +1,3 @@ -import smtplib -from email.mime.text import MIMEText def process(tr, parameters, tableBuilder): """Change properties of experiment @@ -9,30 +7,9 @@ def process(tr, parameters, tableBuilder): if not user == None: tr.setUserId(user) expId = parameters.get("identifier") - exp = tr.getExperimentForUpdate(expId) + exp = tr.getExperimentForUpdate(str(expId)) properties = parameters.get("properties") for prop in properties.keySet(): exp.setPropertyValue(prop, properties.get(prop)) - - #server = "smtpserv.uni-tuebingen.de" - #fromA = "notification_service@qbis.qbic.uni-tuebingen.de" - - # TODO get emails of space users - # Get it via liferay and pass it to this service ? - #toA = "mohr@informatik.uni-tuebingen.de" - #subject = "Update information for Experiment %s" % expId - #text = "Status of Experiment %s has been updated" % expId #, properties.get("Q_CURRENT_STATUS")) - - #msg = MIMEText(text) - #msg['From'] = fromA - #msg['To'] = toA - #msg['Subject'] = subject - # check for info@qbic.uni-tuebingen.de - #msg['reply-to'] = "mohr@informatik.uni-tuebingen.de" - - #smtpServer = smtplib.SMTP(server) - #smtpServer.sendmail(fromA, toA, msg.as_string()) - #smtpServer.close() - From 52470c99b43bce24740e210ef8dc14ad25954a8d Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 26 Feb 2021 14:55:02 +0100 Subject: [PATCH 02/13] adapt attachment script to actual running version (#67) --- .../register-attachment-dropbox.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py index cd82f564..6660cd16 100755 --- a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py +++ b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py @@ -84,10 +84,10 @@ def process(transaction): sa = transaction.getSampleForUpdate(sampleID) space = sa.getSpace() if not attachmentReady: - expID = '/' + space + '/' + project + '/'+ project+'_INFO' - exp = transaction.getExperimentForUpdate(expID) - if not exp: - exp = transaction.createNewExperiment(expID, "Q_PROJECT_DETAILS") + infoSampleID = "/"+space+"/"+code + sa = transaction.getSampleForUpdate(infoSampleID) + if not sa: + exp = transaction.createNewExperiment('/' + space + '/' + project + '/'+ project+'_INFO', "Q_PROJECT_DETAILS") sa = transaction.createNewSample('/' + space + '/'+ code, "Q_ATTACHMENT_SAMPLE") sa.setExperiment(exp) info = None From e53ac7c27689d0d982d9c8e4eab63c98714cd71c Mon Sep 17 00:00:00 2001 From: luiskuhn <38211686+luiskuhn@users.noreply.github.com> Date: Mon, 1 Mar 2021 15:52:34 +0100 Subject: [PATCH 03/13] Imaging data registration: provide key-value metadata ingestion to omero (#71) The ETL script and related modules can now properly register metadata properties in the OMERO server as key-value pairs. The ETL script now reads the TSV and creates a property map for each file, it then uses the backend module to register the property pairs. I verified that the metadata properties are properly registered in the OMERO server. this is a sample of the ETL output for test dataset 3: New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/sub_tomo_1.mrc Created OMERO image identifiers: ['11035'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : Cryo-ET --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : cell --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : FEI New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/rubisco_avg.mrc Created OMERO image identifiers: ['11036'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : Cryo-ET --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : cell --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : FEI New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/Image7246.tif Created OMERO image identifiers: ['11037'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : TEM --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : leaf --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : Zeiss New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/Est-B1a.lif Created OMERO image identifiers: ['11038', '11039', '11040', '11041'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : confocal_microscopy --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : root --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : Zeiss New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/Image_1.czi Created OMERO image identifiers: ['11042'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : confocal_microscopy --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : leaf --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : Zeiss New incoming image file for OMERO registration: /home/qeana10/dss/store/1/pre-staging/2021-02-25_12-46-46-408_register-omero-metadata_QIMGT654A2/QIMGT654A2/Image_2.czi Created OMERO image identifiers: ['11043'] Metadata properties: KEY : VALUE --> SAMPLE_BARCODE : QIMGT649A1 --> IMAGING_MODALITY : confocal_microscopy --> IMAGE_BINNING : 2 --> IMAGED_TISSUE : leaf --> CAMERA_ACQUISITION_TIME : 0.15 --> INSTRUMENT_MANUFACTURER : Zeiss --- .../backendinterface.py | 42 ++++++++++++++-- .../image_registration_process.py | 29 +++++++++-- .../register-omero-metadata/omero_54_env.yml | 39 +++++++++++++++ .../register-omero-metadata/register-omero.py | 48 +++++++++++++++++-- 4 files changed, 147 insertions(+), 11 deletions(-) create mode 100644 drop-boxes/register-omero-metadata/omero_54_env.yml diff --git a/drop-boxes/register-omero-metadata/backendinterface.py b/drop-boxes/register-omero-metadata/backendinterface.py index 1bed5b30..664cb57d 100644 --- a/drop-boxes/register-omero-metadata/backendinterface.py +++ b/drop-boxes/register-omero-metadata/backendinterface.py @@ -195,8 +195,10 @@ def register_image_file_with_dataset_id(file_path, dataset_id, usr, pwd, host, p if int(proc.returncode) == 0: - fist_line = std_out.splitlines()[0] - image_ids = fist_line[6:].split(',') + for line in std_out.splitlines(): + if line[:6] == "Image:": + image_ids = line[6:].split(',') + break else: image_ids = -1 @@ -319,7 +321,16 @@ def get_image_array(conn, image_id): def add_annotations_to_image(conn, image_id, key_value_data): """ - TODO + This function is used to add key-value pair annotations to an image + Example: + key_value_data = [["Drug Name", "Monastrol"], ["Concentration", "5 mg/ml"]] + add_annotations_to_image(conn, image_id, key_value_data) + Args: + conn: Established Connection to the OMERO Server via a BlitzGateway + image_id (int): An OMERO image ID + key_value_data (list of lists): list of key-value pairs + Returns: + int: not relevant atm """ import omero @@ -349,7 +360,6 @@ def add_annotations_to_image(conn, image_id, key_value_data): HOST = "host" PORT = 4064 - def get_args(): parser = OptionParser() parser.add_option('-f', '--file', dest='file_path', default="None", help='file to register') @@ -358,6 +368,10 @@ def get_args(): parser.add_option('-p', '--project', dest='project_id', default="None", help='project id for dataset id retrieval') parser.add_option('-s', '--sample', dest='sample_id', default="None", help='sample id for dataset id retrieval') + parser.add_option('-i', '--image', dest='image_id', default="None", help='image id for key-value pair annotation') + parser.add_option('-a', '--annotation', dest='ann_str', default="None", help='annotation string') + + (options, args) = parser.parse_args() return options @@ -373,9 +387,27 @@ def get_args(): id_str = id_str + id_i + " " print id_str - else: + + elif args.project_id != "None": conn = omero_connect(USERNAME, PASSWORD, HOST, str(PORT)) ds_id = get_omero_dataset_id(conn, str(args.project_id), str(args.sample_id)) print ds_id + + elif args.image_id != "None": + + conn = omero_connect(USERNAME, PASSWORD, HOST, str(PORT)) + + #string format: key1::value1//key2::value2//key3::value3//... + key_value_data = [] + pair_list = args.ann_str.split("//") + for pair in pair_list: + key_value = pair.split("::") + key_value_data.append(key_value) + + #print("backend: key-value pairs: " + str(key_value_data)) + + add_annotations_to_image(conn, str(args.image_id), key_value_data) + + print "annotation done." diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 730adb89..9f08abfc 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -92,15 +92,15 @@ def triggerOMETiffConversion(self): pass #ToDo Check if Metadata file is provided as was suggested in test.tsv provided by LK - def extractMetadataFromTSV(self, tsvFilePath): + def extractMetadataFromTSV(self, tsv_file_path): tsvFileMap = {} try: - with open(tsvFilePath) as tsvfile: + with open(tsv_file_path) as tsvfile: reader = csv.DictReader(tsvfile, delimiter='\t', strict=True) for row in reader: tsvFileMap.update(row) except IOError: - print "Error: No file found at provided filepath " + tsvFilePath + print "Error: No file found at provided filepath " + tsv_file_path except csv.Error as e: print 'Could not gather the Metadata from TSVfile %s, in line %d: %s' % (tsvfile, reader.line_num, e) @@ -109,6 +109,29 @@ def extractMetadataFromTSV(self, tsvFilePath): def registerExperimentDataInOpenBIS(self): pass + def registerKeyValuePairs(self, image_id, property_map): + cmd_list = list(self._init_cmd_list) + + #string format: key1::value1//key2::value2//key3::value3//... + key_value_str = "" + for key in property_map.keys(): + key_value_str = key_value_str + str(key) + "::" + str(property_map[key]) + "//" + key_value_str = key_value_str[:len(key_value_str)-2] #remove last two chars + #print("irp str: " + key_value_str) + + cmd_list.append( "python backendinterface.py -i " + str(image_id) + " -a " + key_value_str ) + + commands = "" + for cmd in cmd_list: + commands = commands + cmd + "\n" + + process = Popen( "/bin/bash", shell=False, universal_newlines=True, stdin=PIPE, stdout=PIPE, stderr=PIPE ) + out, err = process.communicate( commands ) + + #print(out) + + return 0 + class SampleCodeError(Exception): diff --git a/drop-boxes/register-omero-metadata/omero_54_env.yml b/drop-boxes/register-omero-metadata/omero_54_env.yml new file mode 100644 index 00000000..44a22e69 --- /dev/null +++ b/drop-boxes/register-omero-metadata/omero_54_env.yml @@ -0,0 +1,39 @@ +name: omero_env_0 +channels: + - bioconda + - sven1103 + - hargup/label/pypi + - anaconda + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2020.7.22=0 + - certifi=2019.11.28=py27_0 + - freetype=2.10.2=h5ab3b9f_0 + - hashlib=20081119=py27_0 + - jpeg=9b=habf39ab_1 + - libedit=3.1.20191231=h14c3975_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=9.1.0=hdf63c60_0 + - libpng=1.6.37=hbc83047_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.1.0=h2733197_1 + - lz4-c=1.9.2=he6710b0_1 + - ncurses=6.2=he6710b0_1 + - olefile=0.46=py27_0 + - omero-importer-cli=v1.0.0=0 + - openjdk=8.0.152=h7b6447c_3 + - openssl=1.0.2u=h7b6447c_0 + - pillow=6.2.1=py27h34e0f95_0 + - pip=19.3.1=py27_0 + - python=2.7.18=h15b4118_1 + - readline=8.0=h7b6447c_0 + - setuptools=44.0.0=py27_0 + - sqlite=3.33.0=h62c20be_0 + - tk=8.6.10=hbc83047_0 + - wheel=0.33.6=py27_0 + - xz=5.2.5=h7b6447c_0 + - zeroc-ice=3.6.3=py27hd0a1c67_1 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.4.4=h0b5b093_3 diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 4587d55e..6243f8ef 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -144,9 +144,39 @@ def findMetaDataFile(incomingPath): for f in files: stem, ext = os.path.splitext(f) if ext.lower()=='.tsv': - with open(os.path.join(root, f), 'U') as fh: metadataFile = fh.readlines() + with open(os.path.join(root, f), 'U') as fh: metadataFileContent = fh.readlines() return metadataFileContent +def getPropertyNames(metadataFile): + """Here we could add more complex behaviour later on. + """ + + return metadataFile[0].split("\t") + +def getPropertyMap(line, property_names): + """Build the property map. Here we could add more complex behaviour later on. + """ + + properties = {} + property_values = line.split("\t") + + for i in range(1, len(property_names)): #exclude first col (filename) + ##remove trailing newline, and replace space with underscore + name = property_names[i].rstrip('\n').replace(" ", "_") + value = property_values[i].rstrip('\n').replace(" ", "_") + + properties[name] = value + + return properties + +def printPropertyMap(property_map): + """Function to display metadata properties. + """ + + print("KEY : VALUE") + for key in property_map.keys(): + print "--> " + str(key) + " : " + str(property_map[key]) + def process(transaction): """The main entry point. @@ -170,17 +200,19 @@ def process(transaction): # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. - omero_dataset_id = registrationProcess.requestOmeroDatasetId() + omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) + + property_names = getPropertyNames(metadataFile) # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample properties = {} - # Retrieve the image file name + # Retrieve the image file name, please no whitespace characters in filename! fileName = getFileFromLine(line) imageFile = os.path.join(incomingPath, fileName) @@ -195,6 +227,16 @@ def process(transaction): # We extract the metadata from this file. #registrationProcess.extractMetadataFromTSV() + properties = getPropertyMap(line, property_names) + print "Metadata properties:\t" + printPropertyMap(properties) + + #one file can have many images, iterate over all img ids + for img_id in omero_image_ids: + registrationProcess.registerKeyValuePairs(img_id, properties) + + + #### # 6. In addition to the image registration and technical metadata storage, we want to add # further experimental metadata in openBIS. This metadata contains information about the # imaging experiment itself, such as modality, imaged tissue and more. From ecfd8219eb834fff94f74c5c2e2ed1b82d720c14 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 2 Mar 2021 13:30:58 +0100 Subject: [PATCH 04/13] add functionality to create openbis objects with metadata --- .../image_registration_process.py | 22 ++++- .../register-omero-metadata/register-omero.py | 95 +++++++++++-------- 2 files changed, 74 insertions(+), 43 deletions(-) diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 9f08abfc..7287f0d8 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -43,7 +43,17 @@ def fetchOpenBisSampleCode(self): raise SampleCodeError(self._sample_code, "The sample code seems to be invalid, the checksum could not be confirmed.") return self._project_code, self._sample_code - + + def searchOpenBisSample(sample_code): + #find specific sample + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) + foundSamples = search_service.searchForSamples(sc) + if len(foundSamples) == 0: + raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") + sample = foundSamples[0] + return sample + def _isValidSampleCode(self, sample_code): try: id = sample_code[0:9] @@ -143,4 +153,14 @@ def __init__(self, sample_code, message): def test(self): pass +class SampleNotFoundError(Exception): + + def __init__(self, sample_code, message): + self.sample_code = sample_code + self.message = message + super().__init__(self.message) + + def test(self): + pass + diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 6243f8ef..488c17e0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -58,8 +58,8 @@ def createNewImagingExperiment(tr, space, project, properties): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" - MODALITY_CODE = "Q_BMI_MODALITY" search_service = tr.getSearchService() + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) @@ -71,32 +71,39 @@ def createNewImagingExperiment(tr, space, project, properties): i += 1 exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) - exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) - for key in properties.keys(): - exp.setPropertyValue(key, properties[key]) - return exp - -def createNewImagingRun(tr, base_sample, exp, omero_link, run_offset): + img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + for incoming_label in experiment_property_map: + if incoming_label in properties: + key = experiment_property_map[incoming_label] + value = properties[incoming_label] + img_exp.setPropertyValue(key, value) + return img_exp + +def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, properties): IMG_RUN_PREFIX = "IMG" IMG_RUN_TYPE = "Q_BMI_GENERIC_IMAGING_RUN" - IMG_RUN_OMERO_PROPERTY_CODE = "Q_ADDITIONAL_INFO" - # TODO: can we use a prefix for imaging samples? - # otherwise creating new samples will be more complex - # on the other hand, replicates need to be numbered if we use IMG, e.g IMG1QABCD001AB - # IMG2QABCD001AB etc. - # talk to GG and LK + IMG_RUN_OMERO_PROPERTY_CODE = "Q_OMERO_IDS" + sample_property_map = {}#no specific properties from the metadata file yet + run = 0 exists = True new_sample_id = None + # respect samples already in openbis while exists: run += 1 new_sample_id = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run) + base_sample.getCode() exists = tr.getSampleForUpdate(new_sample_id) + # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_link) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) + for incoming_label in sample_property_map: + if incoming_label in properties: + key = sample_property_map[incoming_label] + value = properties[incoming_label] + img_run.setPropertyValue(key, value) return img_run #TODO Luis @@ -109,14 +116,17 @@ def getFileFromLine(line): def isSameExperimentMetadata(props1, props2): """dependent on metadata dictionaries of two different files (data model), decide if new openBIS experiment needs to be created - might be replaced by specific metadata properties, once we know more """ - # initilization of tsv parser, always results in new experiment - if not props1 or not props2: - return False - else: - return True - + relevantPropertyNames = ["IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + for label in relevantPropertyNames: + if label in props1 and label in props2: + if props1[label] != props2[label]: + return False + if label in props1 and not label in props2: + return False + if label in props2 and not label in props1: + return False + return True def registerImageInOpenBIS(transaction): search_service = transaction.getSearchService() @@ -197,6 +207,9 @@ def process(transaction): # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + #find specific sample + tissueSample = registrationProcess.searchOpenBisSample(sample_code) + space = tissueSample.getSpace() # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. @@ -206,7 +219,12 @@ def process(transaction): metadataFile = findMetaDataFile(incomingPath) property_names = getPropertyNames(metadataFile) - + + #keep track of number of images for openBIS ID + image_number = 0 + #Initialize openBIS imaging experiment + imagingExperiment = None + previousProps = {} # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample @@ -242,27 +260,20 @@ def process(transaction): # imaging experiment itself, such as modality, imaged tissue and more. # We also want to connect this data with the previously created, corresponding OMERO image id t # hat represents the result of this experiment in OMERO. - #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) - - # 7. Last but not least we create the open science file format for images which is - # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. - #registrationProcess.triggerOMETiffConversion() - - #################### + #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) # I did it myyy wayyyy - # TODO decide if new experiment is needed based on some pre-defined criteria. + # we decide if new experiment is needed based on some pre-defined criteria. # Normally, the most important criterium is collision of experiment type properties # between samples. E.g. two different imaging modalities need two experiments. - #fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) - #previousProps = properties - #if(not fileBelongsToExistingExperiment): - # exp = createNewImagingExperiment(transaction, space, project_code, properties) - #imagingSample = createNewImagingRun(transaction, sa, exp, list_of_omero_ids, offset)# maybe there are sample properties, too! - # register the actual data - #IMAGING_DATASET_CODE = Q_BMI_GENERIC_IMAGING_DATA # I guess - #dataset = transaction.createNewDataSet(IMAGING_DATASET_CODE) - #dataset.setSample(imagingSample) - #transaction.moveFile(imageFile, dataset) - # increment id offset for next sample in this loop - not sure anymore if this is needed - \ No newline at end of file + fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) + previousProps = properties + if(not fileBelongsToExistingExperiment): + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties) + imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) + # increment id offset for next sample in this loop + image_number += 1 + + # 7. Last but not least we create the open science file format for images which is + # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. + #registrationProcess.triggerOMETiffConversion() \ No newline at end of file From d94cf9e7266a3ffade0635a9b737999a8e8fffb6 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 9 Mar 2021 14:27:10 +0100 Subject: [PATCH 05/13] add provisional omero failure handling --- drop-boxes/register-omero-metadata/register-omero.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 488c17e0..c36c107a 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -241,6 +241,10 @@ def process(transaction): omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_failed = len(omero_image_ids) < 1 + if omero_failed: + raise ValueError("Omero did not return expected image ids.") + # 5. Additional metadata is provided in an own metadata TSV file. # We extract the metadata from this file. #registrationProcess.extractMetadataFromTSV() From 65d4164afcb8bbc168b8a4e9c1f65946b06f432d Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Mon, 15 Mar 2021 17:58:51 +0100 Subject: [PATCH 06/13] remove old method, add logging --- .../register-omero-metadata/register-omero.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index c36c107a..5ae20073 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -106,11 +106,6 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope img_run.setPropertyValue(key, value) return img_run -#TODO Luis -def callOmeroWithFilePath(file_path, sample_barcode): - list_of_omero_ids = ["1","2","3"] - return list_of_omero_ids - def getFileFromLine(line): return line.split("\t")[0] @@ -189,6 +184,7 @@ def printPropertyMap(property_map): def process(transaction): + print "start transaction" """The main entry point. openBIS calls this method, when an incoming transaction is registered. @@ -200,31 +196,52 @@ def process(transaction): # Get the incoming path of the transaction incomingPath = transaction.getIncoming().getAbsolutePath() + print incomingPath + # 1. Initialize the image registration process registrationProcess = irp.ImageRegistrationProcess(transaction) + + print "started reg process" # 2. We want to get the openBIS sample code from the incoming data # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + print project_code + print sample_code + #find specific sample tissueSample = registrationProcess.searchOpenBisSample(sample_code) space = tissueSample.getSpace() + + print tissueSample + print space # 3. We now request the associated omero dataset id for the openBIS sample code. # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. + + print "calling omero" omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) + print omero_dataset_id + # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) + print metadataFile + property_names = getPropertyNames(metadataFile) + print "property names:" + print property_names + #keep track of number of images for openBIS ID image_number = 0 #Initialize openBIS imaging experiment imagingExperiment = None previousProps = {} + + print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample From 896f66a01337b9e696604fe69d23af7eb742b3db Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Mon, 15 Mar 2021 18:27:08 +0100 Subject: [PATCH 07/13] fix searching for samples --- .../register-omero-metadata/image_registration_process.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 7287f0d8..5dfd8c8f 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -14,6 +14,7 @@ def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_ self._transaction = transaction self._incoming_file_name = transaction.getIncoming().getName() + self._search_service = transaction.getSearchService() self._project_code = project_code self._sample_code = sample_code @@ -44,11 +45,11 @@ def fetchOpenBisSampleCode(self): return self._project_code, self._sample_code - def searchOpenBisSample(sample_code): + def searchOpenBisSample(self, sample_code): #find specific sample sc = SearchCriteria() sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) - foundSamples = search_service.searchForSamples(sc) + foundSamples = self._search_service.searchForSamples(sc) if len(foundSamples) == 0: raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") sample = foundSamples[0] From 60a33d43c2e1e6a24545c80c5db8121fe0bff307 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:00:07 +0100 Subject: [PATCH 08/13] several fixes --- drop-boxes/register-omero-metadata/register-omero.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 5ae20073..2877a60f 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -56,12 +56,11 @@ ##### -def createNewImagingExperiment(tr, space, project, properties): +def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" search_service = tr.getSearchService() experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} - existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: existing_ids.append(exp.getExperimentIdentifier()) @@ -72,6 +71,7 @@ def createNewImagingExperiment(tr, space, project, properties): exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + existing_ids.append(exp_id) for incoming_label in experiment_property_map: if incoming_label in properties: key = experiment_property_map[incoming_label] @@ -96,7 +96,7 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) - img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) + img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) for incoming_label in sample_property_map: @@ -240,6 +240,7 @@ def process(transaction): #Initialize openBIS imaging experiment imagingExperiment = None previousProps = {} + existing_experiment_ids = [] print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata @@ -257,6 +258,7 @@ def process(transaction): # in OMERO. We pass the omero dataset id and trigger the image registration process in OMERO. omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_image_ids = [420,42] omero_failed = len(omero_image_ids) < 1 if omero_failed: @@ -290,7 +292,7 @@ def process(transaction): fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) previousProps = properties if(not fileBelongsToExistingExperiment): - imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties) + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties, existing_experiment_ids) imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) # increment id offset for next sample in this loop image_number += 1 From ca3cd7839883e336ab0171e2f71c3b31a7bc2156 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:01:39 +0100 Subject: [PATCH 09/13] several fixes --- drop-boxes/register-omero-metadata/register-omero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 2877a60f..252e6c5b 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -98,7 +98,7 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_image_ids) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, str(omero_image_ids)) for incoming_label in sample_property_map: if incoming_label in properties: key = sample_property_map[incoming_label] From 97b952692a235720c8387929fedffa487c41277f Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:26:11 +0100 Subject: [PATCH 10/13] change metadata mapping for measurement dae --- drop-boxes/register-omero-metadata/register-omero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 252e6c5b..7eec8ce0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -59,7 +59,7 @@ def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" search_service = tr.getSearchService() - experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "CAMERA_ACQUISITION_TIME":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "IMAGING_DATE":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: From 3e29e0abfe43f70a850d5711dfeccb3a65c82fb9 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:39:14 +0100 Subject: [PATCH 11/13] add date mapping --- drop-boxes/register-omero-metadata/register-omero.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 7eec8ce0..2c0c18b0 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -11,6 +11,7 @@ #import sample_tracking_helper_qbic as thelper import checksum +import datetime import re import os import urllib @@ -55,6 +56,11 @@ # and delete the data! ##### +INCOMING_DATE_FORMAT = '%d.%m.%Y' +OPENBIS_DATE_FORMAT = '%Y-%m-%d' + +def mapDateString(date_string): + return datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT).strftime(OPENBIS_DATE_FORMAT) def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" @@ -76,6 +82,8 @@ def createNewImagingExperiment(tr, space, project, properties, existing_ids): if incoming_label in properties: key = experiment_property_map[incoming_label] value = properties[incoming_label] + if key == "Q_MEASUREMENT_FINISH_DATE": + value = mapDateString(value) img_exp.setPropertyValue(key, value) return img_exp From 63b3f00f08587e2218179ad6632198b755fea243 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 16 Mar 2021 12:52:06 +0100 Subject: [PATCH 12/13] remove test data --- drop-boxes/register-omero-metadata/register-omero.py | 1 - 1 file changed, 1 deletion(-) diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 2c0c18b0..6c3b7987 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -266,7 +266,6 @@ def process(transaction): # in OMERO. We pass the omero dataset id and trigger the image registration process in OMERO. omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) - omero_image_ids = [420,42] omero_failed = len(omero_image_ids) < 1 if omero_failed: From da49732665b55f6d3c3a3b021550c6ed7fd31159 Mon Sep 17 00:00:00 2001 From: luiskuhn <38211686+luiskuhn@users.noreply.github.com> Date: Wed, 17 Mar 2021 18:28:42 +0100 Subject: [PATCH 13/13] Feature/omero prep release v1 (#77) * Handle registration of imaging data and metadata (OMERO and openBIS) * Update drop-boxes/register-omero-metadata/register-omero.py * format omero ids correctly for openbis * handle datamover folders correctly Co-authored-by: wow-such-code --- .../backendinterface.py | 26 ++++---- .../image_registration_process.py | 36 +++++++++-- .../register-omero-metadata/register-omero.py | 63 ++++++++++++++++--- 3 files changed, 98 insertions(+), 27 deletions(-) diff --git a/drop-boxes/register-omero-metadata/backendinterface.py b/drop-boxes/register-omero-metadata/backendinterface.py index 664cb57d..1b4f1ee2 100644 --- a/drop-boxes/register-omero-metadata/backendinterface.py +++ b/drop-boxes/register-omero-metadata/backendinterface.py @@ -182,9 +182,7 @@ def register_image_file_with_dataset_id(file_path, dataset_id, usr, pwd, host, p ds_id = dataset_id if ds_id != -1: - cmd = "omero-importer -s " + host + " -p " + str(port) + " -u " + usr + " -w " + pwd + " -d " + str(int(ds_id)) + " " + file_path - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -194,18 +192,14 @@ def register_image_file_with_dataset_id(file_path, dataset_id, usr, pwd, host, p std_out, std_err = proc.communicate() if int(proc.returncode) == 0: - for line in std_out.splitlines(): if line[:6] == "Image:": image_ids = line[6:].split(',') break - else: - image_ids = -1 - + image_ids = [] else: - image_ids = -1 - + image_ids = [] return image_ids @@ -353,12 +347,18 @@ def add_annotations_to_image(conn, image_id, key_value_data): ##app from optparse import OptionParser +import ConfigParser + +config = ConfigParser.RawConfigParser() +config.read("imaging_config.properties") + ###OMERO server info -USERNAME = "usr" -PASSWORD = "pwd" -HOST = "host" -PORT = 4064 +USERNAME = config.get('OmeroServerSection', 'omero.username') +PASSWORD = config.get('OmeroServerSection', 'omero.password') +HOST = config.get('OmeroServerSection', 'omero.host') +PORT = int(config.get('OmeroServerSection', 'omero.port')) + def get_args(): parser = OptionParser() @@ -410,4 +410,4 @@ def get_args(): add_annotations_to_image(conn, str(args.image_id), key_value_data) - print "annotation done." + print "0" diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 5dfd8c8f..306c937b 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -7,10 +7,14 @@ from subprocess import Popen, PIPE barcode_pattern = re.compile('Q[a-zA-Z0-9]{4}[0-9]{3}[A-Z][a-zA-Z0-9]') +conda_home_path = "/home/qeana10/miniconda2/" +omero_lib_path = "/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.py-5.4.10-ice36-b105" +etl_home_path = "/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/" + class ImageRegistrationProcess: - def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_code=""): + def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_code="", conda_path=None, omero_path=None, etl_path=None): self._transaction = transaction self._incoming_file_name = transaction.getIncoming().getName() @@ -19,19 +23,32 @@ def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_ self._project_code = project_code self._sample_code = sample_code + ###set env + self._conda_path = conda_home_path + if not conda_path is None: + self._conda_path = conda_path + + self._omero_path = omero_lib_path + if not omero_path is None: + self._omero_path = omero_path + + self._etl_path= etl_home_path + if not etl_path is None: + self._etl_path = etl_path + self._init_cmd_list = [] - self._init_cmd_list.append('eval "$(/home/qeana10/miniconda2/bin/conda shell.bash hook)"') + self._init_cmd_list.append('eval "$(' + self._conda_path + 'bin/conda shell.bash hook)"') self._init_cmd_list.append('conda activate ' + env_name) - self._init_cmd_list.append('export OMERO_PREFIX=/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.py-5.4.10-ice36-b105') + self._init_cmd_list.append('export OMERO_PREFIX=' + self._omero_path) self._init_cmd_list.append('export PYTHONPATH=$PYTHONPATH:$OMERO_PREFIX/lib/python') #now use the omero-importer app packaged in the conda env #self._init_cmd_list.append('export PATH=$PATH:/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.server-5.4.10-ice36-b105/bin') - self._init_cmd_list.append('export PATH=$PATH:/home/qeana10/miniconda2/envs/' + env_name + '/bin') + self._init_cmd_list.append('export PATH=$PATH:' + self._conda_path + 'envs/' + env_name + '/bin') - #move to the dir where backendinterface.py lives - self._init_cmd_list.append('cd /home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/') + #move to the dir where backendinterface.py lives for exec. + self._init_cmd_list.append('cd ' + self._etl_path) def fetchOpenBisSampleCode(self): found = barcode_pattern.findall(self._incoming_file_name) @@ -84,6 +101,10 @@ def requestOmeroDatasetId(self, project_code=None, sample_code=None): return ds_id def registerImageFileInOmero(self, file_path, dataset_id): + print "using file_path:" + print file_path + print "ds id:" + print dataset_id cmd_list = list(self._init_cmd_list) cmd_list.append( "python backendinterface.py -f " + file_path + " -d " + str(dataset_id) ) @@ -95,6 +116,9 @@ def registerImageFileInOmero(self, file_path, dataset_id): out, err = process.communicate( commands ) id_list = str(out).split() + for img_id in id_list: + if not img_id.isdigit(): + return [] return id_list diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 6c3b7987..eea783e8 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -106,7 +106,7 @@ def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, prope img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, str(omero_image_ids)) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, '\n'.join(omero_image_ids)) for incoming_label in sample_property_map: if incoming_label in properties: key = sample_property_map[incoming_label] @@ -163,8 +163,26 @@ def findMetaDataFile(incomingPath): def getPropertyNames(metadataFile): """Here we could add more complex behaviour later on. """ + + property_names = metadataFile[0].split("\t") + for i in range(len(property_names)): + property_names[i] = property_names[i].strip().upper() + + return property_names - return metadataFile[0].split("\t") +def validatePropertyNames(property_names): + """Validate metadata property names. + TODO: call the imaging metadata parser (with json schema). + """ + + # fast validation without parser object. + required_names = ["IMAGE_FILE_NAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + + for name in required_names: + if not name in property_names: + return False + + return True def getPropertyMap(line, property_names): """Build the property map. Here we could add more complex behaviour later on. @@ -182,6 +200,21 @@ def getPropertyMap(line, property_names): return properties +def filterOmeroPropertyMap(property_map): + """Filters map before ingestion into omero server + """ + + #the blacklist, e.g. what is going to openBIS or is automatically added to omero (e.g. file name) + filter_list = ["IMAGE_FILE_NAME", "INSTRUMENT_USER", "IMAGING_DATE"] + + new_props = {} + for key in property_map.keys(): + if not key in filter_list: + new_props[key] = property_map[key] + + return new_props + + def printPropertyMap(property_map): """Function to display metadata properties. """ @@ -203,13 +236,15 @@ def process(transaction): # Get the incoming path of the transaction incomingPath = transaction.getIncoming().getAbsolutePath() + # Get the name of the incoming folder + folderName = transaction.getIncoming().getName() print incomingPath # 1. Initialize the image registration process registrationProcess = irp.ImageRegistrationProcess(transaction) - print "started reg process" + print "started reg. process" # 2. We want to get the openBIS sample code from the incoming data # This tells us to which biological sample the image data was aquired from. @@ -228,14 +263,21 @@ def process(transaction): # Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. - print "calling omero" + print "calling omero..." + #returns -1 if operation failed omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) + print "omero dataset id:" print omero_dataset_id + omero_failed = int(omero_dataset_id) < 0 + if omero_failed: + raise ValueError("Omero did not return expected dataset id.") + # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) + print "metadataFile:" print metadataFile property_names = getPropertyNames(metadataFile) @@ -243,6 +285,10 @@ def process(transaction): print "property names:" print property_names + valid_names = validatePropertyNames(property_names) + if not valid_names: + raise ValueError("Invalid Property Names.") + #keep track of number of images for openBIS ID image_number = 0 #Initialize openBIS imaging experiment @@ -258,8 +304,9 @@ def process(transaction): # Retrieve the image file name, please no whitespace characters in filename! fileName = getFileFromLine(line) - - imageFile = os.path.join(incomingPath, fileName) + # Due to the datahandler we need to add another subfolder of the same name to the path + imageFolder = os.path.join(incomingPath, folderName) + imageFile = os.path.join(imageFolder, fileName) print "New incoming image file for OMERO registration:\t" + imageFile # 4. After we have received the omero dataset id, we know where to attach the image to @@ -281,7 +328,7 @@ def process(transaction): #one file can have many images, iterate over all img ids for img_id in omero_image_ids: - registrationProcess.registerKeyValuePairs(img_id, properties) + registrationProcess.registerKeyValuePairs(img_id, filterOmeroPropertyMap(properties)) #### @@ -306,4 +353,4 @@ def process(transaction): # 7. Last but not least we create the open science file format for images which is # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. - #registrationProcess.triggerOMETiffConversion() \ No newline at end of file + #registrationProcess.triggerOMETiffConversion()