From 461b81e90e1b8fd4f4f9bed3627aefd58e0a9d48 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Mon, 2 Nov 2020 17:21:47 +0100 Subject: [PATCH 01/17] make imgag dropbox raise exception if unknown data types arrive --- drop-boxes/register-imgag-dropbox/register-imgag.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drop-boxes/register-imgag-dropbox/register-imgag.py b/drop-boxes/register-imgag-dropbox/register-imgag.py index a97ee8e8..02896d4f 100644 --- a/drop-boxes/register-imgag-dropbox/register-imgag.py +++ b/drop-boxes/register-imgag-dropbox/register-imgag.py @@ -596,13 +596,14 @@ def process(transaction): print rawFile if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"): vcfs.append(rawFile) - if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"): + elif rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"): fastqs.append(rawFile) - if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"): + elif rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"): gsvars.append(rawFile) - if rawFile.endswith("tsv") or rawFile.endswith("tsv.gz"): + elif rawFile.endswith("tsv") or rawFile.endswith("tsv.gz"): tsvs.append(rawFile) - + else: + raise Exception(rawFile + " is of an unsupported format") #if rawFiles[0].endswith("vcf") or rawFiles[0].endswith("vcf.gz"): # datasetSample = find_and_register_vcf(transaction, jsonContent) From 1b51c234f06356d64fa3aaca8491ff663a358441 Mon Sep 17 00:00:00 2001 From: Sven F Date: Tue, 3 Nov 2020 09:18:38 +0100 Subject: [PATCH 02/17] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5130ba87..5190ed1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 1.4.1 2020-11-03 + +* Imgag dropbox: raise an exception, if files of unknown type are part of the transaction + ## 1.4.0 * Provide first imaging registration support with OMERO server From 468887b121b25966d8de6541bf20383baf7f2ddc Mon Sep 17 
00:00:00 2001 From: Sven F Date: Tue, 3 Nov 2020 10:24:57 +0100 Subject: [PATCH 03/17] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 83afc705..a34ef4d3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/qbicsoftware/omero-portlet) +![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/qbicsoftware/etl-scripts) ![Python Language](https://img.shields.io/badge/language-python-blue.svg) ![License](https://img.shields.io/github/license/qbicsoftware/etl-scripts) [![DOI](https://zenodo.org/badge/45912621.svg)](https://zenodo.org/badge/latestdoi/45912621) From c8aa7a942532c1b499937a8696b3c7585660568e Mon Sep 17 00:00:00 2001 From: Sven Fillinger Date: Tue, 3 Nov 2020 10:26:45 +0100 Subject: [PATCH 04/17] Update CL --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecfb7338..9878718d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Changelog -## Currently in development +## 1.5.0 2020-11-03 -* New maintenance task: update missing checksum one, after dss start. +* New maintenance task: update missing checksum once, after dss starts. 
## 1.4.0 From 1c67c06b1f9d80f498fd540232bd4530ab6ce97c Mon Sep 17 00:00:00 2001 From: Sven Fillinger Date: Tue, 3 Nov 2020 10:29:24 +0100 Subject: [PATCH 05/17] Add new line at end of file Add new line at end of file --- maintenance-tasks/checksum-maintenance/plugin.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maintenance-tasks/checksum-maintenance/plugin.properties b/maintenance-tasks/checksum-maintenance/plugin.properties index 69e8ac15..25de8e99 100644 --- a/maintenance-tasks/checksum-maintenance/plugin.properties +++ b/maintenance-tasks/checksum-maintenance/plugin.properties @@ -1,3 +1,3 @@ # Updates missing checksums class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseChecksumCalculationTask -execute-only-once = true \ No newline at end of file +execute-only-once = true From 187bb14f2d28520ecdd70dfdcec488040b93f3f7 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 3 Nov 2020 11:15:16 +0100 Subject: [PATCH 06/17] Convert experiment id to string for v3 objects (#55) --- .../update-experiment-metadata/update.py | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/reporting-plugins/update-experiment-metadata/update.py b/reporting-plugins/update-experiment-metadata/update.py index 5c826561..d39ba2a3 100644 --- a/reporting-plugins/update-experiment-metadata/update.py +++ b/reporting-plugins/update-experiment-metadata/update.py @@ -1,5 +1,3 @@ -import smtplib -from email.mime.text import MIMEText def process(tr, parameters, tableBuilder): """Change properties of experiment @@ -9,30 +7,9 @@ def process(tr, parameters, tableBuilder): if not user == None: tr.setUserId(user) expId = parameters.get("identifier") - exp = tr.getExperimentForUpdate(expId) + exp = tr.getExperimentForUpdate(str(expId)) properties = parameters.get("properties") for prop in properties.keySet(): exp.setPropertyValue(prop, properties.get(prop)) - - #server = "smtpserv.uni-tuebingen.de" - #fromA = 
"notification_service@qbis.qbic.uni-tuebingen.de" - - # TODO get emails of space users - # Get it via liferay and pass it to this service ? - #toA = "mohr@informatik.uni-tuebingen.de" - #subject = "Update information for Experiment %s" % expId - #text = "Status of Experiment %s has been updated" % expId #, properties.get("Q_CURRENT_STATUS")) - - #msg = MIMEText(text) - #msg['From'] = fromA - #msg['To'] = toA - #msg['Subject'] = subject - # check for info@qbic.uni-tuebingen.de - #msg['reply-to'] = "mohr@informatik.uni-tuebingen.de" - - #smtpServer = smtplib.SMTP(server) - #smtpServer.sendmail(fromA, toA, msg.as_string()) - #smtpServer.close() - From 7f578fd05ed74e65c27219afb13e423ba00463ed Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 3 Nov 2020 11:15:53 +0100 Subject: [PATCH 07/17] Rename data folder for pooled data (#54) --- drop-boxes/register-nanopore-dropbox/register-nanopore.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drop-boxes/register-nanopore-dropbox/register-nanopore.py b/drop-boxes/register-nanopore-dropbox/register-nanopore.py index bc6b2a0c..7ae690ac 100644 --- a/drop-boxes/register-nanopore-dropbox/register-nanopore.py +++ b/drop-boxes/register-nanopore-dropbox/register-nanopore.py @@ -226,12 +226,15 @@ def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, cu # moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, suffix): name = dataObject.getName() + # if pooled data, folder is named using barcode and needs to be adapted + if not "_" in name: + name = name + "_" + suffix relativePath = dataObject.getRelativePath() # the source path of the currently handled data object (e.g. 
fast5_fail folder) sourcePath = os.path.join(os.path.dirname(currentPath), relativePath) checksumFile = createChecksumFileForFolder(incomingPath, sourcePath) # destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled - destination = os.path.join(destinationPath, name + "_" + suffix) + destination = os.path.join(destinationPath, name) os.rename(sourcePath, destination) def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSample, openbisExperiment, currentPath, absLogPath): From ab8997edf3838d45de1acac0b488d51958e20704 Mon Sep 17 00:00:00 2001 From: Sven F Date: Tue, 3 Nov 2020 12:01:27 +0100 Subject: [PATCH 08/17] Release/1.5.0 (#53) * Feature/checksum maintenance (#50) This PR introduces a maintenance task that will refresh the path info db and update missing checksums * Feature/checksum maintenance (#52) This PR favors the ChecksumCalculationTask over the path info db refresh task and has been confirmed to work in a openbis 18.06.2 test instance. * Update CL * Add new line at end of file Add new line at end of file * convert experiment id to string for v3 objects * Convert experiment id to string for v3 objects (#55) * Rename data folder for pooled data (#54) * adapt changelog Co-authored-by: wow-such-code --- CHANGELOG.md | 7 ++++++ .../register-nanopore.py | 5 +++- .../checksum-maintenance/plugin.properties | 3 +++ .../update-experiment-metadata/update.py | 25 +------------------ 4 files changed, 15 insertions(+), 25 deletions(-) create mode 100644 maintenance-tasks/checksum-maintenance/plugin.properties diff --git a/CHANGELOG.md b/CHANGELOG.md index 5190ed1a..32e81588 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog + +## 1.5.0 2020-11-03 + +* New maintenance task: update missing checksum once, after dss starts. 
+* Fix for nanopore registration: rename folders for pooling case +* Fix for experiment update: force identifier into a string to support v3 API objects + ## 1.4.1 2020-11-03 * Imgag dropbox: raise an exception, if files of unknown type are part of the transaction diff --git a/drop-boxes/register-nanopore-dropbox/register-nanopore.py b/drop-boxes/register-nanopore-dropbox/register-nanopore.py index bc6b2a0c..7ae690ac 100644 --- a/drop-boxes/register-nanopore-dropbox/register-nanopore.py +++ b/drop-boxes/register-nanopore-dropbox/register-nanopore.py @@ -226,12 +226,15 @@ def registerUnclassifiedData(transaction, unclassifiedDataMap, runExperiment, cu # moves a subset of nanopore data to a new target path, needed to add fastq and fast5 subfolders to the same dataset def prepareDataFolder(incomingPath, currentPath, destinationPath, dataObject, suffix): name = dataObject.getName() + # if pooled data, folder is named using barcode and needs to be adapted + if not "_" in name: + name = name + "_" + suffix relativePath = dataObject.getRelativePath() # the source path of the currently handled data object (e.g. 
fast5_fail folder) sourcePath = os.path.join(os.path.dirname(currentPath), relativePath) checksumFile = createChecksumFileForFolder(incomingPath, sourcePath) # destination path containing data type (fastq or fast5), as well as the parent sample code, so pooled samples can be handled - destination = os.path.join(destinationPath, name + "_" + suffix) + destination = os.path.join(destinationPath, name) os.rename(sourcePath, destination) def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSample, openbisExperiment, currentPath, absLogPath): diff --git a/maintenance-tasks/checksum-maintenance/plugin.properties b/maintenance-tasks/checksum-maintenance/plugin.properties new file mode 100644 index 00000000..25de8e99 --- /dev/null +++ b/maintenance-tasks/checksum-maintenance/plugin.properties @@ -0,0 +1,3 @@ +# Updates missing checksums +class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseChecksumCalculationTask +execute-only-once = true diff --git a/reporting-plugins/update-experiment-metadata/update.py b/reporting-plugins/update-experiment-metadata/update.py index 5c826561..d39ba2a3 100644 --- a/reporting-plugins/update-experiment-metadata/update.py +++ b/reporting-plugins/update-experiment-metadata/update.py @@ -1,5 +1,3 @@ -import smtplib -from email.mime.text import MIMEText def process(tr, parameters, tableBuilder): """Change properties of experiment @@ -9,30 +7,9 @@ def process(tr, parameters, tableBuilder): if not user == None: tr.setUserId(user) expId = parameters.get("identifier") - exp = tr.getExperimentForUpdate(expId) + exp = tr.getExperimentForUpdate(str(expId)) properties = parameters.get("properties") for prop in properties.keySet(): exp.setPropertyValue(prop, properties.get(prop)) - - #server = "smtpserv.uni-tuebingen.de" - #fromA = "notification_service@qbis.qbic.uni-tuebingen.de" - - # TODO get emails of space users - # Get it via liferay and pass it to this service ? 
- #toA = "mohr@informatik.uni-tuebingen.de" - #subject = "Update information for Experiment %s" % expId - #text = "Status of Experiment %s has been updated" % expId #, properties.get("Q_CURRENT_STATUS")) - - #msg = MIMEText(text) - #msg['From'] = fromA - #msg['To'] = toA - #msg['Subject'] = subject - # check for info@qbic.uni-tuebingen.de - #msg['reply-to'] = "mohr@informatik.uni-tuebingen.de" - - #smtpServer = smtplib.SMTP(server) - #smtpServer.sendmail(fromA, toA, msg.as_string()) - #smtpServer.close() - From 46b13be52d0dfaa4f2152551d98587aa3ff201b8 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 12 Jan 2021 14:07:09 +0100 Subject: [PATCH 09/17] Hotfix/wf sample fetching (#58) * convert experiment id to string for v3 objects * adapt changelog * fetch samples by id without using search service --- .../register-16staxonomic-dropbox.py | 10 ++-------- .../register-epitopeprediction.py | 14 +++++--------- .../register-wf-hlatyping/register-hlatyping.py | 11 +++-------- .../register-individualizedproteome-dropbox.py | 14 ++++---------- .../register-ligandomicsid-dropbox.py | 11 ++--------- .../register-ligandomicsqc-dropbox.py | 10 ++-------- .../register-mapping-dropbox.py | 12 +++--------- drop-boxes/register-wf-maqc/register-wf-maqc.py | 12 +++--------- .../register-maxquant-dropbox.py | 12 +++--------- .../register-mergengsdata.py | 11 +++-------- drop-boxes/register-wf-msqc/register-wf-msqc.py | 12 +++--------- drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py | 10 ++-------- drop-boxes/register-wf-peakpicking/script.py | 12 ++++-------- .../register-wf-peptideid/register-wf-peptideid.py | 14 +++----------- drop-boxes/register-wf-qedda/register-qedda.py | 9 ++------- .../register-wf-rnaexpranalysis.py | 11 +++-------- .../register-wf-shrna/register-shrna-dropbox.py | 12 +++--------- .../register-variantannotation.py | 11 +++-------- .../register-wf-variantcalling.py | 13 ++++--------- 19 files changed, 57 insertions(+), 164 deletions(-) diff --git 
a/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py b/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py index 182a5d7b..ac7ef035 100644 --- a/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py +++ b/drop-boxes/register-wf-16staxonomicprofiling/register-16staxonomic-dropbox.py @@ -56,18 +56,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py b/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py index d8404ba4..8e28dfea 100644 --- a/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py +++ b/drop-boxes/register-wf-epitopeprediction/register-epitopeprediction.py @@ -34,18 +34,14 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode + if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() 
- sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + sample = transaction.getSampleForUpdate(sample_id) + + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-hlatyping/register-hlatyping.py b/drop-boxes/register-wf-hlatyping/register-hlatyping.py index 3d1b0036..c2e5c1aa 100644 --- a/drop-boxes/register-wf-hlatyping/register-hlatyping.py +++ b/drop-boxes/register-wf-hlatyping/register-hlatyping.py @@ -57,18 +57,13 @@ def process(transaction): project = pPattern.findall(name)[0] experiment_id = ePattern.findall(name)[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py b/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py index 8d61a2f0..f7fbf1dd 100644 --- a/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py +++ 
b/drop-boxes/register-wf-individualizedproteome/register-individualizedproteome-dropbox.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode + if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + sample = transaction.getSampleForUpdate(sample_id) + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py b/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py index 98dc78b1..d854ce65 100644 --- a/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py +++ b/drop-boxes/register-wf-ligandomicsid/register-ligandomicsid-dropbox.py @@ -41,19 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/" + space + "/" + sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py b/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py index 016b9b08..9ffc8937 100644 --- a/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py +++ b/drop-boxes/register-wf-ligandomicsqc/register-ligandomicsqc-dropbox.py @@ -41,18 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) experiment.setPropertyValue("Q_WF_STATUS", "FINISHED") diff --git a/drop-boxes/register-wf-mapping/register-mapping-dropbox.py b/drop-boxes/register-wf-mapping/register-mapping-dropbox.py index dd87826f..4ca24383 100644 --- a/drop-boxes/register-wf-mapping/register-mapping-dropbox.py +++ b/drop-boxes/register-wf-mapping/register-mapping-dropbox.py @@ -41,22 +41,16 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) additionalInfo = sample.getPropertyValue("Q_ADDITIONAL_INFO") - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-maqc/register-wf-maqc.py b/drop-boxes/register-wf-maqc/register-wf-maqc.py index b6a97400..efe32e30 100644 --- a/drop-boxes/register-wf-maqc/register-wf-maqc.py +++ b/drop-boxes/register-wf-maqc/register-wf-maqc.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() if len(parents) > 6: first = parents[0].split("/")[-1] parentInfos = first+"_and_"+str(len(parents)-1)+"others" diff --git a/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py b/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py index 8c1eff51..9317bde9 100644 --- a/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py +++ b/drop-boxes/register-wf-maxquant/register-maxquant-dropbox.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py b/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py index 39f10e78..394a44b7 100644 --- a/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py +++ b/drop-boxes/register-wf-mergenngsdata/register-mergengsdata.py @@ -34,18 +34,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git 
a/drop-boxes/register-wf-msqc/register-wf-msqc.py b/drop-boxes/register-wf-msqc/register-wf-msqc.py index 76ffa18a..ed0e9055 100644 --- a/drop-boxes/register-wf-msqc/register-wf-msqc.py +++ b/drop-boxes/register-wf-msqc/register-wf-msqc.py @@ -41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py b/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py index 6957113a..1fb9643e 100644 --- a/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py +++ b/drop-boxes/register-wf-ngsqc/register-wf-ngsqc.py @@ -41,18 +41,12 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' 
sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) diff --git a/drop-boxes/register-wf-peakpicking/script.py b/drop-boxes/register-wf-peakpicking/script.py index 03e555ed..ec234c1f 100644 --- a/drop-boxes/register-wf-peakpicking/script.py +++ b/drop-boxes/register-wf-peakpicking/script.py @@ -54,16 +54,12 @@ def process(transaction): #Register logs wfSampleCode = nameSplit[-1] + + sample_id = "/"+space+"/"+wfSampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, wfSampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - wfSample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) + wfSample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) @@ -95,4 +91,4 @@ def process(transaction): sa = transaction.getSampleForUpdate(sampleID) dataSetRes = transaction.createNewDataSet('Q_MS_MZML_DATA') dataSetRes.setSample(sa) - transaction.moveFile(mzmlPath, dataSetRes) \ No newline at end of file + transaction.moveFile(mzmlPath, dataSetRes) diff --git a/drop-boxes/register-wf-peptideid/register-wf-peptideid.py 
b/drop-boxes/register-wf-peptideid/register-wf-peptideid.py index 0bcc3675..60f932bd 100644 --- a/drop-boxes/register-wf-peptideid/register-wf-peptideid.py +++ b/drop-boxes/register-wf-peptideid/register-wf-peptideid.py @@ -41,22 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - #sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.TYPE, "Q_WF_MS_PEPTIDEID_RUN")) - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - #sample = transaction.createNewSample("/"+space+"/"+sample_id + str(len(foundSamples)+1), "Q_WF_MS_PEPTIDEID_RUN") - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-qedda/register-qedda.py b/drop-boxes/register-wf-qedda/register-qedda.py index d5eee34f..5ba864ac 100644 --- a/drop-boxes/register-wf-qedda/register-qedda.py +++ b/drop-boxes/register-wf-qedda/register-qedda.py @@ -34,16 +34,11 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = 
transaction.getSearchService() - - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - sample = foundSamples[0] - sample = transaction.getSampleForUpdate(sample.getSampleIdentifier()) + sample = transaction.getSampleForUpdate(sample_id) experiment = transaction.getExperimentForUpdate("/"+space+"/"+project+"/"+experiment_id) diff --git a/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py b/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py index ee4bb2b9..2551720a 100644 --- a/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py +++ b/drop-boxes/register-wf-rnaexpressionanalysis/register-wf-rnaexpranalysis.py @@ -42,18 +42,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-shrna/register-shrna-dropbox.py b/drop-boxes/register-wf-shrna/register-shrna-dropbox.py index a8d499ea..fde5f461 100644 --- a/drop-boxes/register-wf-shrna/register-shrna-dropbox.py +++ b/drop-boxes/register-wf-shrna/register-shrna-dropbox.py @@ 
-41,20 +41,14 @@ def process(transaction): space = nameSplit[0] project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] - #sample_id = experiment_id+'.' sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-variantannotation/register-variantannotation.py b/drop-boxes/register-wf-variantannotation/register-variantannotation.py index 55d13d0c..17d8188a 100644 --- a/drop-boxes/register-wf-variantannotation/register-variantannotation.py +++ b/drop-boxes/register-wf-variantannotation/register-variantannotation.py @@ -34,18 +34,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - 
parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() #parentcodes = [] #for parent in parents: # parentcodes.append(parent.split("/")[-1]) diff --git a/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py b/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py index 2481ff8c..557da3e0 100644 --- a/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py +++ b/drop-boxes/register-wf-variantcalling/register-wf-variantcalling.py @@ -42,18 +42,13 @@ def process(transaction): project = pPattern.findall(nameSplit[1])[0] experiment_id = ePattern.findall(nameSplit[2])[0] sampleCode = nameSplit[-1] + sample_id = "/"+space+"/"+sampleCode if not experiment_id: print "The identifier matching the pattern Q\w{4}E\[0-9]+ was not found in the fileName "+name - ss = transaction.getSearchService() + sample = transaction.getSampleForUpdate(sample_id) - sc = SearchCriteria() - sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sampleCode)) - foundSamples = ss.searchForSamples(sc) - samplehit = foundSamples[0] - sample = transaction.getSampleForUpdate(samplehit.getSampleIdentifier()) - - parents = samplehit.getParentSampleIdentifiers() + parents = sample.getParentSampleIdentifiers() parentcodes = [] for parent in parents: parentcodes.append(parent.split("/")[-1]) @@ -89,4 +84,4 @@ def process(transaction): #if os.path.isdir(incomingPath+"/result"): # transaction.moveFile(incomingPath+"/result", dataSetRes) #else: - # transaction.moveFile(incomingPath, dataSetRes) \ No newline at end of file + # transaction.moveFile(incomingPath, dataSetRes) From 9f554d76d36df65bf12fe643b497b8642cc5c40d Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 22 Jan 2021 13:44:46 +0100 Subject: [PATCH 10/17] Hotfix/retry sample tracking (#59) * retry sample tracking update for incoming data and log failure Co-authored-by: Sven F --- .../register-IdXML-dropbox.py | 
15 ++++++++++++- .../register-archived-ms-raw/register-raw.py | 15 ++++++++++++- .../register-archived-ms-raw/register-raw.pyc | Bin 3074 -> 0 bytes .../register-bam-dropbox/register-bam.py | 15 ++++++++++++- .../etl_msconvert.py | 15 +++++++++++-- .../register-fasta-dropbox/register-fasta.py | 15 ++++++++++++- .../register-fastq-with-metadata-dropbox.py | 15 ++++++++++++- .../register-fastq-dropbox.py | 16 ++++++++++++- .../register-fXML-dropbox.py | 15 ++++++++++++- .../register-hlatyping.py | 15 ++++++++++++- .../register-imgag-dropbox/register-imgag.py | 18 ++++++++++++--- .../register-immunmonitoring.py | 15 ++++++++++++- .../register-cel-dropbox.py | 15 ++++++++++++- .../register-mtb-data-dropbox.py | 21 +++++++++++++----- .../register-mzml-dropbox.py | 15 ++++++++++++- .../register-nanopore.py | 15 ++++++++++++- .../register-nmr-dropbox/register-nmr.py | 15 ++++++++++++- .../register-peptidedata.py | 15 ++++++++++++- .../register-qcml-dropbox.py | 15 ++++++++++++- .../register-qpcr-dropbox.py | 16 ++++++++++++- .../register-vcf-dropbox/register-vcf.py | 16 ++++++++++++- .../register-wiff-data-dropbox.py | 15 ++++++++++++- 22 files changed, 299 insertions(+), 28 deletions(-) delete mode 100644 drop-boxes/register-archived-ms-raw/register-raw.pyc diff --git a/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py b/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py index 8fae5fe3..40f8449f 100644 --- a/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py +++ b/drop-boxes/register-IdXML-dropbox/register-IdXML-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + 
SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-archived-ms-raw/register-raw.py b/drop-boxes/register-archived-ms-raw/register-raw.py index 9e89bf74..52a7f480 100644 --- a/drop-boxes/register-archived-ms-raw/register-raw.py +++ b/drop-boxes/register-archived-ms-raw/register-raw.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -104,4 +105,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-archived-ms-raw/register-raw.pyc b/drop-boxes/register-archived-ms-raw/register-raw.pyc deleted file mode 100644 index c3375c219b936d41ff02951ec9ab2bb1586c7941..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3074 zcmbVO-E$h(6+a6CAtVGswu$SsA#R(|Lj;MNnKYhEry2>>o($Mtkey(AnAPqzELwK) z-itnxr7y`JkhlCP`IqvZ_qM-t7Lbzkr4`z<_k7%Q&-dNz&!ysDzkJ?zY4MlA`|o)4 zj{qJ~AJ8H%LxDwoi>>MVwWxL z?rpLI&jE_XPGr=>;xzU{Q`g2ZYNA4;>%*vSqKEZ=MpG4@`nsuAe6C{M^c>@86UAzf zQkp?DY+_$*X43)I4RdLV{=f0p(KQ^<97PkgANb)}^R041=i`r>r+(Pnv%oWJ0LOly zQfy;^oftpRDQO#|u5w~`bQt?a#lDl$cUJW7>~zV}?|}O3)x|fT%wK-A|Jmzr{xW~* z?0elHcck1iJ)4*ukYgv%D&-Y{`W_z^xcf1>we=c2h{0BKCGj40Es%c*jW@;J(xNn~ZMQsy3@ILp!C0E@HV<3utiWY1Yn+}1a(Y=5X6r-7@&lyo^)4UcCC?Fr*VOjH4dAX 
zrMvSSnGH&EjK|;e!ds*IJTI#pR_hd&$P>KgtyAqEkJyx=s4I}!lm#?w(0Gf{af!y; zvM6(Tvu0R;_DTvg-eEbmw@67*&L3Ar!R89z6?{t^zC{W2LmU&BPxNkcoM6KuYwzCl&)bBLh`qNZLh$Aejek!GLM`DNN*7p6{R6>z z@e!KeBd^5Byhcfl#&t69r#v9{$wNFLn?Y)T|B;n0H~xtd6d2+KrgTvS)!9LIiR@O- z{%UaeINcH$DoJ8`m>kZ z_?QK+JPZO%g$MI`6xUY`%7;H2w7Y|L>!5pLx7w{v4-JlqC9owu+U`nMi=2JZF{R}u z`{2dk=%9CCZr!9jZ*`8I*n{3zPg~NYEY*9|vfEGg{tP*D2Vkg-EsY*K#*f0oC^YKQ z7&Zs2rZxN!CY#b^mUg00p%+D{<4n@^ zA|OJ4J)u^Jt^5zxZIsOt}@k|LfCvV#dV}4 zECHg%6EXJX{xW%pI#XO;p5Xxr71!Jw>iHu11+RBCRT9$xIpEl86N_=FvmJGDtw#=C z(NojWjM#%jIx!Ud+8r$w1ojOjocWcKBV4atWkfT?+RD+hSb0pgxg8e4Gf!h&%(1z8 z?gTT1Su1BQEZ~q$qH`rTE8hZ=JQF*ic3iGGDFt^?cv}$g7X`K|s55mX`3g})HiuHS zOyg*bD@RIuD*hcto-$l5X+Gtf097Qtqg}(g-$b*}OD)sV0O`(OM@BA8Em`Ig)Q+5} zE~k|IgLTDi9`DY5g9Sy2pB~YPOmwTU*w~S~atacM~ z8*!SHNSl3-+)+bAR!#I}!}Yb-PzIkW{Ds#@KQ=I&qPXEq{l@tN+3$|zw6SyUH2lbh z4l=g^qId`~2V=E?AvK9Y&20I)d+^^5T6|A7hAKqz+=d>Zki7Jyj~kweWCGdTd2%3> zPVt3k-vvR&mi+}**Xfbf|MvOB{qgxE@@9eh8~2pvmdztA6{IBnaZ2#Zl`|WK{{Yj* B(J%l2 diff --git a/drop-boxes/register-bam-dropbox/register-bam.py b/drop-boxes/register-bam-dropbox/register-bam.py index 186c5004..20a21ad5 100644 --- a/drop-boxes/register-bam-dropbox/register-bam.py +++ b/drop-boxes/register-bam-dropbox/register-bam.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -136,4 +137,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py 
b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py index dcb2d9f0..13dd049f 100644 --- a/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py +++ b/drop-boxes/register-convert-ms-vendor-format/etl_msconvert.py @@ -484,8 +484,19 @@ def __str__(self): return self.value def handleSampleTracking(barcode): - #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(barcode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(barcode) + break + except: + print "Updating location for sample "+barcode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def createRawDataSet(transaction, incomingPath, sample, format, time_stamp): rawDataSet = transaction.createNewDataSet("Q_MS_RAW_DATA") diff --git a/drop-boxes/register-fasta-dropbox/register-fasta.py b/drop-boxes/register-fasta-dropbox/register-fasta.py index aaab99a4..2fc5e52a 100644 --- a/drop-boxes/register-fasta-dropbox/register-fasta.py +++ b/drop-boxes/register-fasta-dropbox/register-fasta.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -152,4 +153,16 @@ def process(transaction): transaction.moveFile(new_folder, dataSet) # Updates the sample location of the extract sample - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py 
b/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py index 6fae6beb..885f1c91 100644 --- a/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py +++ b/drop-boxes/register-fastq-dropbox-with-metadata/register-fastq-with-metadata-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -167,4 +168,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py b/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py index 34d5601f..d89ffec2 100644 --- a/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py +++ b/drop-boxes/register-fastq-dropbox/register-fastq-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -128,4 +129,17 @@ def process(transaction): nameFile.close() transaction.moveFile(incomingPath, dataSet) - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + # sample tracking + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git 
a/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py b/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py index 6c71ba7e..46425e0f 100644 --- a/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py +++ b/drop-boxes/register-featureXML-dropbox/register-fXML-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(identifier) + break + except: + print "Updating location for sample "+identifier+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py index b6fa1ad6..1f213fb6 100644 --- a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py +++ b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -149,4 +150,16 @@ def process(transaction): transaction.moveFile(resultPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git 
a/drop-boxes/register-imgag-dropbox/register-imgag.py b/drop-boxes/register-imgag-dropbox/register-imgag.py index 02896d4f..6aa33d3f 100644 --- a/drop-boxes/register-imgag-dropbox/register-imgag.py +++ b/drop-boxes/register-imgag-dropbox/register-imgag.py @@ -664,7 +664,19 @@ def process(transaction): transaction.moveFile(vcfFolder, vcfDataSet) else: - find_and_register_ngs_without_metadata(transaction, parentCodes) + find_and_register_ngs_without_metadata(transaction, parentCodes) for code in parentCodes: - #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + #sample tracking section + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + break + except: + print "Updating location for sample "+code+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py b/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py index f4242682..c5757c59 100644 --- a/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py +++ b/drop-boxes/register-immunmonitoring-dropbox/register-immunmonitoring.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -103,4 +104,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git 
a/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py b/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py index 0c3260db..1da34e40 100644 --- a/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py +++ b/drop-boxes/register-microarrays-dropbox/register-cel-dropbox.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -192,4 +193,16 @@ def process(transaction): os.remove(os.path.realpath(os.path.join(incomingPath,f))) #sample tracking section for code in trackingCodes: - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(code) + break + except: + print "Updating location for sample "+code+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py b/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py index 43372890..08092b6c 100644 --- a/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py +++ b/drop-boxes/register-mtb-data-dropbox/register-mtb-data-dropbox.py @@ -164,11 +164,22 @@ ############################################################################# def update_sample_location_to_qbic(sampleId): - """Calls the sample status service and updates the - location to QBiC and the status 'DATA AT QBiC'. - """ - # Update sample location - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(sampleId) + """Calls the sample status service and updates the + location to QBiC and the status 'DATA AT QBiC'. 
+ """ + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(sampleId) + break + except: + print("Updating location for sample " + sampleId + " failed on attempt "+str(attempt+1)) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def process(transaction): diff --git a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py index cd4511ce..a9ae0f71 100644 --- a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py +++ b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py @@ -9,6 +9,7 @@ import checksum import re import os +import time import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File from org.apache.commons.io import FileUtils @@ -107,4 +108,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise \ No newline at end of file diff --git a/drop-boxes/register-nanopore-dropbox/register-nanopore.py b/drop-boxes/register-nanopore-dropbox/register-nanopore.py index 7ae690ac..059c4fdd 100644 --- a/drop-boxes/register-nanopore-dropbox/register-nanopore.py +++ b/drop-boxes/register-nanopore-dropbox/register-nanopore.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import shutil from datetime import datetime @@ -291,7 +292,19 @@ def createSampleWithData(transaction, space, parentSampleCode, mapWithDataForSam transaction.moveFile(absLogPath, logDataSet) # Updates the 
sample location of the measured sample - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentSampleCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentSampleCode) + break + except: + print "Updating location for sample "+parentSampleCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def process(transaction): """Main ETL routine entry point""" diff --git a/drop-boxes/register-nmr-dropbox/register-nmr.py b/drop-boxes/register-nmr-dropbox/register-nmr.py index 262ed122..48192c59 100644 --- a/drop-boxes/register-nmr-dropbox/register-nmr.py +++ b/drop-boxes/register-nmr-dropbox/register-nmr.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import string import os @@ -145,4 +146,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py b/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py index ed881290..5c0867c5 100644 --- a/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py +++ b/drop-boxes/register-peptidedata-dropbox/register-peptidedata.py @@ -9,6 +9,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -86,4 +87,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample 
tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py b/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py index 7f55f406..f784f479 100644 --- a/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py +++ b/drop-boxes/register-qcml-dropbox/register-qcml-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -104,4 +105,16 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise diff --git a/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py b/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py index daea723e..c72e9971 100644 --- a/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py +++ b/drop-boxes/register-qpcr-dropbox/register-qpcr-dropbox.py @@ -7,6 +7,7 @@ sys.path.append('/home-link/qeana10/bin/') import checksum +import time import re import os import ch.systemsx.cisd.etlserver.registrator.api.v2 @@ -103,4 +104,17 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - 
SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) \ No newline at end of file + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise + diff --git a/drop-boxes/register-vcf-dropbox/register-vcf.py b/drop-boxes/register-vcf-dropbox/register-vcf.py index d3c3f21e..5f8aeed3 100644 --- a/drop-boxes/register-vcf-dropbox/register-vcf.py +++ b/drop-boxes/register-vcf-dropbox/register-vcf.py @@ -8,6 +8,7 @@ import checksum import re +import time import os import ch.systemsx.cisd.etlserver.registrator.api.v2 from java.io import File @@ -163,4 +164,17 @@ def process(transaction): transaction.moveFile(incomingPath, dataSet) #sample tracking section - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + wait_seconds = 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(parentCode) + break + except: + print "Updating location for sample "+parentCode+" failed on attempt "+str(attempt+1) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise + diff --git a/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py b/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py index 1a1ff5b0..57830bb5 100644 --- a/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py +++ b/drop-boxes/register-wiff-dropbox/register-wiff-data-dropbox.py @@ -1,6 +1,7 @@ from __future__ import print_function import os +import time import re import sys @@ -107,7 +108,19 @@ def register_wiff_pairs(transaction, wiff_pairs, qbic_id): transaction.moveFile(registration_dir, data_set) # Update sample location - SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(qbic_id) + wait_seconds 
= 1 + max_attempts = 3 + for attempt in range(max_attempts): + try: + SAMPLE_TRACKER.updateSampleLocationToCurrentLocation(qbic_id) + break + except: + print("Updating location for sample "+qbic_id+" failed on attempt "+str(attempt+1)) + if attempt < max_attempts -1: + time.sleep(wait_seconds) + continue + else: + raise def space_and_project(transaction, qbiccode): From 9e01d1a8e9fce854255924d7c1e2a1898c1ed44b Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 22 Jan 2021 15:59:10 +0100 Subject: [PATCH 11/17] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32e81588..ea336184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog +## 1.6.0 2021-01-22 + +* Fix for workflow result registration: fetch sample by identifier instead of search for robustness against indexing problems +* Retry sample tracking updates twice and log failures that occur + ## 1.5.0 2020-11-03 * New maintenance task: update missing checksum once, after dss starts. 
From 893b0d5e9ba890156f2f86cf0d84662f977a4ff2 Mon Sep 17 00:00:00 2001 From: Sven F Date: Thu, 25 Feb 2021 10:59:27 +0100 Subject: [PATCH 12/17] Correct fastq dropbox docs (#69) --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a34ef4d3..e9f29324 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,17 @@ Formats: [QBiC-register-fastq-dropbox](drop-boxes/register-fastq-dropbox) **Resulting data model in openBIS** -Q_TEST_SAMPLE -> Q_NGS_RAW_DATA (with sample code) -> DataSet (directory -with files contained) +Q_TEST_SAMPLE -> Q_NGS_SINGLE_SAMPLE_RUN (with sample code) -> DataSet +of type Q_NGS_RAW_DATA (directory with files contained) + +Example sample ids are: + +QABCD001AE (Analyte, Q_TEST_SAMPLE) +NGSQABCD001AE (Sequencing result, Q_NGS_SINGLE_SAMPLE_RUN) + +If several runs are submitted with the same analyte id, then no new id +for the run is generated, but a new dataset attached to the existing +sequencing result id. **Description** For paired-end sequencing reads in FASTQ format, the file structure From fb8b5a5ba7175bdeb3294c533d3b54c73c98ff03 Mon Sep 17 00:00:00 2001 From: Sven F Date: Fri, 26 Feb 2021 10:22:24 +0100 Subject: [PATCH 13/17] Add documentation for NGS data with metadata (#68) --- README.md | 56 +++++++++++++++++++++ drop-boxes/register-imgag-dropbox/README.md | 42 ---------------- 2 files changed, 56 insertions(+), 42 deletions(-) delete mode 100644 drop-boxes/register-imgag-dropbox/README.md diff --git a/README.md b/README.md index e9f29324..be2c0125 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ openBIS.
Formats: - [NGS single-end / paired-end data](#ngs-single-end--paired-end-data) +- [NGS single-end / paired-end data with metadata (deprecated)](#ngs-single-end--paired-end-data-with-metadata-(deprecated)) ### NGS single-end / paired-end data @@ -90,4 +91,59 @@ look like this: |-- .fastq.gz.sha256sum ``` +### NGS single-end / paired-end data with metadata (deprecated) +**Disclaimer!** +This data format is targeted for a single use case and should not be +used for general data registration purposes. Please use the +[NGS single-end / paired-end data](#ngs-single-end--paired-end-data) +format for now. + +**Responsible dropbox:** +[QBiC-register-imgag-dropbox](drop-boxes/register-imgag-dropbox) + +**Resulting data model in openBIS** +Q_TEST_SAMPLE -> Q_NGS_SINGLE_SAMPLE_RUN (with sample code) -> DataSet +of type Q_NGS_RAW_DATA (directory with raw sequencing files contained) + +Example sample ids: + +QABCD001AE (Analyte, Q_TEST_SAMPLE) +NGS[0-9]{2}QABCD001AE (Sequencing Result, Q_NGS_SINGLE_SAMPLE_RUN) where +the running two-digit number is taken from the identifier suffix of +the `id_genetics` value in the metadata file. + +**Description** +For paired-end sequencing reads in FASTQ format, the file structure +needs to look like this + +``` + // Directory + |-- file1.fastq.gz + |-- file2.fastq.gz + |-- metadata + |- ... +``` + +**Expected metadata** +Additional metadata is required in this format case and expected to be +noted in JSON in a file called `metadata` and following the +[upload metadata schema](drop-boxes/register-imgag-dropbox/upload-metadata.schema.json). 
+A valid JSON object can look like this: + +``` +{ + "files": [ + "reads.1.fastq.gz", + "reads.2.fastq.gz" + ], + "type": "dna_seq", + "sample1": { + "genome": "GRCh37", + "id_genetics": "GS000000_01", + "id_qbic": "QTEST002AE", + "processing_system": "Test system", + "tumor": "no" + } +} +``` \ No newline at end of file diff --git a/drop-boxes/register-imgag-dropbox/README.md b/drop-boxes/register-imgag-dropbox/README.md deleted file mode 100644 index eab6c82f..00000000 --- a/drop-boxes/register-imgag-dropbox/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# IMGAG dropbox - -## Expected data structure -The data structure needs to be a root folder, containing a file `metadata` following the [upload metadata schema](upload-metadata.schema.json). In addition, the folder shall contain files of type `fastq/fastq.gz` and/or `vcf/vcf.gz` and/or `GSvar/GSvar.gz`. - -Incoming structure overview: - -``` -|-QTEST001AE (top level folder name) - | - |- file1.fastq.gz - |- file2.fastq.gz - |- metadata - |- ... - -``` - -openBIS structure overview: - -TODO: ER model. - -## Expected metadata -Metadata is expected to be noted in JSON and following the [upload metadata schema](upload-metadata.schema.json). An example JSON entry can look like this: - -``` -{ - "files": [ - "reads.1.fastq.gz", - "reads.2.fastq.gz" - ], - "type": "dna_seq", - "sample1": { - "genome": "GRCh37", - "id_genetics": "GS000000_01", - "id_qbic": "QTEST002AE", - "processing_system": "Test system", - "tumor": "no" - } -} -``` - -The sample code for `id_qbic` can be of type `Q_TEST_SAMPLE` or `Q_BIOLOGICAL_SAMPLE`. In the latter case, a new sample of type `Q_TEST_SAMPLE` is created and attached as child to the biological sample. The data-set will be registered under this test sample then. 
\ No newline at end of file From f0ba26094f9038e30dbde986a50098d68da4bdf1 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 26 Feb 2021 15:57:16 +0100 Subject: [PATCH 14/17] Documentation/attachments (#65) * add readme * add regex description * include attachment readme in main readme Co-authored-by: Sven F --- README.md | 38 ++++++++++++++++++- .../register-attachment-dropbox.py | 12 ------ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index be2c0125..abaa06e3 100644 --- a/README.md +++ b/README.md @@ -146,4 +146,40 @@ A valid JSON object can look like this: "tumor": "no" } } -``` \ No newline at end of file +``` + +### Attachment Data + +**Responsible dropbox:** +[QBiC-register-exp-proj-attachment](drop-boxes/register-attachments-dropbox) + +**openBIS structure:** + +Attachments are attached to the Q_PROJECT_DETAILS experiment type and its sample type Q_ATTACHMENT_SAMPLE. + +**Expected data structure** +The data structure needs to be a root folder, containing a file `metadata.txt`. + +Incoming structure overview: + +``` +|- (top level folder name, normally a time stamp of upload time) + | + |- metadata.txt +``` + +**Expected metadata** +Metadata is expected to be denoted in line-separated key-value pairs, where key and value are separated by a '='. The following structure/pairs are expected: + +``` +user= +info= +barcode= +type= +``` +The code of the attachment sample is built from the project code followed by three zeroes, conforming to the regular expression "Q[A-Z0-9]{4}000", e.g. QABCD000. 
+ +See code examples: +https://github.com/qbicsoftware/attachi-cli/blob/master/attachi/attachi.py#L63 +https://github.com/qbicsoftware/projectwizard-portlet/blob/9c86f500b26af4cf2613cfae32e470bf5d50bf78/src/main/java/life/qbic/projectwizard/io/AttachmentMover.java#L145 + diff --git a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py index cd82f564..27abd34c 100755 --- a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py +++ b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py @@ -92,18 +92,6 @@ def process(transaction): sa.setExperiment(exp) info = None - #if isProject: - #experiments = search_service.listExperiments("/" + space + "/" + project) - #for e in experiments: - # if project+"_INFO" in e.getExperimentIdentifier(): - # info = e - #if not info: - # info = transaction.createNewExperiment('/' + space + '/' + project + '/'+ project+'_INFO', "Q_PROJECT_DETAILS") - #else: - # info = transaction.getExperiment('/' + space + '/' + project + '/' + code) - # register new experiment and sample - #sa.setExperiment(info) - # create new dataset dataSet = transaction.createNewDataSet("Q_PROJECT_DATA") dataSet.setMeasuredData(False) dataSet.setPropertyValue("Q_SECONDARY_NAME", secname) From 414073bb6fe484317c39d5e0cc4138d43ca5a3d5 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 26 Feb 2021 16:37:52 +0100 Subject: [PATCH 15/17] Documentation/convert ms (#72) * add readme for conversion and registration of mass spec data Co-authored-by: jnnfr * move readme to the top folder * fix formatting Co-authored-by: Sven F Co-authored-by: jnnfr --- README.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index abaa06e3..21e9f043 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,9 @@ openBIS. 
Formats: - [NGS single-end / paired-end data](#ngs-single-end--paired-end-data) -- [NGS single-end / paired-end data with metadata (deprecated)](#ngs-single-end--paired-end-data-with-metadata-(deprecated)) +- [NGS single-end / paired-end data with metadata (deprecated)](#ngs-single-end--paired-end-data-with-metadata) +- [Attachment Data](#attachment-data) +- [Mass Spectrometry mzML conversion and registration](#mass-spectrometry-mzml-conversion-and-registration) ### NGS single-end / paired-end data @@ -91,7 +93,9 @@ look like this: |-- .fastq.gz.sha256sum ``` -### NGS single-end / paired-end data with metadata (deprecated) + +### NGS single-end / paired-end data with metadata +(deprecated) **Disclaimer!** This data format is targeted for a single use case and should not be @@ -183,3 +187,65 @@ See code examples: https://github.com/qbicsoftware/attachi-cli/blob/master/attachi/attachi.py#L63 https://github.com/qbicsoftware/projectwizard-portlet/blob/9c86f500b26af4cf2613cfae32e470bf5d50bf78/src/main/java/life/qbic/projectwizard/io/AttachmentMover.java#L145 + +### Mass Spectrometry mzML conversion and registration + +**Responsible dropbox:** +[QBiC-convert-register-ms-vendor-format](drop-boxes/register-convert-ms-vendor-format) + +**Resulting data model in openBIS** +...Q_TEST_SAMPLE (-> Q_MHC_LIGAND_EXTRACT (Immunomics case)) -> Q_MS_RUN per data file --> 2 DataSets per data file, one for raw data, one converted to mzML + +**Expected data structure** +In every use case, the data structure needs to contain a top folder around the respective data in order to accommodate metadata files. + +The sample code found in the top folder can be of type `Q_TEST_SAMPLE` or `Q_MS_RUN`. In the former case, a new sample of type `Q_MS_RUN` is created and attached as child to the test sample. 
+ +**Valid folder/file types**: +- Thermo Fisher Raw file format +- Waters Raw folder +- Bruker .d folder + +**Incoming structure overview for standard case without additional metadata file:** +``` +QABCD102A5_20201229145526_20201014_CO_0976StSi_R05_.raw +|-- QABCD102A5_20201229145526_20201014_CO_0976StSi_R05_.raw +|-- QABCD102A5_20201229145526_20201014_CO_0976StSi_R05_.raw.sha256sum +``` +In this case, existing mass spectrometry metadata is expected to be already stored and the dataset will be attached. + + +**Incoming structure overview for the use case of Immunomics data with metadata file:** +``` +QABCD090B7 +|-- QABCD090B7 +| |-- file1.raw +| |-- file2.raw +| |-- file3.raw +| `-- metadata.tsv +|-- QABCD090B7.sha256sum +`-- source_dropbox.txt +``` +The source_dropbox.txt currently has to indicate the source as one of the Immunomics data sources. + +The `metadata.tsv` columns for the Immunomics case are tab-separated: +``` +Filename Q_MS_DEVICE Q_MEASUREMENT_FINISH_DATE Q_EXTRACT_SHARE Q_ADDITIONAL_INFO Q_MS_LCMS_METHODS technical_replicate workflow_type +file1.raw THERMO_QEXACTIVE 171010 10 QEX_TOP07_470MIN DDA_Rep1 DDA +``` + +Filename - one of the (e.g. 
raw) file names found in the incoming structure + +Q_MS_DEVICE - openBIS code from the vocabulary of Mass Spectrometry devices + +Q_MEASUREMENT_FINISH_DATE - Date in YYMMDD format (ISO 8601:2000) + +Q_EXTRACT_SHARE - the extract share + +Q_ADDITIONAL_INFO - any optional comments + +Q_MS_LCMS_METHODS - openBIS code from the vocabulary of LCMS methods + +technical_replicate - free text to denote replicates + +workflow_type - DDA or DIA From 67552ae4917a15221ca790fbd618c50821de7659 Mon Sep 17 00:00:00 2001 From: jnnfr Date: Tue, 2 Mar 2021 14:33:21 +0100 Subject: [PATCH 16/17] Add description for HLA typing data (#70) * add HLA typing description to README * remove faulty description from script Co-authored-by: wow-such-code --- CHANGELOG.md | 1 + README.md | 30 +++++++++++++++++++ .../register-hlatyping.py | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea336184..edb18c36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ environment for the proper setup for the register-omero-metadata dropbox * Register unclassified pooling data of Nanopore experiments directly at the experiment level (no copies are added to sample-based datasets) +* Add description for data of register-hlatyping-dropbox ## 1.3.1 diff --git a/README.md b/README.md index 21e9f043..a6e88590 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ openBIS. 
Formats: - [NGS single-end / paired-end data](#ngs-single-end--paired-end-data) +- [HLA Typing data](#hla-typing-data) - [NGS single-end / paired-end data with metadata (deprecated)](#ngs-single-end--paired-end-data-with-metadata) - [Attachment Data](#attachment-data) - [Mass Spectrometry mzML conversion and registration](#mass-spectrometry-mzml-conversion-and-registration) @@ -93,6 +94,35 @@ look like this: |-- .fastq.gz.sha256sum ``` +### HLA Typing data +**Responsible dropbox:** +[QBiC-register-hlatyping-dropbox](drop-boxes/register-hlatyping-dropbox) + +**Resulting data model in openBIS** +Q_TEST_SAMPLE -> Q_NGS_HLATYPING (with sample code) -> DataSet (directory +with files contained) + +or + +Q_TEST_SAMPLE -> Q_NGS_SINGLE_SAMPLE_RUN (provided sample code) -> Q_NGS_HLATYPING -> DataSet (directory +with files contained) + +Example sample ids are: +QABCD001AE (Analyte, Q_TEST_SAMPLE) +HLA1QABCD001AE (HLA-Typing result, Q_NGS_HLATYPING) for HLA MHC class I +or +HLA2QABCD001AE (HLA-Typing result, Q_NGS_HLATYPING) for HLA MHC class II + + +**Description** +For HLA typing result data (plain-text `.txt` files), the file structure +needs to look like this: + +``` + // Directory + |-- .txt + |-- .txt.sha256sum +``` ### NGS single-end / paired-end data with metadata (deprecated) diff --git a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py index 1f213fb6..614f28c8 100644 --- a/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py +++ b/drop-boxes/register-hlatyping-dropbox/register-hlatyping.py @@ -31,7 +31,7 @@ ### We need this object to update the sample location later SAMPLE_TRACKER = SampleTracker.createQBiCSampleTracker(SERVICE_REGISTRY_URL, SERVICE_CREDENTIALS, QBIC_LOCATION) -# ETL script for registration of VCF files +# ETL script for registration of HLA Typing # expected: # *Q[Project Code]^4[Sample No.]^3[Sample Type][Checksum]*.* pattern = re.compile('Q\w{4}[0-9]{3}[a-zA-Z]\w') From 
d4eb322a504be05fd4c76a647af58991436d9bc2 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Fri, 19 Mar 2021 11:45:10 +0100 Subject: [PATCH 17/17] Release/1.7.0 (#78) * Adds completed version of Omero imaging data registration script: * General metadata is stored in openBIS (imaging experiments and samples) * Data and additional metadata key-value-pairs are stored in OMERO * Add readme for Omero etl script * Added some comments to code * Minor code cleanup Co-authored-by: Sven F Co-authored-by: luiskuhn <38211686+luiskuhn@users.noreply.github.com> Co-authored-by: luiskuhn Co-authored-by: Tobias Koch --- CHANGELOG.md | 5 + README.md | 54 ++++ .../register-attachment-dropbox.py | 8 +- .../backendinterface.py | 72 ++++-- .../image_registration_process.py | 89 ++++++- .../register-omero-metadata/omero_54_env.yml | 39 +++ .../register-omero-metadata/register-omero.py | 230 +++++++++++++----- 7 files changed, 404 insertions(+), 93 deletions(-) create mode 100644 drop-boxes/register-omero-metadata/omero_54_env.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index edb18c36..7039bf9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 1.7.0 2021-03-19 + +* Provides fully tested functionality to register generic imaging data, with OMERO server support (v5.4.10). 
[Link to PR](https://github.com/qbicsoftware/etl-scripts/pull/78) +* Uses an omero-importer-cli (with Bio-formats) for image file registration into an OMERO server instance +* Uses an initial version of the openBIS-OMERO metadata model ## 1.6.0 2021-01-22 diff --git a/README.md b/README.md index a6e88590..860ad623 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ Formats: - [NGS single-end / paired-end data with metadata (deprecated)](#ngs-single-end--paired-end-data-with-metadata) - [Attachment Data](#attachment-data) - [Mass Spectrometry mzML conversion and registration](#mass-spectrometry-mzml-conversion-and-registration) +- [Imaging data with an OMERO server instance](#imaging-data-with-an-omero-server-instance) ### NGS single-end / paired-end data @@ -279,3 +280,56 @@ Q_MS_LCMS_METHODS - openBIS code from the vocabulary of LCMS methods technical_replicate - free text to denote replicates workflow_type - DDA or DIA + + +### Imaging data with an OMERO server instance + +**Responsible dropbox:** +[QBiC-register-omero-metadata](drop-boxes/register-omero-metadata) + +**Resulting data model in openBIS** +For each tissue sample multiple images (the data files) can be created, so multiple Q_BMI_GENERIC_IMAGING_RUN samples are created and attached to that tissue sample +...Q_BIOLOGICAL_SAMPLE -> one Q_BMI_GENERIC_IMAGING_RUN per data file + +**Expected data structure** +In every use case, the data structure needs to contain a top folder around the respective data in order to accommodate metadata files. + +The sample code found in the top folder is of type `Q_BIOLOGICAL_SAMPLE` (tissue imaging). 
+ +**Valid file types**: +Valid files in the folder are any imaging files that can be handled by the OMERO server + +**Incoming structure overview:** +``` +QABCD002A8 +|-- QABCD002A8 +| |-- Est-B1a.lif +| |-- Image_1.czi +| |-- Image_2.czi +| |-- Image7246.tif +| |-- metadata_3.tsv +| |-- rubisco_avg.mrc +| `-- tomogram_x.mrc +|-- QABCD002A8.sha256sum +`-- source_dropbox.txt +``` + +The metadata file, ending in `.tsv` has tab-separated columns: +``` +IMAGE_FILE_NAME IMAGING_MODALITY IMAGED_TISSUE INSTRUMENT_MANUFACTURER INSTRUMENT_USER IMAGING_DATE +tomogram_x.mrc NCIT_C18113 cell FEI Dr. Horrible 01.03.2021 +rubisco_avg.mrc NCIT_C18113 cell FEI Max Mustermann 01.04.2021 +Image7246.tif NCIT_C18216 leaf Zeiss Max Mustermann 23.02.2021 +Est-B1a.lif NCIT_C17753 root Zeiss Max Mustermann 01.02.2021 +Image_1.czi NCIT_C17753 leaf Zeiss Max Mustermann 11.02.2021 +Image_2.czi NCIT_C17753 leaf Zeiss Max Mustermann 01.02.2021 +``` + +column name | description +--------------|---------------- +`IMAGE_FILE_NAME`| one of the file names found in the incoming folder per line +`IMAGING_MODALITY`| Ontology Identifier for the imaging modality, currently from the [NCI Thesaurus](https://ncit.nci.nih.gov/ncitbrowser/pages/home.jsf?version=21.02d). 
**Examples:** NCIT_C18113 (Cryo-Electron Microscopy), NCIT_C18216 (Transmission Electron Microscopy), NCIT_C17753 (Confocal Microscopy) +`IMAGED_TISSUE` | the imaged tissue +`INSTRUMENT_MANUFACTURER` | the imaging instrument manufacturer +`INSTRUMENT_USER` | the person who measured the data file using the imaging instrument +`IMAGING_DATE` | the date of the measurement in dd.mm.yyyy format (days and months with leading zeroes) diff --git a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py index 27abd34c..ae4615b2 100755 --- a/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py +++ b/drop-boxes/register-attachments-dropbox/register-attachment-dropbox.py @@ -84,10 +84,10 @@ def process(transaction): sa = transaction.getSampleForUpdate(sampleID) space = sa.getSpace() if not attachmentReady: - expID = '/' + space + '/' + project + '/'+ project+'_INFO' - exp = transaction.getExperimentForUpdate(expID) - if not exp: - exp = transaction.createNewExperiment(expID, "Q_PROJECT_DETAILS") + infoSampleID = "/"+space+"/"+code + sa = transaction.getSampleForUpdate(infoSampleID) + if not sa: + exp = transaction.createNewExperiment('/' + space + '/' + project + '/'+ project+'_INFO', "Q_PROJECT_DETAILS") sa = transaction.createNewSample('/' + space + '/'+ code, "Q_ATTACHMENT_SAMPLE") sa.setExperiment(exp) info = None diff --git a/drop-boxes/register-omero-metadata/backendinterface.py b/drop-boxes/register-omero-metadata/backendinterface.py index 1bed5b30..0e2b0b11 100644 --- a/drop-boxes/register-omero-metadata/backendinterface.py +++ b/drop-boxes/register-omero-metadata/backendinterface.py @@ -14,7 +14,6 @@ """ - def omero_connect(usr, pwd, host, port): """ Connects to the OMERO Server with the provided username and password. 
@@ -182,9 +181,7 @@ def register_image_file_with_dataset_id(file_path, dataset_id, usr, pwd, host, p ds_id = dataset_id if ds_id != -1: - cmd = "omero-importer -s " + host + " -p " + str(port) + " -u " + usr + " -w " + pwd + " -d " + str(int(ds_id)) + " " + file_path - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -193,17 +190,20 @@ def register_image_file_with_dataset_id(file_path, dataset_id, usr, pwd, host, p std_out, std_err = proc.communicate() - if int(proc.returncode) == 0: - - fist_line = std_out.splitlines()[0] - image_ids = fist_line[6:].split(',') + # the terminal output of the omero-importer tool provides a lot of information on the registration process + # we are looking for a line with this format: "Image:id_1,1d_2,id_3,...,id_n" + # where id_1,...,id_n are a list of ints, which denote the unique OMERO image IDs for the image file + # (one file can have many images) + if int(proc.returncode) == 0: + for line in std_out.splitlines(): + if line[:6] == "Image:": + image_ids = line[6:].split(',') + break else: - image_ids = -1 - + image_ids = [] else: - image_ids = -1 - + image_ids = [] return image_ids @@ -315,11 +315,18 @@ def get_image_array(conn, image_id): return hypercube -################################ - def add_annotations_to_image(conn, image_id, key_value_data): """ - TODO + This function is used to add key-value pair annotations to an image + Example: + key_value_data = [["Drug Name", "Monastrol"], ["Concentration", "5 mg/ml"]] + add_annotations_to_image(conn, image_id, key_value_data) + Args: + conn: Established Connection to the OMERO Server via a BlitzGateway + image_id (int): An OMERO image ID + key_value_data (list of lists): list of key-value pairs + Returns: + int: not relevant atm """ import omero @@ -339,15 +346,19 @@ def add_annotations_to_image(conn, image_id, key_value_data): ######################### -##app from optparse import OptionParser +import ConfigParser + +config = 
ConfigParser.RawConfigParser() +config.read("imaging_config.properties") + ###OMERO server info -USERNAME = "usr" -PASSWORD = "pwd" -HOST = "host" -PORT = 4064 +USERNAME = config.get('OmeroServerSection', 'omero.username') +PASSWORD = config.get('OmeroServerSection', 'omero.password') +HOST = config.get('OmeroServerSection', 'omero.host') +PORT = int(config.get('OmeroServerSection', 'omero.port')) def get_args(): @@ -358,6 +369,10 @@ def get_args(): parser.add_option('-p', '--project', dest='project_id', default="None", help='project id for dataset id retrieval') parser.add_option('-s', '--sample', dest='sample_id', default="None", help='sample id for dataset id retrieval') + parser.add_option('-i', '--image', dest='image_id', default="None", help='image id for key-value pair annotation') + parser.add_option('-a', '--annotation', dest='ann_str', default="None", help='annotation string') + + (options, args) = parser.parse_args() return options @@ -373,9 +388,26 @@ def get_args(): id_str = id_str + id_i + " " print id_str - else: + + elif args.project_id != "None": conn = omero_connect(USERNAME, PASSWORD, HOST, str(PORT)) ds_id = get_omero_dataset_id(conn, str(args.project_id), str(args.sample_id)) print ds_id + + elif args.image_id != "None": + + conn = omero_connect(USERNAME, PASSWORD, HOST, str(PORT)) + + #string format: key1::value1//key2::value2//key3::value3//... 
+ key_value_data = [] + pair_list = args.ann_str.split("//") + for pair in pair_list: + key_value = pair.split("::") + key_value_data.append(key_value) + + + add_annotations_to_image(conn, str(args.image_id), key_value_data) + + print "0" diff --git a/drop-boxes/register-omero-metadata/image_registration_process.py b/drop-boxes/register-omero-metadata/image_registration_process.py index 730adb89..a2ab062f 100644 --- a/drop-boxes/register-omero-metadata/image_registration_process.py +++ b/drop-boxes/register-omero-metadata/image_registration_process.py @@ -7,30 +7,47 @@ from subprocess import Popen, PIPE barcode_pattern = re.compile('Q[a-zA-Z0-9]{4}[0-9]{3}[A-Z][a-zA-Z0-9]') +conda_home_path = "/home/qeana10/miniconda2/" +omero_lib_path = "/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.py-5.4.10-ice36-b105" +etl_home_path = "/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/" + class ImageRegistrationProcess: - def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_code=""): + def __init__(self, transaction, env_name="omero_env_0", project_code="", sample_code="", conda_path=None, omero_path=None, etl_path=None): self._transaction = transaction self._incoming_file_name = transaction.getIncoming().getName() + self._search_service = transaction.getSearchService() self._project_code = project_code self._sample_code = sample_code + ### set exec. 
env + self._conda_path = conda_home_path + if not conda_path is None: + self._conda_path = conda_path + + self._omero_path = omero_lib_path + if not omero_path is None: + self._omero_path = omero_path + + self._etl_path= etl_home_path + if not etl_path is None: + self._etl_path = etl_path + self._init_cmd_list = [] - self._init_cmd_list.append('eval "$(/home/qeana10/miniconda2/bin/conda shell.bash hook)"') + self._init_cmd_list.append('eval "$(' + self._conda_path + 'bin/conda shell.bash hook)"') self._init_cmd_list.append('conda activate ' + env_name) - self._init_cmd_list.append('export OMERO_PREFIX=/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.py-5.4.10-ice36-b105') + self._init_cmd_list.append('export OMERO_PREFIX=' + self._omero_path) self._init_cmd_list.append('export PYTHONPATH=$PYTHONPATH:$OMERO_PREFIX/lib/python') - #now use the omero-importer app packaged in the conda env - #self._init_cmd_list.append('export PATH=$PATH:/home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/OMERO.server-5.4.10-ice36-b105/bin') - self._init_cmd_list.append('export PATH=$PATH:/home/qeana10/miniconda2/envs/' + env_name + '/bin') + # now use the omero-importer app packaged in the conda env + self._init_cmd_list.append('export PATH=$PATH:' + self._conda_path + 'envs/' + env_name + '/bin') - #move to the dir where backendinterface.py lives - self._init_cmd_list.append('cd /home/qeana10/openbis/servers/core-plugins/QBIC/1/dss/drop-boxes/register-omero-metadata/') + # move to the dir where backendinterface.py lives for exec. 
+ self._init_cmd_list.append('cd ' + self._etl_path) def fetchOpenBisSampleCode(self): found = barcode_pattern.findall(self._incoming_file_name) @@ -43,7 +60,17 @@ def fetchOpenBisSampleCode(self): raise SampleCodeError(self._sample_code, "The sample code seems to be invalid, the checksum could not be confirmed.") return self._project_code, self._sample_code - + + def searchOpenBisSample(self, sample_code): + # find specific sample + sc = SearchCriteria() + sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, sample_code)) + foundSamples = self._search_service.searchForSamples(sc) + if len(foundSamples) == 0: + raise SampleNotFoundError(sample_code, "Sample could not be found in openBIS.") + sample = foundSamples[0] + return sample + def _isValidSampleCode(self, sample_code): try: id = sample_code[0:9] @@ -73,6 +100,7 @@ def requestOmeroDatasetId(self, project_code=None, sample_code=None): return ds_id def registerImageFileInOmero(self, file_path, dataset_id): + cmd_list = list(self._init_cmd_list) cmd_list.append( "python backendinterface.py -f " + file_path + " -d " + str(dataset_id) ) @@ -84,6 +112,9 @@ def registerImageFileInOmero(self, file_path, dataset_id): out, err = process.communicate( commands ) id_list = str(out).split() + for img_id in id_list: + if not img_id.isdigit(): + return [] return id_list @@ -91,16 +122,16 @@ def registerImageFileInOmero(self, file_path, dataset_id): def triggerOMETiffConversion(self): pass - #ToDo Check if Metadata file is provided as was suggested in test.tsv provided by LK - def extractMetadataFromTSV(self, tsvFilePath): + #ToDo Check if Metadata file is provided as defined + def extractMetadataFromTSV(self, tsv_file_path): tsvFileMap = {} try: - with open(tsvFilePath) as tsvfile: + with open(tsv_file_path) as tsvfile: reader = csv.DictReader(tsvfile, delimiter='\t', strict=True) for row in reader: tsvFileMap.update(row) except IOError: - print "Error: No file found at 
provided filepath " + tsvFilePath + print "Error: No file found at provided filepath " + tsv_file_path except csv.Error as e: print 'Could not gather the Metadata from TSVfile %s, in line %d: %s' % (tsvfile, reader.line_num, e) @@ -109,6 +140,30 @@ def extractMetadataFromTSV(self, tsvFilePath): def registerExperimentDataInOpenBIS(self): pass + def registerOmeroKeyValuePairs(self, image_id, property_map): + """Registers the property map as key-value pairs in the OMERO server. + """ + + cmd_list = list(self._init_cmd_list) + + # string format: key1::value1//key2::value2//key3::value3//... + key_value_str = "" + for key in property_map.keys(): + key_value_str = key_value_str + str(key) + "::" + str(property_map[key]) + "//" + key_value_str = key_value_str[:len(key_value_str)-2] #remove last two chars + + cmd_list.append( "python backendinterface.py -i " + str(image_id) + " -a " + key_value_str ) + + commands = "" + for cmd in cmd_list: + commands = commands + cmd + "\n" + + process = Popen( "/bin/bash", shell=False, universal_newlines=True, stdin=PIPE, stdout=PIPE, stderr=PIPE ) + out, err = process.communicate( commands ) + + + return 0 + class SampleCodeError(Exception): @@ -120,4 +175,12 @@ def __init__(self, sample_code, message): def test(self): pass +class SampleNotFoundError(Exception): + + def __init__(self, sample_code, message): + self.sample_code = sample_code + self.message = message + super().__init__(self.message) + def test(self): + pass diff --git a/drop-boxes/register-omero-metadata/omero_54_env.yml b/drop-boxes/register-omero-metadata/omero_54_env.yml new file mode 100644 index 00000000..44a22e69 --- /dev/null +++ b/drop-boxes/register-omero-metadata/omero_54_env.yml @@ -0,0 +1,39 @@ +name: omero_env_0 +channels: + - bioconda + - sven1103 + - hargup/label/pypi + - anaconda + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - bzip2=1.0.8=h7b6447c_0 + - ca-certificates=2020.7.22=0 + - certifi=2019.11.28=py27_0 + - freetype=2.10.2=h5ab3b9f_0 + - 
hashlib=20081119=py27_0 + - jpeg=9b=habf39ab_1 + - libedit=3.1.20191231=h14c3975_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=9.1.0=hdf63c60_0 + - libpng=1.6.37=hbc83047_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.1.0=h2733197_1 + - lz4-c=1.9.2=he6710b0_1 + - ncurses=6.2=he6710b0_1 + - olefile=0.46=py27_0 + - omero-importer-cli=v1.0.0=0 + - openjdk=8.0.152=h7b6447c_3 + - openssl=1.0.2u=h7b6447c_0 + - pillow=6.2.1=py27h34e0f95_0 + - pip=19.3.1=py27_0 + - python=2.7.18=h15b4118_1 + - readline=8.0=h7b6447c_0 + - setuptools=44.0.0=py27_0 + - sqlite=3.33.0=h62c20be_0 + - tk=8.6.10=hbc83047_0 + - wheel=0.33.6=py27_0 + - xz=5.2.5=h7b6447c_0 + - zeroc-ice=3.6.3=py27hd0a1c67_1 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.4.4=h0b5b093_3 diff --git a/drop-boxes/register-omero-metadata/register-omero.py b/drop-boxes/register-omero-metadata/register-omero.py index 4587d55e..b5fbaaf8 100755 --- a/drop-boxes/register-omero-metadata/register-omero.py +++ b/drop-boxes/register-omero-metadata/register-omero.py @@ -11,6 +11,7 @@ #import sample_tracking_helper_qbic as thelper import checksum +import datetime import re import os import urllib @@ -21,7 +22,6 @@ from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria - #class OmeroError(Error): # ETL script for registration of Imaging files that need to end up in Omero. 
@@ -56,12 +56,20 @@ ##### -def createNewImagingExperiment(tr, space, project, properties): +INCOMING_DATE_FORMAT = '%d.%m.%Y' +OPENBIS_DATE_FORMAT = '%Y-%m-%d' + +PROPPERTY_FILTER_LIST = ["IMAGE_FILE_NAME", "INSTRUMENT_USER", "IMAGING_DATE"] + + +def mapDateString(date_string): + return datetime.datetime.strptime(date_string, INCOMING_DATE_FORMAT).strftime(OPENBIS_DATE_FORMAT) + +def createNewImagingExperiment(tr, space, project, properties, existing_ids): IMAGING_EXP_TYPE = "Q_BMI_GENERIC_IMAGING" - MODALITY_CODE = "Q_BMI_MODALITY" search_service = tr.getSearchService() + experiment_property_map = {"IMAGING_MODALITY":"Q_BMI_MODALITY", "IMAGING_DATE":"Q_MEASUREMENT_FINISH_DATE", "INSTRUMENT_USER":"Q_INSTRUMENT_USER"} - existing_ids = [] existing_exps = search_service.listExperiments("/" + space + "/" + project) for exp in existing_exps: existing_ids.append(exp.getExperimentIdentifier()) @@ -71,52 +79,60 @@ def createNewImagingExperiment(tr, space, project, properties): i += 1 exp_num = len(existing_exps) + i exp_id = '/' + space + '/' + project + '/' + project + 'E' + str(exp_num) - exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) - for key in properties.keys(): - exp.setPropertyValue(key, properties[key]) - return exp + img_exp = tr.createNewExperiment(exp_id, IMAGING_EXP_TYPE) + existing_ids.append(exp_id) + for incoming_label in experiment_property_map: + if incoming_label in properties: + key = experiment_property_map[incoming_label] + value = properties[incoming_label] + if key == "Q_MEASUREMENT_FINISH_DATE": + value = mapDateString(value) + img_exp.setPropertyValue(key, value) + return img_exp -def createNewImagingRun(tr, base_sample, exp, omero_link, run_offset): +def createNewImagingRun(tr, base_sample, exp, omero_image_ids, run_offset, properties): IMG_RUN_PREFIX = "IMG" IMG_RUN_TYPE = "Q_BMI_GENERIC_IMAGING_RUN" - IMG_RUN_OMERO_PROPERTY_CODE = "Q_ADDITIONAL_INFO" - # TODO: can we use a prefix for imaging samples? 
- # otherwise creating new samples will be more complex - # on the other hand, replicates need to be numbered if we use IMG, e.g IMG1QABCD001AB - # IMG2QABCD001AB etc. - # talk to GG and LK + IMG_RUN_OMERO_PROPERTY_CODE = "Q_OMERO_IDS" + sample_property_map = {}#no specific properties from the metadata file yet + run = 0 exists = True new_sample_id = None + # respect samples already in openbis while exists: run += 1 new_sample_id = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run) + base_sample.getCode() exists = tr.getSampleForUpdate(new_sample_id) + # add additional offset for samples registered in this call of the ETL script, but before this sample new_sample_id_with_offset = '/' + base_sample.getSpace() + '/' + IMG_RUN_PREFIX + str(run+run_offset) + base_sample.getCode() img_run = tr.createNewSample(new_sample_id_with_offset, IMG_RUN_TYPE) - img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier]) + img_run.setParentSampleIdentifiers([base_sample.getSampleIdentifier()]) img_run.setExperiment(exp) - img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, omero_link) + img_run.setPropertyValue(IMG_RUN_OMERO_PROPERTY_CODE, '\n'.join(omero_image_ids)) + for incoming_label in sample_property_map: + if incoming_label in properties: + key = sample_property_map[incoming_label] + value = properties[incoming_label] + img_run.setPropertyValue(key, value) return img_run -#TODO Luis -def callOmeroWithFilePath(file_path, sample_barcode): - list_of_omero_ids = ["1","2","3"] - return list_of_omero_ids - def getFileFromLine(line): return line.split("\t")[0] def isSameExperimentMetadata(props1, props2): """dependent on metadata dictionaries of two different files (data model), decide if new openBIS experiment needs to be created - might be replaced by specific metadata properties, once we know more """ - # initilization of tsv parser, always results in new experiment - if not props1 or not props2: - return False - else: - return True - + 
relevantPropertyNames = ["IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + for label in relevantPropertyNames: + if label in props1 and label in props2: + if props1[label] != props2[label]: + return False + if label in props1 and not label in props2: + return False + if label in props2 and not label in props1: + return False + return True def registerImageInOpenBIS(transaction): search_service = transaction.getSearchService() @@ -144,11 +160,74 @@ def findMetaDataFile(incomingPath): for f in files: stem, ext = os.path.splitext(f) if ext.lower()=='.tsv': - with open(os.path.join(root, f), 'U') as fh: metadataFile = fh.readlines() + with open(os.path.join(root, f), 'U') as fh: metadataFileContent = fh.readlines() return metadataFileContent +def getPropertyNames(metadataFile): + """Here we could add more complex behaviour later on. + """ + + property_names = metadataFile[0].split("\t") + for i in range(len(property_names)): + property_names[i] = property_names[i].strip().upper() + + return property_names + +def validatePropertyNames(property_names): + """Validate metadata property names. + TODO: call the imaging metadata parser (with json schema). + """ + + # fast validation without parser object. + required_names = ["IMAGE_FILE_NAME", "IMAGING_MODALITY", "IMAGED_TISSUE", "INSTRUMENT_MANUFACTURER", "INSTRUMENT_USER", "IMAGING_DATE"] + + for name in required_names: + if not name in property_names: + return False + + return True + +def getPropertyMap(line, property_names): + """Build the property map. Here we could add more complex behaviour later on. 
+ """ + + properties = {} + property_values = line.split("\t") + + for i in range(1, len(property_names)): #exclude first col (filename) + ##remove trailing newline, and replace space with underscore + name = property_names[i].rstrip('\n').replace(" ", "_") + value = property_values[i].rstrip('\n').replace(" ", "_") + + properties[name] = value + + return properties + +def filterOmeroPropertyMap(property_map, filter_list): + """Filters map before ingestion into omero server + + filter_list is a the blacklist, e.g. for what is going to openBIS or is automatically added to omero (e.g. file name) + """ + + new_props = {} + for key in property_map.keys(): + if not key in filter_list: + new_props[key] = property_map[key] + + return new_props + + +def printPropertyMap(property_map): + """Function to display metadata properties. + """ + + print("KEY : VALUE") + for key in property_map.keys(): + print "--> " + str(key) + " : " + str(property_map[key]) + def process(transaction): + print "start transaction" """The main entry point. openBIS calls this method, when an incoming transaction is registered. @@ -159,6 +238,10 @@ def process(transaction): # Get the incoming path of the transaction incomingPath = transaction.getIncoming().getAbsolutePath() + # Get the name of the incoming folder + folderName = transaction.getIncoming().getName() + + print incomingPath # 1. Initialize the image registration process registrationProcess = irp.ImageRegistrationProcess(transaction) @@ -167,23 +250,58 @@ def process(transaction): # This tells us to which biological sample the image data was aquired from. project_code, sample_code = registrationProcess.fetchOpenBisSampleCode() + #find specific sample + tissueSample = registrationProcess.searchOpenBisSample(sample_code) + space = tissueSample.getSpace() + # 3. We now request the associated omero dataset id for the openBIS sample code. 
# Each dataset in OMERO contains the associated openBIS biological sample id, which # happened during the experimental design registration with the projectwizard. - omero_dataset_id = registrationProcess.requestOmeroDatasetId() + + # Starts omero registration + # returns -1 if fetching dataset-id operation failed + omero_dataset_id = registrationProcess.requestOmeroDatasetId(project_code=project_code, sample_code=sample_code) + + print "omero dataset id:" + print omero_dataset_id + + omero_failed = int(omero_dataset_id) < 0 + if omero_failed: + raise ValueError("Omero did not return expected dataset id.") # Find and parse metadata file content metadataFile = findMetaDataFile(incomingPath) - + + print "metadataFile:" + print metadataFile + + property_names = getPropertyNames(metadataFile) + + print "property names:" + print property_names + + valid_names = validatePropertyNames(property_names) + if not valid_names: + raise ValueError("Invalid Property Names.") + + #keep track of number of images for openBIS ID + image_number = 0 + #Initialize openBIS imaging experiment + imagingExperiment = None + previousProps = {} + existing_experiment_ids = [] + + print "start reading metadata file" # Iterate over the metadata entries containing all pre-specified imaging metadata for line in metadataFile[1:]: # (Exclude header) # Get modality and other metadata from tsv here for one sample properties = {} - # Retrieve the image file name + # Retrieve the image file name, please no whitespace characters in filename! fileName = getFileFromLine(line) - - imageFile = os.path.join(incomingPath, fileName) + # Due to the datahandler we need to add another subfolder of the same name to the path + imageFolder = os.path.join(incomingPath, folderName) + imageFile = os.path.join(imageFolder, fileName) print "New incoming image file for OMERO registration:\t" + imageFile # 4. 
After we have received the omero dataset id, we know where to attach the image to @@ -191,36 +309,36 @@ def process(transaction): omero_image_ids = registrationProcess.registerImageFileInOmero(imageFile, omero_dataset_id) print "Created OMERO image identifiers:\t" + str(omero_image_ids) + omero_failed = len(omero_image_ids) < 1 + if omero_failed: + raise ValueError("Omero did not return expected image ids.") + # 5. Additional metadata is provided in an own metadata TSV file. # We extract the metadata from this file. - #registrationProcess.extractMetadataFromTSV() + properties = getPropertyMap(line, property_names) + + #one file can have many images, iterate over all img ids + for img_id in omero_image_ids: + registrationProcess.registerOmeroKeyValuePairs(img_id, filterOmeroPropertyMap(properties, PROPPERTY_FILTER_LIST)) + #### # 6. In addition to the image registration and technical metadata storage, we want to add # further experimental metadata in openBIS. This metadata contains information about the # imaging experiment itself, such as modality, imaged tissue and more. # We also want to connect this data with the previously created, corresponding OMERO image id t # hat represents the result of this experiment in OMERO. - #registrationProcess.registerExperimentDataInOpenBIS(omero_image_ids) - # 7. Last but not least we create the open science file format for images which is - # OMERO-Tiff and store it in OMERO next to the proprierary vendor format. - #registrationProcess.triggerOMETiffConversion() - - #################### - - # TODO decide if new experiment is needed based on some pre-defined criteria. + # we decide if new experiment is needed based on some pre-defined criteria. # Normally, the most important criterium is collision of experiment type properties # between samples. E.g. two different imaging modalities need two experiments. 
- #fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) - #previousProps = properties - #if(not fileBelongsToExistingExperiment): - # exp = createNewImagingExperiment(transaction, space, project_code, properties) - #imagingSample = createNewImagingRun(transaction, sa, exp, list_of_omero_ids, offset)# maybe there are sample properties, too! - # register the actual data - #IMAGING_DATASET_CODE = Q_BMI_GENERIC_IMAGING_DATA # I guess - #dataset = transaction.createNewDataSet(IMAGING_DATASET_CODE) - #dataset.setSample(imagingSample) - #transaction.moveFile(imageFile, dataset) - # increment id offset for next sample in this loop - not sure anymore if this is needed - \ No newline at end of file + fileBelongsToExistingExperiment = isSameExperimentMetadata(previousProps, properties) + previousProps = properties + if(not fileBelongsToExistingExperiment): + imagingExperiment = createNewImagingExperiment(transaction, space, project_code, properties, existing_experiment_ids) + imagingSample = createNewImagingRun(transaction, tissueSample, imagingExperiment, omero_image_ids, image_number, properties) + # increment id offset for next sample in this loop + image_number += 1 + + # 7. Last but not least we create the open science file format for images which is + # OMERO-Tiff and store it in OMERO next to the proprierary vendor format.