From 3a75e99cc087e8f5d9050e3d0201e1da625584aa Mon Sep 17 00:00:00 2001 From: Caroline Sands Date: Wed, 20 Mar 2024 11:37:21 +0000 Subject: [PATCH] updated filenameSpec in GenericMS.json to read any three name sample SOP code (LIMS) added error message if adding info from filenames fails --- nPYc/StudyDesigns/SOP/GenericMS.json | 2 +- nPYc/objects/_msDataset.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nPYc/StudyDesigns/SOP/GenericMS.json b/nPYc/StudyDesigns/SOP/GenericMS.json index 6cc407c..69507b6 100644 --- a/nPYc/StudyDesigns/SOP/GenericMS.json +++ b/nPYc/StudyDesigns/SOP/GenericMS.json @@ -11,7 +11,7 @@ "msPrecision": 0.000008, "varianceRatio": 1.1, "blankThreshold": 1.1, - "filenameSpec": "\n\t\t^(?P\n\t\t\t(?P\n\t\t\t\t(?P\\w+?)\t\t\t\t\t\t\t\t\t\t# Study\n\t\t\t\t_\n\t\t\t\t(?P[HRL]|BA)(?PPOS|NEG)\t# Chromatography and mode\n\t\t\t\t_\n\t\t\t\t(?P\\w+?\\d\\d)\t\t\t\t\t\t\t# Instrument\n\t\t\t\t_\n\t\t\t\t(?PBlank|E?IC|[A-Z]{1,2})(?P\\d+?) # Sample grouping\n\t\t\t\t(?:\n\t\t\t\t(?P[WSE]|SRD|SRDB)(?P\\d\\d?) # Subject ID\n\t\t\t\t)?\n\t\t\t\t(?:_(?PSR|LTR|MR))?\t\t\t\t\t # Reference\n\t\t\t)\n\t\t\t(?:_(?P[xX]))?\t\t\t\t\t\t\t # Exclusions\n\t\t\t(?:_(?P[a-wyzA-WYZ]|[Rr]e[Rr]un\\d*?))?\t\t # Reruns\n\t\t\t(?:_(?P\\d+?))?\t\t\t\t\t\t # Repeats\n\t\t\t(?:_(?P[xX]))?\t\t\t\t\t\t\t # badly ordered exclusions\n\t\t)$\n\t\t", + "filenameSpec": "\n\t\t^(?P\n\t\t\t(?P\n\t\t\t\t(?P\\w+?)\t\t\t\t\t\t\t\t\t\t# Study\n\t\t\t\t_\n\t\t\t\t(?P[HRL]|BA)(?PPOS|NEG)\t# Chromatography and mode\n\t\t\t\t_\n\t\t\t\t(?P\\w+?\\d\\d)\t\t\t\t\t\t\t# Instrument\n\t\t\t\t_\n\t\t\t\t(?PBlank|E?IC|[A-Z]{1,3})(?P\\d+?) # Sample grouping\n\t\t\t\t(?:\n\t\t\t\t(?P[WSE]|SRD|SRDB)(?P\\d\\d?) # Subject ID\n\t\t\t\t)?\n\t\t\t\t(?:_(?PSR|LTR|MR))?\t\t\t\t\t # Reference\n\t\t\t)\n\t\t\t(?:_(?P[xX]))?\t\t\t\t\t\t\t # Exclusions\n\t\t\t(?:_(?P[a-wyzA-WYZ]|[Rr]e[Rr]un\\d*?))?\t\t # Reruns\n\t\t\t(?:_(?P\\d+?))?\t\t\t\t\t\t # Repeats\n\t\t\t(?:_(?P[xX]))?\t\t\t\t\t\t\t # badly ordered exclusions\n\t\t)$\n\t\t", "analyticalMeasurements": {"Study" : "categorical", "Chromatography" : "categorical", "Ionisation" : "categorical", "Instrument" : "categorical", "Re-Run" : "categorical", "Suplemental Injections" : "categorical", "Matrix" : "categorical", "Well" : "categorical", "Plate" : "categorical", "Batch" : "categorical", "Dilution" : "continuous", "Measurement Date" : "date", "Measurement Time" : "date", "$$ Instrument:" : "categorical", "Backing" : "continuous", "Capillary (kV)" : "continuous", "Collision" : "continuous", "Collision Energy" : "continuous", "Column Serial Number:" : "categorical", "ColumnType:" : "categorical", "Cone Gas Flow (L/Hr)" : "continuous", "Desolvation Gas Flow (L/Hr)" : "continuous", "Desolvation Temperature (°C)" : "continuous", "Detector" : "continuous", "Detector Unit" : "categorical", "End Mass" : "continuous", "HM Resolution" : "continuous", "Interscan Time (sec)" : "continuous", "LM Resolution" : "continuous", "Polarity" : "categorical", "Resolution" : "continuous", "Sampling Cone" : "continuous", "Scan Time (sec)" : "continuous", "Source Offset" : "continuous", "Source Temperature (°C)" : "continuous", "Start Mass" : "continuous", "TOF" : "continuous", "Warnings" : "categorical", "Acquired Time" : "date", "Run Order" : "continuous", "Correction Batch" : "categorical", "Assay data name": "categorical", "Assay data location": "categorical", "Sample position": "categorical", "Sample batch": "categorical", "Acquisition batch": "categorical", "Plot Sample Type": "categorical", "AssayRole": "categorical", "SampleType": "categorical", "Exclusion Details": "categorical", "Skipped": "categorical", "Assay protocol": "categorical"}, "excludeFromPlotting": ["Sample File Name", "Sample Base Name", "Batch Termini", "Study Reference", "Long-Term Reference", "Method Reference", "Dilution Series", "Skipped", "Study Sample", "File Path", "Exclusion Details", "Assay protocol", "Status", "Measurement Date", "Measurement Time", "Data Present", "LIMS Present", "LIMS Marked Missing", "Assay data name", "Assay data location", "AssayRole", "SampleType", "Sampling ID", "Plot Sample Type", "SubjectInfoData", "Detector Unit"], "sampleMetadataNotExported":["Metadata Available", "Sample Base Name", "Study", "Chromatography", "Ionisation", "Re-Run", diff --git a/nPYc/objects/_msDataset.py b/nPYc/objects/_msDataset.py index 243d007..397b5ab 100644 --- a/nPYc/objects/_msDataset.py +++ b/nPYc/objects/_msDataset.py @@ -1429,6 +1429,11 @@ def _getSampleMetadataFromFilename(self, filenameSpec): self.sampleMetadata['Metadata Available'] = True self.Attributes['Log'].append([datetime.now(), 'Sample metadata parsed from filenames.']) + # Return failure message if information not able to be inferred for any sample + if (self.sampleMetadata['Sample Base Name'].isnull().values.any()): + raise npycToolboxError('Inferring information from filenames failed for some samples, check and amend `filenameSpec` as appropriate and re-start import') + + def _inferBatches(self, gapLength=24): """ Use acquisition time and run order to suggest batch structure from a dataset.