Skip to content

Commit

Permalink
This will add support to read and manipulate file meta
Browse files Browse the repository at this point in the history
Remaining questions:
1. Do file meta fields overlap with fields in the dicom dataset? If so, we need to check whether each field is a file meta field before replacing it. As implemented now, both sets of fields are parsed equivalently, and it is assumed that file meta fields do not appear in the dicom dataset and vice versa.
2. Do we need to make any special changes to the file meta, e.g., perhaps the size? I have never manipulated it before, and I am hoping that pydicom save handles these changes, but if not we should do it manually.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Sep 2, 2021
1 parent f177634 commit 39d8e01
Show file tree
Hide file tree
Showing 21 changed files with 251 additions and 130 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
run: |
export PATH="/usr/share/miniconda/bin:$PATH"
source activate black
pip install black
pip install black --upgrade
black --check --verbose deid
testing:
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- updated pydicom dependency from 1.3.0 to 2.1.1 [#171] (https://github.com/pydicom/deid/issues/171) (0.2.25)
- adding support to manipulate file meta [#183](https://github.com/pydicom/deid/issues/183) (0.2.26)
- updated pydicom dependency from 1.3.0 to 2.1.1 [#171](https://github.com/pydicom/deid/issues/171) (0.2.25)
- bug fix for multivalued fields in %values lists [#174](https://github.com/pydicom/deid/issues/174)
- allowing other VR types for jitter [#175](https://github.com/pydicom/deid/issues/175)
- ensuring that an add/replace of an existing value is also updated in fields [#173](https://github.com/pydicom/deid/issues/173)
Expand Down
25 changes: 16 additions & 9 deletions deid/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,18 @@ def get_fields_lists(self, name=None):
"""return a values list by name"""
return self._get_named_section("fields", name)

def _get_actions(self, action=None, field=None, section="header"):
    """Return the actions of a recipe section, optionally filtered.

    Shared handler for header and filemeta actions.

    Parameters
    ==========
    action: if not None, keep only entries whose "action" matches
            (compared case-insensitively)
    field: if not None, keep only entries whose "field" matches
           (compared case-insensitively)
    section: name of the recipe section to read ("header" by default)

    Returns
    =======
    A list of action entries. The list is empty when _get_section
    returns nothing for the section.
    """
    # A falsy result (None or an empty list) is normalized to [] here,
    # so no separate None check is needed before filtering.
    actions = self._get_section(section) or []
    if action is not None:
        action = action.upper()
        actions = [x for x in actions if x["action"].upper() == action]
    if field is not None:
        field = field.upper()
        actions = [x for x in actions if x["field"].upper() == field]
    return actions

def get_actions(self, action=None, field=None):
"""get deid actions to perform on a header, or a subset based on a type
Expand All @@ -130,16 +142,11 @@ def get_actions(self, action=None, field=None):
field: if not None, filter to field specified
"""
header = self._get_section("header")
if header is not None:
if action is not None:
action = action.upper()
header = [x for x in header if x["action"].upper() == action]
if field is not None:
field = field.upper()
header = [x for x in header if x["field"].upper() == field]
return self._get_actions(action, field)

return header
def get_filemeta_actions(self, action=None, field=None):
    """Get the actions listed in the "filemeta" section of the recipe.

    Parameters
    ==========
    action: if not None, filter to action specified
    field: if not None, filter to field specified
    """
    return self._get_actions(action=action, field=field, section="filemeta")

# Boolean properties

Expand Down
2 changes: 1 addition & 1 deletion deid/config/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
formats = ["dicom"]

# Supported Sections
sections = ["header", "labels", "filter", "values", "fields"]
sections = ["header", "labels", "filter", "values", "fields", "filemeta"]

# Supported Header Actions
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")
Expand Down
34 changes: 17 additions & 17 deletions deid/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,25 +86,23 @@ def load_combined_deid(deids):
deid["filter"] = next_deid["filter"]
else:
for name, group in next_deid["filter"].items():
if name in deid["filter"]:
deid["filter"][name] = deid["filter"][name] + group
else:
deid["filter"][name] = group

if "header" in next_deid:
if "header" not in deid:
deid["header"] = next_deid["header"]
else:
deid["header"] = deid["header"] + next_deid["header"]
# Look up the existing group by the loop variable `name` (not the
# literal string "name") so earlier filters in this group are kept.
deid["filter"][name] = (
    deid["filter"].get(name, []) + group
)

for attr in ["header", "filemeta"]:
if attr in next_deid:
deid[attr] = deid.get(attr, []) + next_deid[attr]

else:
bot.warning("Problem loading %s, skipping." % single_deid)
return deid


def load_deid(path=None):
"""load_deid will return a loaded in (user) deid configuration file
that can be used to update a default config.json. If a file path is
"""Load_deid will return a loaded in (user) deid configuration file.
This can be used to update a default config.json. If a file path is
specified, it is loaded directly. If a folder is specified, we look
for a deid file in the folder. If nothing is specified, we assume
the user wants to load a deid file in the present working directory.
Expand Down Expand Up @@ -200,8 +198,9 @@ def load_deid(path=None):


def find_deid(path=None):
"""find_deid is a helper function to load_deid to find a deid file in
a folder, or return the path provided if it is the file.
"""find_deid is a helper function to load_deid to find a deid file.
It can be in a folder, or return the path provided if it is the file.
Parameters
==========
Expand Down Expand Up @@ -240,9 +239,10 @@ def find_deid(path=None):


def parse_format(line):
"""given a line that starts with FORMAT, parse the format of the
file and check that it is supported. If not, exit on error. If yes,
return the format.
"""given a line that starts with FORMAT, parse the file.
This means checking the format of the file and checking that it is
supported. If not, exit on error. If yes, return the format.
Parameters
==========
Expand Down
18 changes: 10 additions & 8 deletions deid/dicom/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from deid.logger import bot
from pydicom.sequence import Sequence
from pydicom.dataset import RawDataElement, Dataset
from pydicom.dataset import RawDataElement, Dataset, FileMetaDataset
from pydicom.dataelem import DataElement
import re

Expand All @@ -34,10 +34,11 @@ class DicomField:
nested structure (e.g., SequenceName__CodeValue).
"""

def __init__(self, element, name, uid):
def __init__(self, element, name, uid, is_filemeta=False):
self.element = element
self.name = name # nested names (might not be unique)
self.uid = uid # unique id includes parent tags
self.is_filemeta = is_filemeta

def __str__(self):
return "%s [%s]" % (self.element, self.name)
Expand Down Expand Up @@ -228,17 +229,17 @@ def get_fields(dicom, skip=None, expand_sequences=True, seen=None):
if not isinstance(skip, list):
skip = [skip]

datasets = [dicom]
# Retrieve both dicom and file meta fields
datasets = [dicom, dicom.file_meta]

# helper function to add an element based on tag uid
def add_element(element, name, uid):
def add_element(element, name, uid, is_filemeta):
"""Add an element to fields, but only if it has not been seen.
The uid is derived from the tag (group, element) and includes
nesting, so the "same" tag on different levels is considered
different.
"""
if uid not in seen:
fields[uid] = DicomField(element, name, uid)
fields[uid] = DicomField(element, name, uid, is_filemeta)
seen.append(uid)

while datasets:
Expand All @@ -249,6 +250,7 @@ def add_element(element, name, uid):
# If the dataset does not have a prefix, we are at the start
dataset.prefix = getattr(dataset, "prefix", None)
dataset.uid = getattr(dataset, "uid", None)
is_filemeta = isinstance(dataset, FileMetaDataset)

# Includes private tags, sequences flattened, non-null values
for contender in dataset:
Expand All @@ -270,7 +272,7 @@ def add_element(element, name, uid):
if isinstance(contender.value, Sequence) and expand_sequences is True:

# Add the contender (usually type Dataset) to fields
add_element(contender, name, uid)
add_element(contender, name, uid, is_filemeta)

# A nested dataset can be parsed as such
for idx, item in enumerate(contender.value):
Expand All @@ -287,7 +289,7 @@ def add_element(element, name, uid):

# A DataElement can be extracted as is
elif isinstance(contender, DataElement):
add_element(contender, name, uid)
add_element(contender, name, uid, is_filemeta)

else:
bot.warning(
Expand Down
18 changes: 15 additions & 3 deletions deid/dicom/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,23 @@ def parse(self, strip_sequences=False, remove_private=False):
dicom=self.dicom, actions=actions, fields=fields
)

# actions on the header
for action in self.recipe.get_actions():
self.perform_action(
field=action.get("field"),
value=action.get("value"),
action=action.get("action"),
)

# actions on the file_meta
for action in self.recipe.get_filemeta_actions():
self.perform_action(
field=action.get("field"),
value=action.get("value"),
action=action.get("action"),
filemeta=True,
)

# Next perform actions in default config, only if not done
for action in self.config["put"]["actions"]:
self.perform_action(
Expand Down Expand Up @@ -257,7 +267,9 @@ def get_fields(self, expand_sequences=True):
"""
if not self.fields:
self.fields = get_fields(
dicom=self.dicom, expand_sequences=expand_sequences, seen=self.seen,
dicom=self.dicom,
expand_sequences=expand_sequences,
seen=self.seen,
)
return self.fields

Expand Down Expand Up @@ -299,7 +311,7 @@ def find_by_name(self, name):

# Actions

def perform_action(self, field, value, action):
def perform_action(self, field, value, action, filemeta=False):
"""perform action takes an action (dictionary with field, action, value)
and performs the action on the loaded dicom.
Expand All @@ -310,7 +322,7 @@ def perform_action(self, field, value, action):
"field" (eg, PatientID) the header field to process
"action" (eg, REPLACE) what to do with the field
"value": if needed, the field from the response to replace with
filemeta (bool) perform on filemeta
"""
# Validate the action
if action not in valid_actions:
Expand Down
60 changes: 60 additions & 0 deletions deid/tests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python

"""
Copyright (c) 2016-2021 Vanessa Sochat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""


def create_recipe(actions, fields=None, values=None, filemeta=None):
    """Create a DeidRecipe whose sections are set directly from arguments.

    Parameters
    ==========
    actions: list of header actions, stored as the "header" section
    fields: if not None, stored as the "fields" section
    values: if not None, stored as the "values" section
    filemeta: if not None, list of file meta actions, stored as the
              "filemeta" section

    Returns
    =======
    The DeidRecipe with the given sections replaced.
    """
    from deid.config import DeidRecipe

    recipe = DeidRecipe()

    # Rebinding the key replaces any header the recipe was loaded with,
    # and also works when the loaded recipe has no "header" section.
    recipe.deid["header"] = actions

    optional_sections = {
        "fields": fields,
        "values": values,
        "filemeta": filemeta,
    }
    for name, content in optional_sections.items():
        if content is not None:
            recipe.deid[name] = content

    return recipe


def get_dicom(dataset):
    """Load the first dicom file found for a dataset.

    Parameters
    ==========
    dataset: passed to deid.dicom.get_files to find the dicom files

    Returns
    =======
    The first file yielded by get_files, read with pydicom.dcmread.
    """
    from deid.dicom import get_files

    # dcmread replaces the legacy read_file alias, which pydicom 3.0 removed.
    from pydicom import dcmread

    dicom_files = get_files(dataset)
    return dcmread(next(dicom_files))


def get_file(dataset):
    """Return the first dicom file yielded by get_files for a dataset."""
    from deid.dicom import get_files

    return next(get_files(dataset))
9 changes: 1 addition & 8 deletions deid/tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from deid.utils import get_installdir
from deid.data import get_dataset
from deid.tests.common import get_file
from pydicom import read_file

global generate_uid
Expand Down Expand Up @@ -230,13 +231,5 @@ def test_pixel_cleaner_keepcoordinates_from(self):
self.assertTrue(compare.all())


def get_file(dataset):
"""helper to get a dicom file"""
from deid.dicom import get_files

dicom_files = get_files(dataset)
return next(dicom_files)


if __name__ == "__main__":
unittest.main()
9 changes: 8 additions & 1 deletion deid/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,14 @@ def test_standards(self):
self.assertEqual(len(unknown), 0)

print("Testing standards: allowed sections")
default_sections = ["header", "labels", "filter", "fields", "values"]
default_sections = [
"header",
"labels",
"filter",
"fields",
"values",
"filemeta",
]
[self.assertTrue(x in sections) for x in default_sections]
unknown = [x for x in sections if x not in default_sections]
self.assertEqual(len(unknown), 0)
Expand Down
10 changes: 1 addition & 9 deletions deid/tests/test_dicom_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from deid.utils import get_installdir
from deid.data import get_dataset
from deid.dicom.fields import get_fields
from deid.tests.common import get_dicom


class TestDicomFields(unittest.TestCase):
Expand Down Expand Up @@ -79,14 +80,5 @@ def test_field_expansion(self):
dicom = get_dicom(dataset)


def get_dicom(dataset):
"""helper function to load a dicom"""
from deid.dicom import get_files
from pydicom import read_file

dicom_files = get_files(dataset)
return read_file(next(dicom_files))


if __name__ == "__main__":
unittest.main()
10 changes: 1 addition & 9 deletions deid/tests/test_dicom_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from deid.dicom.fields import get_fields
from deid.dicom import get_identifiers, replace_identifiers
from deid.dicom.parser import DicomParser
from deid.tests.common import get_file, get_dicom


class TestDicomGroups(unittest.TestCase):
Expand Down Expand Up @@ -122,14 +123,5 @@ def test_extract_groups(self):
self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")


def get_dicom(dataset):
"""helper function to load a dicom"""
from deid.dicom import get_files
from pydicom import read_file

dicom_files = get_files(dataset)
return read_file(next(dicom_files))


if __name__ == "__main__":
unittest.main()

0 comments on commit 39d8e01

Please sign in to comment.