In [None]:
# default_exp data

# data

> Core utilities for extracting metadata from DICOM files and storing it in MongoDB. Built on the `pydicom`, `fastcore` and `pymongo` packages. Some ideas are borrowed from [fastai.medical.image](https://github.com/fastai/fastai/blob/master/fastai/medical/imaging.py).

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
from dicomtools.imports import *

import pydicom, pymongo
from pydicom.dataset import Dataset as DcmDataset
from pydicom.sequence import Sequence as DcmSequence
from pydicom.multival import MultiValue as DcmMultiValue

In [None]:
#export
def send_dcm2mongo(path, db_name: str, coll_name: str, first_dcm=False, ip_addr="127.0.0.1:27017", **kwargs):
    """Find DICOM files under `path`, convert them with `dcm2mongo` and insert the result into MongoDB.

    path: root directory that is searched recursively for `.dcm` files.
    db_name, coll_name: database and collection that receive the documents.
    first_dcm: if `True`, only one `.dcm` file is taken from each folder.
    ip_addr: address handed to `pymongo.MongoClient`.
    kwargs: forwarded to `dcm2mongo`.

    Returns the value of `collection.insert_many(docs)`. If the insert raises,
    a message is printed and the list of documents is returned instead.
    """
    fns = L()
    print("Finding DICOM files. This may take a few minutes.")
    if first_dcm:
        for root, _, files in os.walk(path):
            # Pick the first `.dcm` entry instead of only looking at `files[0]`, so a
            # non-DICOM file that happens to be listed first does not hide the folder.
            first = next((f for f in files if Path(f).suffix.lower() == '.dcm'), None)
            if first is not None:
                fns.append(Path(root) / first)
    else:
        fns = L(glob.glob(f'{path}/**/*.dcm', recursive=True)).map(Path)
    print("Reading DICOMs. This may take a few minutes, depending on the number of files to read...")
    # Extra keyword arguments were previously accepted but silently ignored.
    docs = dcm2mongo(fns, **kwargs)
    # The context manager closes the client's connections once the insert is done.
    with pymongo.MongoClient(ip_addr) as client:
        collection = client[db_name][coll_name]
        try:
            result = collection.insert_many(docs)
        except Exception as e:
            # Best-effort: hand the documents back so the (slow) read step is not lost,
            # but let KeyboardInterrupt/SystemExit propagate and show what went wrong.
            print('Writing `docs` to `collection` failed. `docs` will be returned from function instead of `result`.')
            print(f'Reason: {e!r}')
            return docs
    return result

In [None]:
#export
@patch
def dcmread(fn: Path, no_pixels=True, force=True):
    """Read the DICOM file at `fn` and return the corresponding `pydicom` dataset.

    no_pixels: passed to `pydicom.dcmread` as `stop_before_pixels`.
    force: passed to `pydicom.dcmread` as `force`.

    If reading raises, a message is printed and a placeholder dict
    `{"fname": <path as str>}` is returned instead of a dataset.
    """
    try:
        return pydicom.dcmread(str(fn), stop_before_pixels=no_pixels, force=force)
    except Exception:
        print(f'{fn} is not a valid DICOM file.')
        # Store the path as a plain string: the placeholder ends up among the documents
        # sent to MongoDB, and pymongo cannot encode `pathlib.Path` objects.
        return {"fname": str(fn)}

In [None]:
#export
def _cast_dcm_special(x):
    """Convert a pydicom-specific value into a plain Python one.

    - A `DcmMultiValue` becomes a list whose items are converted recursively.
    - A value whose type is not defined in a `pydicom*` module is returned unchanged.
    - A pydicom type deriving directly from `object` is converted with `str`.
    - Any other pydicom type is converted by calling its direct base class on it.
    """
    if isinstance(x, DcmMultiValue):
        return list(map(_cast_dcm_special, x))
    kind = type(x)
    if kind.__module__.startswith('pydicom'):
        parent = kind.__base__
        return str(x) if parent == object else parent(x)
    return x

In [None]:
#export
@patch
def as_dict_mongo(self: DcmDataset, incl_fname=True):
    """Return the dataset's elements as a `{keyword: value}` dict.

    - Elements whose keyword contains 'AttributesSequence' are skipped.
    - 'SOPClassUID' is stored as the `.name` of its value.
    - A sequence is represented by its first item only, converted recursively
      (without `fname`); an empty sequence becomes `{}`.
    - Every value is finally passed through `_cast_dcm_special`.

    incl_fname: if true, `self.filename` is added under the key 'fname'.
    """
    res = {}
    for tag in self.keys():
        elem = self[tag]  # look the element up once per tag
        keyword = elem.keyword
        if 'AttributesSequence' in keyword: continue
        value = elem.value
        if keyword == 'SOPClassUID':
            value = value.name
        elif isinstance(value, DcmSequence):
            # A sequence may hold zero items; indexing `[0]` unconditionally raised
            # IndexError and aborted the conversion of the whole file.
            value = value[0].as_dict_mongo(incl_fname=False) if len(value) else {}
        res[keyword] = value
    if incl_fname: res['fname'] = self.filename
    return {k: _cast_dcm_special(v) for k, v in res.items()}

In [None]:
#export
def _dcm2dict_mongo(fn, excl_private=False, **kwargs):
    """Read `fn` via its `dcmread` method and return the dataset as a dict.

    If `dcmread` returns something that is not a `DcmDataset`, that value is
    returned unchanged. With `excl_private=True`, `remove_private_tags()` is
    called on the dataset before conversion. `kwargs` go to `as_dict_mongo`.
    """
    dataset = fn.dcmread()
    if isinstance(dataset, DcmDataset):
        if excl_private:
            dataset.remove_private_tags()
        return dataset.as_dict_mongo(**kwargs)
    return dataset

In [None]:
#export
@delegates(parallel)
def dcm2mongo(fns, n_workers=0, **kwargs):
    "Run `_dcm2dict_mongo` over `fns` through `parallel` (with `n_workers` and `kwargs`) and return the results as a list."
    records = parallel(_dcm2dict_mongo, fns, n_workers=n_workers, **kwargs)
    return list(records)