In [3]:
import pyxnat
import os
import jsonlines
import json
import time
import pydicom
from typing import Dict, List

# Setup Directories
`data_dir` is where all the DICOM files are stored

`labels.jsonl` is a jsonlines file that contains labels for the corresponding DICOM file paths

In [5]:
# Folder that holds the converted DICOM files.
data_dir = '/images/blob/padchest/converted_2021-07-26/dicoms-fhir'
# The labels file sits directly inside the data folder.
labels_file = f'{data_dir}/labels.jsonl'

# Setup XNAT client

In [6]:
# Connection settings are read from the environment so that credentials do not
# have to be committed with the notebook. The fallbacks are the values the
# notebook originally hardcoded.
# NOTE(review): the 'admin'/'admin' fallback is a default credential pair;
# set XNAT_USER / XNAT_PASSWORD for any server that matters.
xnat_server = os.environ.get('XNAT_SERVER', 'http://10.2.0.9')
user = os.environ.get('XNAT_USER', 'admin')
password = os.environ.get('XNAT_PASSWORD', 'admin')
# Name of the XNAT project; keep this a plain string, other cells pass it on
# to build URIs and to select the project.
project = os.environ.get('XNAT_PROJECT', 'padchest2')
xnat_interface = pyxnat.Interface(server=xnat_server, user=user, password=password)

# Create an index for dicom files and corresponding labels
We'll build the following index from the JSON Lines file:
```
 '2.25.111560737383907677338472218743134259277.dcm': ['Opacity'],
 '2.25.106606579614019590132459583155518343843.dcm': ['Atelectasis',
  'Pleural_Effusion',
  'Pleural_Abnormalities'],
 '2.25.288856916057862677977653594428399214627.dcm': ['Atelectasis',
  'Pleural_Effusion',
  'Pleural_Abnormalities'],
 '2.25.212461397906913979117946429815718832885.dcm': ['No_Finding'],
 '2.25.21295911399383494291943739496412220897.dcm': ['Pleural_Effusion',
  'Pleural_Abnormalities',
  'Opacity'],
```
where each key is a DICOM file name and each value is the list of labels for that file

In [None]:
def build_index() -> Dict[str, List]:
    """Build a lookup from image id to labels out of the labels file.

    Reads every record of the JSON Lines file at ``labels_file``. Each record
    must provide an ``image_url`` entry and a ``label`` entry.

    Returns:
        A dict whose keys are the second-to-last ``/``-separated component of
        each record's ``image_url`` and whose values are the record's
        ``label`` entry. When several records share a key, the last one wins.
    """
    # NOTE(review): the notebook text shows keys ending in '.dcm' and the
    # upload cell looks keys up by file name, while the labelling cell looks
    # them up without the extension -- confirm which form `image_url` yields.
    with jsonlines.open(labels_file) as reader:
        return {
            record['image_url'].split('/')[-2]: record['label']
            for record in reader
        }


index = build_index()


# Iterate over all scans and write label.json file

We walk over all the scans in the XNAT project. For each scan we create a new scan resource called `LABEL` and upload a file `label.json` containing the corresponding annotation.


In [None]:
# Keep the project object under its own name. Rebinding `project` would
# replace the project-name string that other cells pass to XNAT, and would
# make this cell fail when it is run a second time.
xnat_project = xnat_interface.select.project(project)
label_file = '/tmp/label.json'
for subject in xnat_project.subjects():
    for experiment in subject.experiments():
        for scan in experiment.scans():
            # Scan ids were created with '.' replaced by '_'; undo that to
            # get back the key used in the index.
            id_ = scan.id().replace('_', '.')
            print(f'creating label for {id_}')
            if id_ not in index:
                # One scan without a label should not abort the whole walk.
                print(f'No label found for {id_}, skipping')
                continue
            with open(label_file, 'w') as label_fp:
                json.dump(index[id_], label_fp)
            try:
                resource = scan.resource('LABEL')
                # Only create the resource when it is missing, so the cell can
                # be re-run on scans that already carry a LABEL resource.
                # NOTE(review): presumably create() on an existing resource is
                # what produced the bare 'Failed' lines -- confirm.
                if not resource.exists():
                    resource.create()
                resource.file('label.json').put(
                    src=label_file, format='JSON', content='JSON',
                    extract=False, overwrite=True)
            except Exception as e:
                # Report the cause; a bare 'Failed' gives nothing to act on.
                print(f'Failed: {e}')
            else:
                print('Succeded')

# Additional helper methods

In [21]:
def read_dicom(file_path: str):
    """Read the DICOM file at ``file_path`` and return what ``pydicom.dcmread`` parses.

    Args:
        file_path: path of the DICOM file on disk.

    Returns:
        The object returned by ``pydicom.dcmread`` for that path.
    """
    # dcmread is given the path and opens the file itself; the extra open()
    # the function used to wrap around it produced a handle nobody read from.
    return pydicom.dcmread(file_path)

In [22]:
def create_subject(name, project) -> str:
    """Issue a PUT for subject ``name`` under ``project`` and return the reply body.

    Args:
        name: subject name, used as the last path segment of the URI.
        project: project name, interpolated into the URI.

    Returns:
        The response content decoded to ``str`` (presumably the subject id
        assigned by XNAT -- callers use it as such).
    """
    subject_uri = f'/data/projects/{project}/subjects/{name}'
    put_response = xnat_interface.put(subject_uri)
    return str(put_response.content.decode())
    

In [40]:
def create_experiment(subject, experiment_id):
    """Create an ``xnat:crSessionData`` experiment under ``subject``.

    Args:
        subject: subject object providing ``id()`` and ``experiment(...)``.
        experiment_id: identifier handed to ``subject.experiment``.

    Returns:
        The experiment object, after ``create`` has been called on it.
    """
    # Print the id of the subject that was actually passed in. This used to
    # print the notebook-global `subject_id`, which raises NameError on a
    # fresh kernel and can show a stale value otherwise.
    print(subject.id())
    experiment = subject.experiment(experiment_id)
    experiment.create(xsiType='xnat:crSessionData')
    print('New experiment %s created' %experiment.id())
    return experiment

In [41]:
def create_scan(experiment, scan_id):
    """Create a usable ``xnat:crScanData`` scan named ``scan_id`` under ``experiment``.

    Args:
        experiment: experiment object providing ``scan(...)``.
        scan_id: identifier handed to ``experiment.scan``.

    Returns:
        The scan object, after ``create`` has been called on it.
    """
    new_scan = experiment.scan(scan_id)
    new_scan.create(xsiType='xnat:crScanData', usability='usable')
    return new_scan

In [None]:
def clear_project():
    """Delete every subject of the project named by the global ``project``."""
    all_subjects = xnat_interface.select.project(project).subjects()
    # Take a snapshot of the subjects first, then delete them one by one.
    for existing_subject in list(all_subjects):
        existing_subject.delete()

creating label for 2.25.305307984233715575866436895013148178629
Failed
creating label for 2.25.214631097618828029598260715608822318864
Failed
creating label for 2.25.192231063686238359213327995682738012121
Failed
creating label for 2.25.244480674969191966880092685099992711023
Failed
creating label for 2.25.152769524351990736351165367861563254013
Failed
creating label for 2.25.172471882061777945660473329684218324745
Failed
creating label for 2.25.292401953150861310105895588150603752848
Failed
creating label for 2.25.300161708988940644333036515302329588926
Failed
creating label for 2.25.6328380530298980283066110703905898816
Succeded
creating label for 2.25.145638117649727110143439113733964907443
Failed
creating label for 2.25.31116149252745065360522636171105132684
Succeded
creating label for 2.25.9102763290507784543919350587188710063
Failed
creating label for 2.25.141347065401843142792669135503885869714
Succeded
creating label for 2.25.67570235204737764761268312084904600976
Succeded
crea

In [47]:
# Upper bound on the number of DICOM files picked up by one run of this cell.
max_uploads = 10
count = 0
experiments = {}
label_file = '/tmp/label.json'

label_index = build_index()
for root, dirs, files in os.walk(data_dir):
    # Check the cap before doing any work. The check used to sit at the end
    # of the loop body as `count == 10`, where every `continue` skipped it;
    # once `count` passed 10 that way the loop never stopped early.
    if count >= max_uploads:
        break
    # Only the first file listed in each directory is considered.
    if not files or not files[0].endswith('.dcm'):
        continue
    dicom_file = files[0]
    # NOTE(review): this lookup uses the file name including '.dcm', while
    # the labelling cell looks ids up without the extension -- confirm which
    # form build_index() produces.
    if dicom_file not in label_index:
        continue
    file_path = os.path.join(root, dicom_file)
    count += 1

    dicom = read_dicom(file_path)
    subject_name = str(dicom['PatientName'].value).replace('^', '_')
    try:
        print(f'creating {subject_name}')
        subject_id = create_subject(subject_name, project)
    except Exception as e:
        print(f'Failed to create subject {e}')
        continue

    subject = xnat_interface.select.project(project).subject(subject_id)
    try:
        experiment = create_experiment(subject, f'exp{count}')
        experiments[subject] = experiment
    except Exception as e:
        print(f'Failed to create experiment {e}')
        continue

    # The scan id is the file name without '.dcm', with dots turned into
    # underscores.
    scan = create_scan(experiment, dicom_file.split(".dcm")[0].replace('.', '_'))
    print(scan)

    # Upload the DICOM file itself.
    dicom_resource = scan.resource('DICOM')
    dicom_resource.create()
    dicom_resource.file(dicom_file).put(
        src=file_path, format='DICOM', content='RAW',
        extract=False, overwrite=True)

    # Write the labels to a scratch file and upload it next to the image.
    with open(label_file, 'w') as label_fp:
        json.dump(label_index[dicom_file], label_fp)

    label_resource = scan.resource('LABEL')
    label_resource.create()
    label_resource.file('label.json').put(
        src=label_file, format='JSON', content='JSON',
        extract=False, overwrite=True)

creating Stroman228_Kurtis994
XNAT01_S00151
New experiment XNAT01_E00146 created
<Scan Object> 2_25_269000718940131394502394471219865557303 (``  frames)  http://10.2.0.9/data/projects/scenario2/subjects/XNAT01_S00151/experiments/exp1/scans/2_25_269000718940131394502394471219865557303?format=html
creating Kunze215_Elden718
XNAT01_S00152
New experiment XNAT01_E00147 created
<Scan Object> 2_25_3306247422047918493529936784705070920 (``  frames)  http://10.2.0.9/data/projects/scenario2/subjects/XNAT01_S00152/experiments/exp2/scans/2_25_3306247422047918493529936784705070920?format=html
creating Swift555_Donnie175
XNAT01_S00153
New experiment XNAT01_E00148 created
<Scan Object> 2_25_228574107663032879690392082227436105293 (``  frames)  http://10.2.0.9/data/projects/scenario2/subjects/XNAT01_S00153/experiments/exp3/scans/2_25_228574107663032879690392082227436105293?format=html
creating Schowalter414_Russell422
XNAT01_S00154
New experiment XNAT01_E00149 created
<Scan Object> 2_25_14174093705556

In [46]:
# Preview the scan id derived from the current `dicom_file`: drop the '.dcm'
# suffix and swap every dot for an underscore.
'_'.join(dicom_file.split(".dcm")[0].split('.'))

'2_25_3012003400308846774347472988432950228'