In [33]:
import os
import shutil
import json
from pathlib import Path
import traceback

import pydicom
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import shape

from RoomOfRequirement.Quad import QUAD_Manager
from RoomOfRequirement.Evaluation import Evaluation

In [34]:
# Generate the desired folder structure.
# The dataset root defaults to the original local checkout; set the
# T1_DATASET_PATH environment variable to build the dataset elsewhere
# without editing the notebook (an unset or empty variable uses the default).
dataset_path = os.environ.get('T1_DATASET_PATH') or '/Users/thomas/Documents/GitHub/cnn4cmr/Datasets/T1_dataset'
# One sub-folder per kind of file exported further down in this notebook.
for subfolder in ('Imgs', 'Gold', 'Additional_Info'):
    Path(dataset_path, subfolder).mkdir(parents=True, exist_ok=True)

In [35]:
# Get cohort

In [36]:
# Instantiate the QUAD manager and fetch the cohort document named 'AI_Comparison'.
quad   = QUAD_Manager()
cohort = quad.coho_coll.find_one({'name': 'AI_Comparison'})
# NOTE(review): coho_coll is presumably a MongoDB-style collection whose
# find_one yields None when nothing matches - confirm. Fail here with a clear
# message instead of an opaque subscript error on the first cohort['studyuids'].
if cohort is None:
    raise LookupError("cohort 'AI_Comparison' not found in quad.coho_coll")

In [37]:
# --- gold task ---------------------------------------------------------------
# Tasks returned by an `$in` query on the cohort's study uids.
tasks = list(quad.task_coll.find({'studyuids': {'$in': cohort['studyuids']}}))
# Keep the tasks displayed as 'Gold' that list exactly 363 studies and take the
# first match (raises IndexError when there is none).
gold_candidates = []
for task in tasks:
    if len(task['studyuids']) == 363 and task['displayname'] == 'Gold':
        gold_candidates.append(task)
gold_task = gold_candidates[0]
gold_id   = gold_task['_id']

# --- gold evaluations --------------------------------------------------------
# Evaluation documents of the gold task with image type 'SAX T1 PRE', keyed by
# study uid (a later document for the same study replaces an earlier one).
eval_filter = {'studyuid': {'$in': cohort['studyuids']}, 'imagetype': 'SAX T1 PRE', 'task_id': gold_id}
eval_docs = {}
for eval_doc in quad.eval_coll.find(eval_filter):
    eval_docs[eval_doc['studyuid']] = eval_doc
# Build one Evaluation object per retained document.
evals = {}
for suid, eval_doc in eval_docs.items():
    evals[suid] = Evaluation(quad, studyuid=eval_doc['studyuid'], imagetype=eval_doc['imagetype'], task_id=eval_doc['task_id'])

# --- image sops --------------------------------------------------------------
# Flatten the values of every evaluation's depthandtime2sop mapping into one list.
sops_st1 = []
for evaluation in evals.values():
    sops_st1.extend(evaluation.depthandtime2sop.values())

# --- image documents ---------------------------------------------------------
# One lookup per sop; the stored value is whatever find_one returns for it.
imgs_st1 = {}
for sop in sops_st1:
    imgs_st1[sop] = quad.dcm_coll.find_one({'sop': sop})

# --- annotation documents ----------------------------------------------------
# Gold-task annotations for the collected sops, keyed by sop.
annos_st1 = {}
for anno_doc in quad.anno_coll.find({'task_id': gold_id, 'sop': {'$in': sops_st1}}):
    annos_st1[anno_doc['sop']] = anno_doc

In [38]:
# Keep only the sops that have both an annotation and an image document.
annotated_sops = set(annos_st1)
# Image lookups that returned None are ignored here.
imaged_sops = {img_doc['sop'] for img_doc in imgs_st1.values() if img_doc is not None}
sops_st = annotated_sops & imaged_sops

# Restrict both mappings to the shared sops.
imgs_st = {}
for sop, img_doc in imgs_st1.items():
    if sop in sops_st:
        imgs_st[sop] = img_doc
annos_st = {}
for sop, anno_doc in annos_st1.items():
    if sop in sops_st:
        annos_st[sop] = anno_doc

# Sanity check: the three counts are expected to agree.
print(len(sops_st), len(annos_st), len(imgs_st))

721 721 721


In [39]:
# regroup everything into nested dicts: {studyuid -> {sop -> object}}

In [40]:
# Group the image documents by study: {studyuid -> {sop -> image document}}
imgs = {}
for img_doc in imgs_st.values():
    # setdefault creates the per-study dict the first time a study is seen.
    imgs.setdefault(img_doc['studyuid'], {})[img_doc['sop']] = img_doc
# Number of studies that have at least one image.
print(len(imgs))

283


In [41]:
# Group the annotation documents by study: {studyuid -> {sop -> annotation}}
annos = {}
for anno_doc in annos_st.values():
    study_uid = anno_doc['studyuid']
    if study_uid not in annos:
        annos[study_uid] = {}
    annos[study_uid][anno_doc['sop']] = anno_doc
# Number of studies that have at least one annotation.
print(len(annos))

283


In [42]:
# Bounding boxes of the 'lv_myo' contour: {studyuid -> {sop -> [xmin, xmax, ymin, ymax]}}
bbs = {}
for suid, study_annos in annos.items():
    study_boxes = bbs[suid] = {}
    for sop, anno_doc in study_annos.items():
        contour = shape(anno_doc['lv_myo']['cont'])
        # shapely reports bounds as (minx, miny, maxx, maxy); they are stored
        # here reordered as [xmin, xmax, ymin, ymax].
        xmin, ymin, xmax, ymax = contour.bounds
        study_boxes[sop] = [xmin, xmax, ymin, ymax]

In [43]:
# copy all over into their respective folders

In [44]:
# Write every gold annotation to <dataset_path>/Gold/<studyuid>/<sop>.json.
annos_folder = os.path.join(dataset_path, 'Gold')
# Bookkeeping keys left out of the exported files; study uid and sop are
# already encoded in the folder name and the file name.
dropped_keys = ('_id', 'task_id', 'studyuid', 'sop')
for suid, study_annos in annos.items():
    anno_folder = os.path.join(annos_folder, suid)
    Path(anno_folder).mkdir(parents=True, exist_ok=True)
    for sop, stored_anno in study_annos.items():
        # Export a filtered copy instead of popping keys from the shared dict.
        # Popping in place made this cell non-idempotent: on a second run the
        # keys were already gone, the resulting KeyError was swallowed by a
        # bare except, and no file was written at all.
        anno = {key: value for key, value in stored_anno.items() if key not in dropped_keys}
        anno_path = os.path.join(anno_folder, sop+'.json')
        with open(anno_path, "w") as outfile:
            json.dump(anno, outfile, indent=4)

In [45]:
# Copy the DICOM file of every annotated image to <dataset_path>/Imgs/<studyuid>/<sop>.dcm.
imgs_folder = os.path.join(dataset_path, 'Imgs')
# Iterate over annos (not imgs) on purpose: only images with contours are wanted.
for suid, study_annos in annos.items():
    img_folder = os.path.join(imgs_folder, suid)
    Path(img_folder).mkdir(parents=True, exist_ok=True)
    for sop in study_annos:
        # The image document's 'path' entry is used as the copy source.
        source_path = imgs[suid][sop]['path']
        target_path = os.path.join(img_folder, sop+'.dcm')
        shutil.copyfile(source_path, target_path)

In [46]:
# create additional info files (contain bounding boxes)

In [47]:
# Write one <dataset_path>/Additional_Info/<studyuid>/<sop>.json per annotated
# image, holding that image's bounding box.
additional_info_folder = os.path.join(dataset_path, 'Additional_Info')
for suid, study_annos in annos.items():
    ainfo_folder = os.path.join(additional_info_folder, suid)
    os.makedirs(ainfo_folder, exist_ok=True)
    for sop in study_annos:
        ainfo = {'bounding_box': bbs[suid][sop]}
        ainfo_path = os.path.join(ainfo_folder, sop+'.json')
        with open(ainfo_path, "w") as outfile:
            json.dump(ainfo, outfile, indent=4)