In [None]:
import os
import shutil
import json
from pathlib import Path
import traceback

import pydicom
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import shape
from shapely.affinity import scale

from RoomOfRequirement.Quad import QUAD_Manager
from RoomOfRequirement.Evaluation import Evaluation

In [None]:
# Create the top-level dataset layout below dataset_path:
#   Imgs            -> DICOM images
#   Gold            -> gold annotations (json)
#   Additional_Info -> per-image extra information (bounding boxes)
dataset_path = '/Users/thomas/Documents/GitHub/cnn4cmr/Datasets/T1_dataset'
for subfolder in ('Imgs', 'Gold', 'Additional_Info'):
    # missing parents are created, already existing folders are left untouched
    Path(dataset_path, subfolder).mkdir(parents=True, exist_ok=True)

In [None]:
# Get cohort

In [None]:
# Connect to the QUAD database and fetch the cohort document by its name.
quad        = QUAD_Manager()
cohort_name = 'AI_Comparison'
cohort      = quad.coho_coll.find_one({'name': cohort_name})
# Fail early with a readable message instead of a TypeError on cohort['studyuids']
# further down (presumably find_one returns None when nothing matches -- confirm
# against the collection type behind QUAD_Manager).
if cohort is None:
    raise LookupError("Cohort '" + cohort_name + "' was not found in the cohort collection")

In [None]:
# --- gold task ---
# all tasks whose 'studyuids' field matches one of the cohort's study uids
tasks = list(quad.task_coll.find({'studyuids': {'$in': cohort['studyuids']}}))
# the gold task is the one displayed as 'Gold' that covers exactly 363 studies
# NOTE(review): 363 is hard-coded -- presumably the expected size of the gold task; confirm
gold_candidates = [task for task in tasks
                   if len(task['studyuids'])==363 and task['displayname']=='Gold']
gold_task = gold_candidates[0]  # raises IndexError when no task matches
gold_id   = gold_task['_id']

# --- gold evaluations ---
# raw evaluation documents of the gold task for image type 'SAX T1 PRE', keyed by studyuid
eval_query = {'studyuid': {'$in': cohort['studyuids']}, 'imagetype': 'SAX T1 PRE', 'task_id': gold_id}
eval_docs  = dict()
for eval_doc in quad.eval_coll.find(eval_query):
    eval_docs[eval_doc['studyuid']] = eval_doc
# wrap every document into an Evaluation object (same keys)
evals = dict()
for study_key, eval_doc in eval_docs.items():
    evals[study_key] = Evaluation(quad,
                                  studyuid  = eval_doc['studyuid'],
                                  imagetype = eval_doc['imagetype'],
                                  task_id   = eval_doc['task_id'])

# --- image sops of all evaluations (flat list) ---
sops_st1 = []
for evaluation in evals.values():
    sops_st1.extend(evaluation.depthandtime2sop.values())

# --- image documents: one lookup per sop, the value is None when nothing was found ---
imgs_st1 = dict()
for sop in sops_st1:
    imgs_st1[sop] = quad.dcm_coll.find_one({'sop': sop})

# --- gold annotations for these sops, keyed by sop ---
annos_st1 = dict()
for anno_doc in quad.anno_coll.find({'task_id': gold_id, 'sop': {'$in': sops_st1}}):
    annos_st1[anno_doc['sop']] = anno_doc

In [None]:
# Keep only the sops that have both a gold annotation and an image document.
sops_with_anno = set(annos_st1.keys())
sops_with_img  = {img_doc['sop'] for img_doc in imgs_st1.values() if img_doc is not None}
sops_st  = sops_with_anno & sops_with_img
imgs_st  = {sop: img_doc  for sop, img_doc  in imgs_st1.items()  if sop in sops_st}
annos_st = {sop: anno_doc for sop, anno_doc in annos_st1.items() if sop in sops_st}
# sanity check: the three counts are printed side by side
print(len(sops_st), len(annos_st), len(imgs_st))

In [None]:
# regroup everything into nested dicts: {studyuid -> {sop -> object}}

In [None]:
# Regroup the image documents by study: {studyuid -> {sop -> image document}}
imgs = dict()
for img_doc in imgs_st.values():
    # the first image of a study creates that study's inner dict
    imgs.setdefault(img_doc['studyuid'], dict())[img_doc['sop']] = img_doc
# number of studies with at least one image
print(len(imgs))

In [None]:
# Regroup the annotation documents by study: {studyuid -> {sop -> annotation}}
annos = {}
for anno_doc in annos_st.values():
    study_annos = annos.setdefault(anno_doc['studyuid'], {})
    study_annos[anno_doc['sop']] = anno_doc
# number of studies with at least one annotation
print(len(annos))

In [None]:
# Bounding boxes of the 'lv_myo' contour, enlarged by several scale factors.
# Layout: {studyuid -> {sop -> {'bounding_box_scale_<factor>' -> [xmin, xmax, ymin, ymax]}}}
scale_factors = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]
bbs = dict()
for suid, study_annos in annos.items():
    bbs[suid] = dict()
    for sop, anno_doc in study_annos.items():
        # the geometry does not depend on the scale factor, so it is built once per image
        contour_geom = shape(anno_doc['lv_myo']['cont'])
        boxes = dict()
        for factor in scale_factors:
            scaled_geom = scale(contour_geom, xfact=factor, yfact=factor, origin='center')
            # .bounds is unpacked as (xmin, ymin, xmax, ymax) but stored as [xmin, xmax, ymin, ymax]
            xmin, ymin, xmax, ymax = scaled_geom.bounds
            boxes['bounding_box_scale_'+str(factor)] = [xmin, xmax, ymin, ymax]
        bbs[suid][sop] = boxes

In [None]:
# copy all over into their respective folders

In [None]:
# Export the gold annotations as json, one file per image: Gold/<studyuid>/<sop>.json
annos_folder = os.path.join(dataset_path, 'Gold')
# Fields that are left out of the exported files. studyuid and sop are already
# encoded in the output path; '_id' and 'task_id' are presumably database ids that
# json cannot serialize -- confirm.
excluded_keys = ('_id', 'task_id', 'studyuid', 'sop')
for suid in annos.keys():
    anno_folder = os.path.join(annos_folder, suid)
    Path(anno_folder).mkdir(parents=True, exist_ok=True)
    for sop in annos[suid].keys():
        # Write a filtered copy instead of popping keys from the shared dict:
        # the cell stays re-runnable, annos is left intact for other cells, and an
        # annotation that lacks one of the excluded keys is still exported instead
        # of being silently skipped.
        anno = {key: value for key, value in annos[suid][sop].items() if key not in excluded_keys}
        anno_path = os.path.join(anno_folder, sop+'.json')
        with open(anno_path, "w") as outfile:
            json.dump(anno, outfile, indent=4)

In [None]:
# Copy the DICOM files into Imgs/<studyuid>/<sop>.dcm
imgs_folder = os.path.join(dataset_path, 'Imgs')
# iterating over annos (not imgs) is on purpose: only images with contours are wanted
for suid, study_annos in annos.items():
    img_folder = os.path.join(imgs_folder, suid)
    Path(img_folder).mkdir(parents=True, exist_ok=True)
    for sop in study_annos:
        source_path = imgs[suid][sop]['path']
        target_path = os.path.join(img_folder, sop+'.dcm')
        shutil.copyfile(source_path, target_path)

In [None]:
# create additional info files (contain bounding boxes)

In [None]:
# Write the bounding boxes of every annotated image to
# Additional_Info/<studyuid>/<sop>.json
additional_info_folder = os.path.join(dataset_path, 'Additional_Info')
for suid, study_annos in annos.items():
    ainfo_folder = os.path.join(additional_info_folder, suid)
    Path(ainfo_folder).mkdir(parents=True, exist_ok=True)
    for sop in study_annos:
        ainfo_path = os.path.join(ainfo_folder, sop+'.json')
        with open(ainfo_path, "w") as outfile:
            box_info = bbs[suid][sop]
            json.dump(box_info, outfile, indent=4)

In [None]:
# FOR CARDIOMETRY
# Convert every annotation file Gold/<studyuid>/<file> into Gold2/<studyuid>/<file>.
# In the converted file each top-level entry keeps only its 'cont' value, stored
# under the key 'geom'; entries without 'cont' become empty dicts.

indataset  = os.path.join(dataset_path,'Gold')
outdataset = os.path.join(dataset_path,'Gold2')
for suid in os.listdir(indataset):
    suid_folder = os.path.join(indataset, suid)
    # skip macOS metadata files and anything else that is not a study folder;
    # only the entry name is checked, not the whole path
    if '.DS_Store' in suid or not os.path.isdir(suid_folder): continue
    # build the output folder from outdataset instead of replacing 'Gold' in the
    # full path, which would also rewrite any other 'Gold' occurring in the path
    out_suid_folder = os.path.join(outdataset, suid)
    for p in os.listdir(suid_folder):
        if '.DS_Store' in p: continue
        in_anno_path  = os.path.join(suid_folder, p)
        out_anno_path = os.path.join(out_suid_folder, p)
        # the context manager closes the input file right after reading
        with open(in_anno_path) as f:
            anno_dict = json.load(f)
        new_anno_dict = dict()
        for k, entry in anno_dict.items():
            new_anno_dict[k] = {'geom': entry['cont']} if 'cont' in entry.keys() else dict()
        # the output folder is created lazily, only when there is a file to write
        Path(out_suid_folder).mkdir(parents=True, exist_ok=True)
        with open(out_anno_path, 'w') as f:
            json.dump(new_anno_dict, f)