In [None]:
import os
import shutil
import json
from pathlib import Path
import traceback

import pydicom
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import shape, Polygon
from shapely.affinity import scale

from RoomOfRequirement.Quad import QUAD_Manager
from RoomOfRequirement.Evaluation import Evaluation

In [None]:
# Create the output folder layout: <dataset_path>/{Imgs, Gold, Additional_Info}
dataset_path = '/Users/thomas/Documents/GitHub/cnn4cmr/Datasets/SAXCINE_dataset'
for subfolder in ('Imgs', 'Gold', 'Additional_Info'):
    # parents=True / exist_ok=True: safe to re-run, missing parents are created
    target_dir = Path(dataset_path) / subfolder
    target_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Get cohort

In [None]:
# Create the QUAD manager (its *_coll collections are queried in the cells below)
# and fetch the cohort document whose name is 'CineCS'.
quad = QUAD_Manager()
cohort_filter = {'name': 'CineCS'}
cohort = quad.coho_coll.find_one(cohort_filter)

In [None]:
# --- Gold task -------------------------------------------------------------
# Among all tasks that touch the cohort's studies, the gold task is the one
# named 'Gold' that covers exactly 150 studies.
tasks     = list(quad.task_coll.find({'studyuids': {'$in': cohort['studyuids']}}))
gold_task = [t for t in tasks if len(t['studyuids'])==150 and t['displayname']=='Gold'][0]
gold_id   = gold_task['_id']

# --- Gold evaluations --------------------------------------------------------
# One Evaluation object per study (keyed by studyuid), restricted to
# imagetype 'SAX CINE' and the gold task.
evals = {eva['studyuid']:eva for eva in quad.eval_coll.find({'studyuid': {'$in': cohort['studyuids']}, 'imagetype': 'SAX CINE', 'task_id': gold_id})}
evals = {k: Evaluation(quad, studyuid=e['studyuid'], imagetype=e['imagetype'], task_id=e['task_id']) for k,e in evals.items()}

# --- ES / ED phases and slice position labels ---------------------------------
# Result layout:
#   phases[studyuid][sop] = {'ESP' or 'EDP': phase, 'Slice': d, '<contour> pos': label}
#   sops_st1              = all SOPs of the ES and ED phase, over all slices
#
# (contour name, cardiac phase, key of the "first slice with this contour" tracker)
# Written out explicitly so every contour/phase pair provably owns its tracker.
contour_tracks = [('rv_endo', 'ES', 'rvesb'),  ('rv_endo', 'ED', 'rvedb'),
                  ('lv_endo', 'ES', 'lvesb'),  ('lv_endo', 'ED', 'lvedb'),
                  ('lv_myo',  'ES', 'lvmesb'), ('lv_myo',  'ED', 'lvmedb')]

phases   = {}
sops_st1 = []
for suid, eva in evals.items():
    # NOTE(review): evals were queried with imagetype 'SAX CINE', so this
    # 'SAX CS' exclusion may never trigger - confirm the intended image type.
    if eva.imagetype=='SAX CS' and eva.name in ['ECSPRESS-038', 'ECSPRESS-114']: continue
    # Order: [LV end-systolic, LV end-diastolic, RV end-systolic, RV end-diastolic]
    try:
        phases_tmp = [eva.clinical_parameters[s][0] for s in ['LVESP', 'LVEDP', 'RVESP', 'RVEDP']]
    except Exception:
        # Without phases this study cannot be labelled. Skip it instead of
        # silently reusing phases_tmp from the previous study.
        print('No ES/ED phases available, skipping: ', suid, eva.name)
        continue
    phases[suid] = dict()
    if phases_tmp[0]!=phases_tmp[2] or phases_tmp[1]!=phases_tmp[3]:
        print('Different phases!!!: ', phases_tmp, suid, eva.name)

    # -1 means: no slice containing this contour has been seen yet
    base_idxs = {base_contname: -1 for _, _, base_contname in contour_tracks}
    for d in range(eva.nr_slices):
        # Only the LV phases are used to pick the ES / ED images
        es_sop, ed_sop = eva.depthandtime2sop[(d,phases_tmp[0])], eva.depthandtime2sop[(d,phases_tmp[1])]
        sops_st1.extend([es_sop, ed_sop])
        phases[suid][es_sop] = {'ESP': phases_tmp[0], 'Slice': d}
        phases[suid][ed_sop] = {'EDP': phases_tmp[1], 'Slice': d}
        by_phase = {'ES': (es_sop, phases_tmp[0]), 'ED': (ed_sop, phases_tmp[1])}

        for cname, phase_key, base_contname in contour_tracks:
            phasesop, phase = by_phase[phase_key]
            has_contour = eva.get_anno(d,phase).has_contour(cname)
            if base_idxs[base_contname]==-1:
                # Still searching for the first contoured slice
                if has_contour:
                    label = 'Basal'
                    base_idxs[base_contname] = d
                else:
                    label = 'Out Basal'
            elif has_contour:
                # Contoured slice after the basal one: apical if it is the last
                # slice or the next slice no longer carries this contour
                is_last = d==eva.nr_slices-1 or not eva.get_anno(d+1,phase).has_contour(cname)
                label = 'Apex' if is_last else 'Midventricle'
            else:
                label = 'Out Apex'
            # Every label - including 'Basal' - is stored under '<contour> pos'
            phases[suid][phasesop][cname+' pos'] = label

# get imgs (find_one yields None for SOPs that have no image document)
imgs_st1 = {sop: quad.dcm_coll.find_one({'sop': sop}) for sop in sops_st1}
# get annos of the gold task for the same SOPs
annos_st1 = {a['sop']:a for a in quad.anno_coll.find({'task_id': gold_id, 'sop': {'$in': sops_st1}})}

In [None]:
# Limit images and annotations to the collected SOPs, and drop SOPs for which
# no image document was found (None), since the regrouping below indexes into
# every image document.
sops_st      = sops_st1
sop_lookup   = set(sops_st)  # constant-time membership instead of scanning the list
missing_imgs = [s for s, i in imgs_st1.items() if i is None]
imgs_st  = {s:i for s,i in imgs_st1.items() if s in sop_lookup and i is not None}
annos_st = {k:a for k,a in annos_st1.items() if k in sop_lookup}
print('all sops: ', len(sops_st), '\nannos:\t', len(annos_st), '\nimages: ', len(imgs_st))
# Make dropped SOPs visible instead of failing later on a None image
if missing_imgs: print('sops without image document: ', len(missing_imgs))

In [None]:
# Regroup images and annotations into nested dicts: {studyuid -> {sop -> object}}

In [None]:
# Group the image documents by study: {studyuid -> {sop -> image document}}
imgs = {}
for img_doc in imgs_st.values():
    study_imgs = imgs.setdefault(img_doc['studyuid'], dict())
    study_imgs[img_doc['sop']] = img_doc
print(len(imgs))

In [None]:
# Group the annotation documents by study: {studyuid -> {sop -> annotation}}
annos = {}
for anno_doc in annos_st.values():
    study_annos = annos.setdefault(anno_doc['studyuid'], dict())
    study_annos[anno_doc['sop']] = anno_doc
print(len(annos))

In [None]:
# Build the per-image additional info:
#   - bounding boxes of the merged heart contours at several scale factors
#   - ES/ED phase, slice index and contour position labels taken from `phases`
# Layout: bbs[studyuid][sop] = {'bounding_box_scale_<f>': [xmin, xmax, ymin, ymax], ...}
bbs = {suid:dict() for suid in imgs.keys()}
for suid, study_imgs in imgs.items():
    study_annos = annos.get(suid, {})  # a study may have images but no annotations
    for sop in study_imgs.keys():
        bbs[suid][sop] = dict()
        anno = study_annos.get(sop, {})

        # Merge all available heart contours of this image into one shape
        heart_shape = Polygon()
        for cname in ['lv_myo', 'lv_epi', 'rv_endo']:
            entry = anno.get(cname)
            if not isinstance(entry, dict): continue  # contour not annotated on this image
            # The geometry is stored under 'cont'; 'geom' is accepted as well in
            # case the key has already been renamed for export.
            geom = entry.get('cont', entry.get('geom'))
            if geom is None: continue
            try:
                heart_shape = heart_shape.union(shape(geom))
            except Exception as err:
                # Best effort: a broken geometry must not abort the whole export
                print('Could not merge contour: ', suid, sop, cname, err)

        for scale_f in [1.0, 1.5, 2.0, 2.5, 3.0, 3.5]:
            # Scale about the shape's centre; an empty shape has no bounding box
            box = None if heart_shape.is_empty else scale(heart_shape, xfact=scale_f, yfact=scale_f, origin='center').bounds
            if box is None or np.isnan(box[0]):
                # Empty strings mark "no bounding box" (presumably to keep NaN out of the JSON files)
                xmin, ymin, xmax, ymax = "","","",""
            else:
                xmin, ymin, xmax, ymax = box
            bbs[suid][sop]['bounding_box_scale_'+str(scale_f)] = [xmin, xmax, ymin, ymax]
        # Add phase, slice index and position labels for this image
        bbs[suid][sop] |= phases[suid][sop]

In [None]:
# copy all over into their respective folders

In [None]:
# Write one JSON file per annotated image: Gold/<studyuid>/<sop>.json
# The exported dict is built as a copy, so `annos` itself is left untouched:
# the cell can be re-run, and cells reading annos[...]['cont'] keep working
# regardless of execution order.
annos_folder = os.path.join(dataset_path, 'Gold')
db_only_keys = ('_id', 'task_id', 'studyuid', 'sop')  # database bookkeeping, not exported
for suid, study_annos in annos.items():
    anno_folder = os.path.join(annos_folder, suid)
    Path(anno_folder).mkdir(parents=True, exist_ok=True)
    for sop, anno in study_annos.items():
        contours = {k: v for k, v in anno.items() if k not in db_only_keys}
        # Every remaining entry is expected to be a dict describing one contour.
        # Anything else is skipped visibly rather than silently.
        if not all(isinstance(v, dict) for v in contours.values()):
            print('Unexpected annotation structure, skipping: ', suid, sop)
            continue
        export = dict()
        for cname, entry in contours.items():
            entry = dict(entry)  # shallow copy: the rename below must not touch `annos`
            # The dataset format names the geometry 'geom' instead of 'cont'
            if 'cont' in entry: entry['geom'] = entry.pop('cont')
            export[cname] = entry
        anno_path = os.path.join(anno_folder, sop+'.json')
        with open(anno_path, "w") as outfile:
            json.dump(export, outfile, indent=4)

In [None]:
# Copy the DICOM file of every image in `imgs` to Imgs/<studyuid>/<sop>.dcm
# NOTE(review): an earlier comment said this should iterate `annos` (images with
# contours only); the code iterates `imgs` - confirm which is intended.
imgs_folder = os.path.join(dataset_path, 'Imgs')
for suid, study_imgs in imgs.items():
    img_folder = os.path.join(imgs_folder, suid)
    Path(img_folder).mkdir(parents=True, exist_ok=True)
    for sop, img in study_imgs.items():
        # img['path'] is the source file location stored in the image document
        target_path = os.path.join(img_folder, sop+'.dcm')
        shutil.copyfile(img['path'], target_path)

In [None]:
# create additional info files (contain bounding boxes)

In [None]:
# Write the per-image info collected in `bbs` to Additional_Info/<studyuid>/<sop>.json
additional_info_folder = os.path.join(dataset_path, 'Additional_Info')
for suid, study_info in bbs.items():
    ainfo_folder = os.path.join(additional_info_folder, suid)
    Path(ainfo_folder).mkdir(parents=True, exist_ok=True)
    for sop, info in study_info.items():
        ainfo_path = os.path.join(ainfo_folder, sop+'.json')
        with open(ainfo_path, "w") as outfile:
            json.dump(info, outfile, indent=4)