In [1]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
from workspace import paths_get, paths_join

sys.path.insert(1, '../mdai/')

import warnings
warnings.filterwarnings('ignore')

## Mapping filenames from dataset with the labels

The id numbers on md.AI are different from the OCT (ODD) id numbers, so we match the two on the last two characters of each annotation's `SOPInstanceUID`.

In [4]:
# Load the md.AI label metadata (presumably one row per md.AI annotation — confirm
# against the export script).
meta_data = pd.read_csv("../mdai/mdai_labelled_data_meta.csv")

# Hand-written lookup table. Columns: md.AI exam number, ODD eye id, and the
# last two characters of the matching Series / SOP instance UIDs.
mdai_to_odd_dict = [
    [1, 'ODD-245_OS', '74', '73'],
    [2, 'ODD-123_OS', '10', '43'],
    [3, 'ODD-227_OS', '87', '51'],
    [5, 'ODD-123_OD', '37', '46'],
    [6, 'ODD-227_OD', '03', '81'],
    [7, 'ODD-231_OS', '52', '13'],
]  # created manually

mdai_to_odd_map = pd.DataFrame(
    mdai_to_odd_dict, columns=['Exam_id', 'ODD_id', 'Series_id', 'SOP_id'])

# This is a mapper made from the last two Series/SOP values so that we know
# which segmentation maps onto which training data.
meta_data['SOP_id'] = meta_data['SOPInstanceUID'].str[-2:]

# validate='many_to_one' makes pandas raise if the hand-written table ever
# contains the same SOP_id twice, instead of silently duplicating label rows.
meta_data = pd.merge(
    meta_data, mdai_to_odd_map, on='SOP_id', how='left', validate='many_to_one')

# Rows without a mapping entry come out of the left join with ODD_id = NaN, which
# in turn makes Slice_ODD_id NaN. Report them with print(): warnings are silenced
# globally in the import cell, so warnings.warn would not be shown.
unmatched_rows = meta_data['ODD_id'].isna()
if unmatched_rows.any():
    print("%d label row(s) have no entry in mdai_to_odd_map (SOP_id: %s); "
          "their Slice_ODD_id is left empty."
          % (unmatched_rows.sum(),
             sorted(meta_data.loc[unmatched_rows, 'SOP_id'].dropna().unique())))

# Only some OCT B Scans were labelled, so we match not just the ODD-id but the
# slice numbers too.
meta_data['Slice_ODD_id'] = meta_data['frameNumber'].astype(str) + '_' + meta_data['ODD_id']

display(meta_data.iloc[:5, :5])

# Persist the full mapping; create the target folder first so a fresh checkout
# does not fail on a missing directory.
id_map_path = '../data/mdai-odd/id_map.csv'
os.makedirs(os.path.dirname(id_map_path), exist_ok=True)
meta_data.to_csv(id_map_path, index=False)

Unnamed: 0,id,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,labelName
0,A_Km5eJK,1.2.826.0.1.3680043.8.498.18322454431392330409...,1.2.826.0.1.3680043.8.498.26876949399425251869...,1.2.826.0.1.3680043.8.498.70189596488780155609...,Drusen
1,A_K0a9yV,1.2.826.0.1.3680043.8.498.18322454431392330409...,1.2.826.0.1.3680043.8.498.26876949399425251869...,1.2.826.0.1.3680043.8.498.70189596488780155609...,Drusen
2,A_VovNDK,1.2.826.0.1.3680043.8.498.18322454431392330409...,1.2.826.0.1.3680043.8.498.26876949399425251869...,1.2.826.0.1.3680043.8.498.70189596488780155609...,Drusen
3,A_gjq2PL,1.2.826.0.1.3680043.8.498.18322454431392330409...,1.2.826.0.1.3680043.8.498.26876949399425251869...,1.2.826.0.1.3680043.8.498.70189596488780155609...,Drusen
4,A_KAkM1L,1.2.826.0.1.3680043.8.498.18322454431392330409...,1.2.826.0.1.3680043.8.498.26876949399425251869...,1.2.826.0.1.3680043.8.498.70189596488780155609...,Drusen


## Store and rename image files.
We take the .png files in `png_dir` that correspond to a labelled slice and move them into the subfolder 'images_with_masks'.

In [7]:
base_dir, data_dir, _, _, _ = paths_get('oct2l')
png_dir = paths_join(data_dir, 'project-files-png')
img_dir = 'images_with_masks'
img_mdai_dir = 'images_with_masks_mdai'

src_dir = png_dir
dst_dir = paths_join(png_dir, img_dir)

# -- make folder if it doesn't already exist
# (`~os.path.exists(...)` is a bitwise NOT on a bool and is always truthy, so it
#  never worked as a guard; test the directory explicitly instead.)
if os.path.isdir(dst_dir):
    print("'~/%s' folder already exists, can proceed." % img_dir)
else:
    os.makedirs(dst_dir, exist_ok=True)

# Slice ids that have at least one label. NaN entries (label rows with no ODD
# mapping) are dropped: they cannot name a file.
labelled_slice_ids = set(meta_data['Slice_ODD_id'].dropna().unique())

# List the source folder once and move every file whose name without extension
# is exactly a labelled slice id. An exact match is required: a substring test
# would let '1_ODD-245_OS' also pick up '11_ODD-245_OS.png', '21_ODD-245_OS.png', ...
for fname in sorted(os.listdir(src_dir)):
    if os.path.splitext(fname)[0] in labelled_slice_ids:
        shutil.move(os.path.join(src_dir, fname),
                    os.path.join(dst_dir, fname))   # move over.

bdots
'~/images_with_masks' folder already exists, can proceed.


These images are then copied into 'images_with_masks_mdai', one copy per md.AI id, so that the filenames match the masks/labels data from md.AI.

In [8]:
new_src_dir = paths_join(png_dir, img_dir)
new_dst_dir = paths_join(png_dir, img_mdai_dir)  # mdai_ids

# The destination folder has to exist before shutil.copy can write into it.
os.makedirs(new_dst_dir, exist_ok=True)

# Build the Slice_ODD_id -> [mdai ids] lookup once instead of filtering the
# whole frame for every file. groupby skips rows whose Slice_ODD_id is NaN.
mdai_ids_by_slice = {
    slice_id: list(ids)
    for slice_id, ids in meta_data.groupby('Slice_ODD_id')['id']
}

for this_png in sorted(os.listdir(new_src_dir)):
    slice_odd_id = os.path.splitext(this_png)[0]
    src = paths_join(new_src_dir, this_png)
    # there can be multiple mdai_ids to one slice_odd_id!
    for mdai_id in mdai_ids_by_slice.get(slice_odd_id, []):
        dst = paths_join(new_dst_dir, (mdai_id + '.png'))
        # impt: copy, don't move — the same source image is needed again for
        # every further label that belongs to this slice.
        shutil.copy(src, dst)

Now, you should have:
- /data_dir/png_dir/img_dir/      -> images with Slice_ODD_id
- /data_dir/png_dir/img_mdai_dir/ -> images with mdai_id