In [1]:
%matplotlib inline

import pandas as pd
import cv2
import os
from glob import glob
import pydicom as dicom
import numpy as np
import shutil
from matplotlib import pyplot as plt

# Selects which hard-coded dataset root is used: 'LINUX' picks the Linux path,
# any other value falls through to the Windows path.
OS_ENV = 'LINUX' # 'LINUX' or 'WIN'

# Output image size in pixels; passed to cv2.resize as (WIDTH, HEIGHT).
WIDTH = 1400
HEIGHT = 1750

In [2]:
# Resolve the dataset root for the current machine.
# NOTE(review): both roots are machine-specific absolute paths — consider
# making the root configurable.
if OS_ENV == 'LINUX':
    _base_path = "/home/huray/data/NCC"
else:
    _base_path = "D:/Work/NCC/data/NCC"

# Input DICOM tree and output image tree are siblings under the dataset root.
dicom_base_path = os.path.join(_base_path, "dicom")
# Built from the root directly: str.replace('dicom', ...) on the full path
# would also rewrite any other 'dicom' substring appearing in the root.
jpg_base_path = os.path.join(_base_path, "img_retinanet")

# Accumulators filled while converting files.
dcmtojpg_img_path = []       # paths of the JPEGs that were written
errored_data_counter = 0     # number of files whose pixel data could not be decoded
files_with_wrong_size = []   # DICOM files skipped for being too small

jpg_sub_path = 'dicom_to_jpg'
img_save_path = os.path.join(jpg_base_path, jpg_sub_path)

# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(img_save_path, exist_ok=True)

# Collect every .dcm file below the DICOM root, in a stable order.
files = sorted(glob(dicom_base_path+"/**/*.dcm", recursive=True))
print(files[:5])

['/home/huray/data/NCC/dicom/abn_1/_ACD_/00000/0001.dcm', '/home/huray/data/NCC/dicom/abn_1/_ACD_/00000/0002.dcm', '/home/huray/data/NCC/dicom/abn_1/_ACD_/00000/0003.dcm', '/home/huray/data/NCC/dicom/abn_1/_ACD_/00000/0004.dcm', '/home/huray/data/NCC/dicom/abn_1/_ACD_/00001/0001.dcm']


In [3]:
def dicom2jpg(f_path):
    """Convert one DICOM file into an 8-bit, 3-channel image array.

    The pixel data is min-max scaled to 0..255, resized to (WIDTH, HEIGHT)
    and replicated into three identical channels.

    Parameters
    ----------
    f_path : str
        Path of the .dcm file to convert.

    Returns
    -------
    numpy.ndarray or list
        A uint8 array of shape (HEIGHT, WIDTH, 3) on success. An empty list
        when the file is skipped: either it is 1 MiB or smaller (the path is
        appended to ``files_with_wrong_size``) or its raw pixel data could not
        be reshaped (``errored_data_counter`` is incremented).

    Raises
    ------
    Exception
        Any error other than AttributeError raised while decoding
        ``pixel_array`` is logged, counted and re-raised.
    """
    # The counter is a module-level int that is rebound here; without this
    # declaration `+= 1` raises UnboundLocalError.
    global errored_data_counter

    # Skip files of 1 MiB or less before paying the cost of parsing them.
    if os.path.getsize(f_path) <= 1024 * 1024:
        files_with_wrong_size.append(f_path)
        print('DCM file is too small ' + f_path)
        return []

    # dcmread is the current name of the deprecated pydicom read_file.
    mammo_dcm = dicom.dcmread(f_path)

    try:
        mammo_arr = mammo_dcm.pixel_array
        mammo_arr = mammo_arr.astype(np.uint16)
    except AttributeError:  # some files are themselves malformed
        try:
            pixel_data = mammo_dcm[0x7fe0, 0x0010].value  # raw PixelData bytes stored in the file
            rows = mammo_dcm[0x0028, 0x0010].value  # Rows from the metadata
            cols = mammo_dcm[0x0028, 0x0011].value  # Columns from the metadata

            # NOTE(review): the last byte is dropped as in the original code;
            # the reason is not visible here — confirm it is intended.
            # frombuffer replaces the deprecated binary mode of np.fromstring
            # and raises ValueError on a size mismatch just the same.
            mammo_arr = np.frombuffer(pixel_data[:-1], dtype=np.uint16)
            mammo_arr = np.reshape(mammo_arr, (rows, cols))
        except ValueError:
            print('corrupted file: ' + f_path[:70])
            errored_data_counter += 1
            return []
        else:
            print('Attribute error" ' + f_path[:70])
    except Exception as e:
        print('different error: ' + f_path[:70])
        print(e)
        errored_data_counter += 1
        raise

    # Min-max scale to 0..255. A constant image has zero range; emit zeros
    # instead of dividing by zero.
    lowest = np.amin(mammo_arr)
    highest = np.amax(mammo_arr)
    if highest == lowest:
        mammo_arr_final_ = np.zeros(mammo_arr.shape, dtype=np.uint8)
    else:
        mammo_arr_final_ = (mammo_arr - lowest) / (highest - lowest) * 255
        mammo_arr_final_ = mammo_arr_final_.astype(np.uint8)

    mammo_arr_final_ = cv2.resize(mammo_arr_final_, (WIDTH, HEIGHT))
    # Replicate the single channel three times to get a 3-channel image.
    mammo_arr_final_ = np.asarray(np.dstack((mammo_arr_final_, mammo_arr_final_, mammo_arr_final_)), dtype=np.uint8)

    return mammo_arr_final_

In [4]:
# Start from clean accumulators so that re-running this cell does not
# duplicate CSV rows or double-count skipped files.
dcmtojpg_img_path.clear()
files_with_wrong_size.clear()
errored_data_counter = 0

for mammo_path in files:

    mammo_arr_final = dicom2jpg(mammo_path)

    # dicom2jpg returns an empty list for files it skipped.
    if len(mammo_arr_final) == 0:
        continue

    # Derive the file-name prefix from the class folder found in the path.
    if 'abn_1' in mammo_path:
        prefix_fname = 'abn1'
    elif 'abn_2' in mammo_path:
        prefix_fname = 'abn2'
    elif 'normal' in mammo_path:
        prefix_fname = ''
    else:
        # A bare `raise` with no active exception only yields an opaque
        # RuntimeError; report the offending path instead.
        raise ValueError('cannot determine class prefix from path: ' + mammo_path)

    # Split on the platform separator so the Windows configuration works too.
    _split = os.path.normpath(mammo_path).split(os.sep)
    file_stem = os.path.splitext(_split[-1])[0]
    img_file_name = '{}-{}-{}'.format(_split[-3], _split[-2], file_stem)

    mammo_jpg_path = os.path.join(img_save_path, prefix_fname+img_file_name+'.jpg')
    # cv2.imwrite signals failure through its return value; do not list an
    # image that was never written.
    if not cv2.imwrite(mammo_jpg_path, mammo_arr_final):
        print('failed to write ' + mammo_jpg_path)
        continue

    dcmtojpg_img_path.append(mammo_jpg_path)

print("ERRORED DATA COUNT: ", errored_data_counter)
print("FILES WITH WRONG SIZE COUNT: ", len(files_with_wrong_size))
print("FILES WITH WRONG SIZE: ", files_with_wrong_size)

# One row per written image; the box and class columns are left empty.
_data_df = pd.DataFrame({'img_path':dcmtojpg_img_path, 'x1':'', 'y1':'', 'x2':'', 'y2':'', 'class_name':''})
_data_df = _data_df[['img_path', 'x1', 'y1', 'x2', 'y2', 'class_name']]

print('DATA COUNTS: ', len(_data_df))

_data_df.to_csv(os.path.join(jpg_base_path, 'dicom2jpg_data.csv'), header=False, index=False)

DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00027/30001.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00027/30002.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00027/30003.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00027/30004.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00033/20001.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00033/20002.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00033/20003.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00033/20004.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00059/10001.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00059/10002.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00059/10003.dcm
DCM file is too small /home/huray/data/NCC/dicom/normal/normal/00059/10004.dcm
DCM file is too small /home/huray/data/NCC/dicom/nor