In [0]:
import pydicom
import os
import numpy as np
import pandas as pd
from matplotlib import cm
from matplotlib import pyplot as plt
import cv2
import seaborn as sns
from tqdm import tqdm
import xml.etree.cElementTree as ET
import math

In [0]:
def plt_show(image, figsize = (10,10)):
  """Display ``image`` in a new matplotlib figure using the "bone" colormap.

  Args:
    image: Array-like accepted by ``plt.imshow``.
    figsize: ``(width, height)`` passed to ``plt.figure``.
  """
  bone_cmap = plt.cm.bone
  plt.figure(figsize=figsize)
  plt.imshow(image, cmap=bone_cmap)
  plt.show()

In [0]:
# Directory whose entries are listed and read as DICOM files by the loops below.
train_folder = 'train_images/'
# Path of the label CSV loaded into class_df (a file path, despite the "_folder" suffix).
train_labels_folder = 'Dataset/train_labels.csv'
# Where the export loop writes the training JPEGs (used to build the cv2.imwrite target path).
train_img_folder = 'Dataset/train/Pneumonia'
# Not referenced anywhere in the visible cells.
test_img_folder = 'Dataset/test/Pneumonia'

In [0]:
# Starts empty; nothing in the visible cells appends to it.
train_data = []
# Label table. The loops below filter it on its patientId column and unpack each
# row positionally as (patientId, x, y, width, height, Target).
class_df = pd.read_csv(train_labels_folder)

In [0]:
# Preview cell: for the first few DICOM files, print the header and label info of
# every positive (Target == 1) row and show the image with its bounding boxes.
#
# NOTE: show_dcm_info and show_class_df are defined in later cells of this
# notebook; those cells must have been executed before this one.

# The former `counter == 20` check ran *after* processing each file, so 21 files
# were previewed. The limit is now explicit and the progress bar total is accurate.
MAX_PREVIEW_FILES = 21

for file_name in tqdm(os.listdir(train_folder)[:MAX_PREVIEW_FILES]):
  # `file_path` must stay a notebook-level name: show_dcm_info may read it.
  file_path = os.path.join(train_folder, file_name)
  data = pydicom.dcmread(file_path)
  rows = class_df[class_df.patientId == data.PatientID]
  print()

  # Decoded lazily (only when a positive row exists) and copied, so the
  # rectangles are drawn on a scratch image rather than on the dataset's own
  # pixel array. Boxes of the same file still accumulate on that one image.
  preview = None
  for patientId, x, y, width, height, Target in rows.values:
    if Target != 1:
      continue

    show_dcm_info(data)
    show_class_df(patientId, x, y, width, height, Target)

    if preview is None:
      preview = data.pixel_array.copy()

    top_left = (int(x), int(y))
    bottom_right = (int(x) + int(width), int(y) + int(height))
    # NOTE(review): if the image is single-channel, OpenCV only uses the first
    # component of this colour tuple (0) - confirm that is the intended shade.
    suspicious_area = cv2.rectangle(preview, top_left, bottom_right, (0,255,0), 2)
    plt_show(suspicious_area, (5,5))

  print()

In [0]:
def show_class_df(patientId, x, y, width, height, Target):
  """Print the fields of one label row, one "label: value" line per field.

  Args:
    patientId: Patient identifier of the row.
    x, y: Values printed as the box origin.
    width, height: Values printed as the box extent.
    Target: Label value of the row.
  """
  labelled_fields = (
      ("Patient Id ....:", patientId),
      ("x ....:", x),
      ("y ....:", y),
      ("width.....:", width),
      ("height....:", height),
      ("Target....:", Target),
  )
  for label, value in labelled_fields:
    print(label, value)

In [0]:
def show_dcm_info(data, file_path=None):
  """Print a short summary of a DICOM dataset's header fields.

  Args:
    data: Dataset object (e.g. the result of ``pydicom.dcmread``). The
      attributes printed below are read from it, and it must support
      ``'Keyword' in data`` membership tests.
    file_path: Path shown on the "Filename" line. When omitted, the dataset's
      own ``filename`` attribute is used (``None`` if it has none).

  Raises:
    AttributeError: If ``data`` lacks one of the attributes that are printed
      unconditionally.
  """
  if file_path is None:
    # This used to read a notebook-global `file_path`, which raised NameError
    # (or showed a stale path) whenever the function was called outside the
    # loop that happened to define it.
    file_path = getattr(data, 'filename', None)

  print("Filename.........:", file_path)
  print("Storage type.....:", data.SOPClassUID)

  pat_name = data.PatientName
  display_name = pat_name.family_name + ", " + pat_name.given_name
  print("Patient's name......:", display_name)
  print("Patient id..........:", data.PatientID)
  print("Patient's Age.......:", data.PatientAge)
  print("Patient's Sex.......:", data.PatientSex)
  print("Modality............:", data.Modality)
  print("Body Part Examined..:", data.BodyPartExamined)
  print("View Position.......:", data.ViewPosition)

  # Image details are only reported when the dataset carries pixel data.
  if 'PixelData' in data:
    rows = int(data.Rows)
    cols = int(data.Columns)
    print("Image size ....:{rows:d} x {cols:d}, {size:d} bytes".format(
        rows=rows, cols=cols, size=len(data.PixelData)))
    if 'PixelSpacing' in data:
      print("Pixel spacing ...:", data.PixelSpacing)

In [0]:
def create_xml_file(target_rows, folder="train"):
    """Write an XML annotation file (laid out like a Pascal VOC annotation) for one image.

    The file is written to ``<cwd>/images/<folder>/<patientId>.xml`` and points
    at ``<cwd>/images/<folder>/<patientId>.jpg``. The output directory is
    created if it does not exist yet.

    Args:
        target_rows: List of lists of dicts. Every dict needs the keys
            ``'x'``, ``'y'``, ``'width'`` and ``'height'``; the first dict of
            the first list also supplies ``'patientId'``, which names the files.
        folder: Sub-directory of ``images/`` used for the ``<folder>`` element,
            the ``<path>`` element and the output location. Previously only
            the ``<folder>`` element honoured it and ``train`` was hard-coded
            everywhere else.

    Raises:
        IndexError: If ``target_rows`` or its first list is empty.
    """
    patient_id = target_rows[0][0]['patientId']
    image_name = patient_id + '.jpg'
    # os.path.join keeps the separators native; the old code wrote Windows
    # backslashes into <path> on every platform.
    out_dir = os.path.join(os.getcwd(), 'images', folder)

    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = folder
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = os.path.join(out_dir, image_name)

    source = ET.SubElement(root, "source")
    ET.SubElement(source, "database").text = "Unknown"

    # Image dimensions are fixed values, not read from the image itself.
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = "1024"
    ET.SubElement(size, "height").text = "1024"
    ET.SubElement(size, "depth").text = "1"
    ET.SubElement(root, "segmented").text = "0"

    for rows in target_rows:
        for row in rows:
            # Coordinates are truncated to int before the extent is added,
            # exactly as before.
            x_min, y_min = int(row['x']), int(row['y'])
            x_max = x_min + int(row['width'])
            y_max = y_min + int(row['height'])

            _object = ET.SubElement(root, "object")
            ET.SubElement(_object, "name").text = "Pneumonia"
            ET.SubElement(_object, "pose").text = "Unspecified"
            ET.SubElement(_object, "truncated").text = "0"
            ET.SubElement(_object, "difficult").text = "0"

            bndbox = ET.SubElement(_object, "bndbox")
            ET.SubElement(bndbox, "xmin").text = str(x_min)
            ET.SubElement(bndbox, "ymin").text = str(y_min)
            ET.SubElement(bndbox, "xmax").text = str(x_max)
            ET.SubElement(bndbox, "ymax").text = str(y_max)

    # Without this the write fails when images/<folder> does not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    ET.ElementTree(root).write(os.path.join(out_dir, patient_id + '.xml'))

In [0]:
# Same two statements as the earlier loading cell: reset train_data and re-read
# the label CSV, so the export loop below works on a freshly loaded class_df.
train_data = []
class_df = pd.read_csv(train_labels_folder)

In [0]:
# Export cell: for every DICOM file that has at least one positive
# (Target == 1) label row, save the image as a JPEG inside train_img_folder
# and write the matching XML annotation via create_xml_file.

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs(train_img_folder, exist_ok=True)

for file_name in tqdm(os.listdir(train_folder)):
  file_path = os.path.join(train_folder, file_name)
  data = pydicom.dcmread(file_path)
  rows = class_df[class_df.patientId == data.PatientID]

  # One dict per positive bounding box of this file.
  _row = [
      {'patientId': patientId,
       'x': x,
       'y': y,
       'width': width,
       'height': height,
       'Target': Target}
      for patientId, x, y, width, height, Target in rows.values
      if Target == 1
  ]
  if not _row:
    continue

  # The image is written once per file (it used to be rewritten for every box)
  # and the path is joined properly: plain string concatenation produced
  # "<train_img_folder><id>.jpg", i.e. a file next to the folder, not in it.
  image_path = os.path.join(train_img_folder, str(_row[0]['patientId']) + '.jpg')
  if not cv2.imwrite(image_path, data.pixel_array):
    # imwrite reports failure through its return value only.
    raise OSError("Could not write image: " + image_path)

  target_rows = [_row]
  create_xml_file(target_rows)