<a href="https://colab.research.google.com/github/bhuwanupadhyay/codes/blob/main/ipynbs/reshape_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install pydicom

Collecting pydicom
  Downloading pydicom-2.2.2-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 25.5 MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.2.2


In [28]:
# Import tensorflow
import logging

import tensorflow as tf
import keras.backend as K

# Helper libraries
import math
import numpy as np
import pandas as pd
import pydicom
import os
import sys
import time

# Imports for dataset manipulation
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

# Improve progress bar display
import tqdm
import tqdm.auto

tqdm.tqdm = tqdm.auto.tqdm

#tf.enable_eager_execution() #comment this out if causing errors
logger = tf.get_logger()
logger.setLevel(logging.DEBUG)


###             SET MODEL CONFIGURATIONS             ###
# Data Loading
CSV_PATH = 'label_data/CCC_clean.csv'
IMAGE_BASE_PATH = './data/'
test_size_percent = 0.15  # percent of total data reserved for testing

print(IMAGE_BASE_PATH)

# Data Augmentation
mirror_im = False

# Loss
lambda_coord = 5
epsilon = 0.00001

# Learning
step_size = 0.00001
BATCH_SIZE = 5
num_epochs = 1

# Saving
shape_path = 'trained_model/model_shape.json'
weight_path = 'trained_model/model_weights.h5'

# TensorBoard
tb_graph = False
tb_update_freq = 'batch'

###         GET THE DATASET AND PREPROCESS IT        ###

print("Loading and processing data\n")

data_frame = pd.read_csv(CSV_PATH)

"""
Construct numpy ndarrays from the loaded csv to use as training
and testing datasets.
"""
# zip all points for each image label together into a tuple
points = zip(data_frame['start_x'], data_frame['start_y'],
             data_frame['end_x'], data_frame['end_y'])
img_paths = data_frame['imgPath']

def path_to_image(path):
    """
    Load a matrix of pixel values from the DICOM image stored at the
    input path.

    @param path - string, relative path (from IMAGE_BASE_PATH) to
                  a DICOM file
    @return image - numpy ndarray (int), 2D matrix of pixel
                    values of the image loaded from path
    """
    # load image from path as numpy array
    image = pydicom.dcmread(os.path.join(IMAGE_BASE_PATH, path)).pixel_array
    return image


# normalize dicom image pixel values to 0-1 range
def normalize_image(img):
    """
    Normalize the pixel values in img to be withing the range
    of 0 to 1.

    @param img - numpy ndarray, 2D matrix of pixel values
    @return img - numpy ndarray (float), 2D matrix of pixel values, every
                  element is valued between 0 and 1 (inclusive)
    """
    img = img.astype(np.float32)
    img += abs(np.amin(img))  # account for negatives
    img /= np.amax(img)
    return img


# normalize the ground truth bounding box labels wrt image dimensions
def normalize_points(points):
    """
    Normalize values in points to be within the range of 0 to 1.

    @param points - 1x4 tuple, elements valued in the range of 0
                    512 (inclusive). This is known from the nature
                    of the dataset used in this program
    @return - 1x4 numpy ndarray (float), elements valued in range
              0 to 1 (inclusive)
    """
    imDims = 512.0  # each image in our dataset is 512x512
    points = list(points)
    for i in range(len(points)):
        points[i] /= imDims
    return np.array(points).astype(np.float32)


"""
Convert the numpy array of paths to the DICOM images to pixel
matrices that have been normalized to a 0-1 range.
Also normalize the bounding box labels to make it easier for
the model to predict on them.
"""

# apply preprocessing functions
points = map(normalize_points, points)
imgs = map(path_to_image, img_paths)
imgs = map(normalize_image, imgs)

print(list(imgs))

# reshape input image data to 4D shape (as expected by the model)
# and cast all data to np arrays (just in case)
imgs = np.array(imgs)
points = np.array(points)
imgs = imgs.reshape((-1, 512, 512, 1))

./data/
Loading and processing data

[array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)]


ValueError: ignored