In [3]:
import tensorflow as tf
# Ask TensorFlow which GPU device (if any) it can see.
# Alternative check, currently disabled:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# NOTE(review): the output recorded for this cell is a list of PhysicalDevice
# entries containing only a CPU, which does not look like the value returned by
# the call below. The cell was presumably edited after it was last run;
# re-execute it to confirm.
tf.test.gpu_device_name()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [1]:
import os

def _list_scan_paths(study_dir):
    """Return the absolute path of every entry in ``study_dir``, sorted by name.

    Args:
        study_dir: Directory to list, relative to the current working directory.

    Returns:
        A list of paths built from the current working directory, ``study_dir``
        and each entry name.

    ``os.listdir`` returns entries in arbitrary order, so the names are sorted
    to keep the order of the scans (and therefore the seeded train/validation
    split made from them) identical across machines and runs.
    """
    return [
        os.path.join(os.getcwd(), study_dir, name)
        for name in sorted(os.listdir(study_dir))
    ]


# Folder "CT-0" holds the scans counted as normal lung tissue.
normal_scan_paths = _list_scan_paths("data/studies/CT-0")

# Folders "CT-1" through "CT-4" hold the scans counted as abnormal lung tissue.
abnormal_scan_paths = []
for i in range(1, 5):
    abnormal_scan_paths.extend(_list_scan_paths(f"data/studies/CT-{i}"))

print("CT scans with normal lung tissue: " + str(len(normal_scan_paths)))
print("CT scans with abnormal lung tissue: " + str(len(abnormal_scan_paths)))

CT scans with normal lung tissue: 254
CT scans with abnormal lung tissue: 856


In [2]:
import numpy as np
from tqdm import tqdm
from preprocess import process_scan

def _load_or_process_scans(cache_path, scan_paths):
    """Return the preprocessed scans cached at ``cache_path``, building the cache if needed.

    Args:
        cache_path: Location of the ``.npy`` cache file.
        scan_paths: Paths of the raw scans, passed one by one to ``process_scan``
            when the cache cannot be loaded.

    Returns:
        The array loaded from the cache, or the freshly processed array (which
        is also written to ``cache_path``).
    """
    try:
        return np.load(cache_path)
    except OSError:
        # Cache file missing or unreadable: fall through and rebuild it.
        pass

    # Create the cache folder *before* the slow preprocessing pass so that a
    # missing directory cannot make np.save fail after all the work is done.
    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    scans = np.array([process_scan(path) for path in tqdm(scan_paths)])
    np.save(cache_path, scans)
    return scans


# Read and process the scans.
# Each scan is resized across height, width, and depth and rescaled.
# The two caches are handled independently, so a single missing file only
# triggers reprocessing of that class.
abnormal_scans = _load_or_process_scans("data/resized/abnormal.npy", abnormal_scan_paths)
normal_scans = _load_or_process_scans("data/resized/normal.npy", normal_scan_paths)

# Label convention: 1 marks a scan showing viral pneumonia, 0 marks a normal scan.
# One label is produced per scan in the corresponding array.
abnormal_labels = np.array([1] * len(abnormal_scans))
normal_labels = np.array([0] * len(normal_scans))

In [3]:
from sklearn.model_selection import train_test_split

# Stack both classes (abnormal first, then normal) into a single array of scans
# and a matching array of labels, then keep 70% of the samples for training and
# the remaining 30% for validation, using a fixed seed.
scans = np.concatenate([abnormal_scans, normal_scans])
labels = np.concatenate([abnormal_labels, normal_labels])

x_train, x_test, y_train, y_test = train_test_split(
    scans,
    labels,
    train_size=0.7,
    random_state=42,
)

print(
    "Number of samples in train and validation are %d and %d."
    % (len(x_train), len(x_test))
)

Number of samples in train and validation are 777 and 333.


In [4]:
import random

from scipy import ndimage
import tensorflow as tf


@tf.function
def rotate(volume):
    """Rotate the volume by a few degrees.

    The angle is picked at random from a fixed set of small angles and the
    rotated values are clamped to the range [0, 1].

    Args:
        volume: Tensor holding a single scan volume.

    Returns:
        A float32 tensor carrying the same static shape as ``volume``.
    """

    def scipy_rotate(array):
        # This runs as ordinary Python/NumPy code through tf.numpy_function,
        # so a new angle is drawn every time a sample is processed.
        angle = random.choice([-20, -10, -5, 5, 10, 20])
        # reshape=False keeps the output the same shape as the input.
        rotated = ndimage.rotate(array, angle, reshape=False)
        # Interpolation can push values slightly outside the valid range.
        rotated = rotated.clip(0, 1)
        # ndimage.rotate returns the input's dtype; cast so the result always
        # matches the float32 output type declared to tf.numpy_function.
        return rotated.astype("float32", copy=False)

    augmented_volume = tf.numpy_function(scipy_rotate, [volume], tf.float32)
    # tf.numpy_function cannot infer the output shape; the rotation preserves
    # it, so restore the static shape for downstream ops and layers.
    augmented_volume.set_shape(volume.shape)
    return augmented_volume


def train_preprocessing(volume, label):
    """Augment a training sample with a random rotation, then add a channel axis.

    The label is passed through unchanged.
    """
    # The new axis is inserted at index 3 of the rotated volume.
    return tf.expand_dims(rotate(volume), axis=3), label


def validation_preprocessing(volume, label):
    """Prepare a validation sample: add a channel axis, with no augmentation.

    The label is passed through unchanged.
    """
    with_channel = tf.expand_dims(volume, axis=3)
    return with_channel, label




In [5]:
# Number of samples per batch, shared by both pipelines.
batch_size = 2

# Training pipeline: shuffle with a buffer covering the whole training set,
# augment each sample on the fly, then batch and prefetch.
train_loader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_loader.shuffle(len(x_train))
train_dataset = train_dataset.map(train_preprocessing)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(2)

# Validation pipeline: same steps, but the per-sample map only adds the
# channel axis (no augmentation).
validation_loader = tf.data.Dataset.from_tensor_slices((x_test, y_test))
validation_dataset = validation_loader.shuffle(len(x_test))
validation_dataset = validation_dataset.map(validation_preprocessing)
validation_dataset = validation_dataset.batch(batch_size)
validation_dataset = validation_dataset.prefetch(2)

In [6]:
import matplotlib.pyplot as plt

# Pull a single batch from the training pipeline and display the slice at
# index 30 along the third axis of the first scan in that batch.
data = train_dataset.take(1)
images, labels = next(iter(data))
images = images.numpy()
image = images[0]
print("Dimension of the CT scan is:", image.shape)
plt.imshow(image[:, :, 30].squeeze(), cmap="gray")

: 