In [0]:
# Run this cell to mount your Google Drive.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import cv2
import numpy as np

In [0]:
data_gen = ImageDataGenerator()

In [0]:
#define a function to load images from the given path using opencv and resize it to the given target width and height
def get_data(path, width, height, label):
  data = []
  labels = []

  for image in os.listdir(path):
    img = cv2.imread(os.path.join(path, image))
    data.append(cv2.resize(img, (width, height)))
    labels.append(label)
    
  data = np.array(data).astype(np.float32)
  labels = np.array(labels).astype(np.float32)
  return data, labels

In [0]:
#define path for training data and also give label to it
train_normal_path = "./drive/My Drive/Data/chest x-ray dataset/chest_xray/train/NORMAL"
train_pneumonia_path = "./drive/My Drive/Data/chest x-ray dataset/chest_xray/train/PNEUMONIA"

X_train_normal_data, y_train_normal_label = get_data(train_normal_path, 224, 224, 0.)
X_train_pneumonia_data, y_train_pneumonia_label = get_data(train_pneumonia_path, 224, 224, 1.)

In [0]:
#display training data info to check if it is loaded fully or not
print("Total NORMAL Train Images: ", len(X_train_normal_data))
print("Shape of NORMAL Train Images: ", X_train_normal_data[0].shape)
print("Output for NORMAL Train Images: ", y_train_normal_label[0])

print("+-----------------------+")

#check PNEUMONIA training data
print("Total PNEUMONIA Train Images: ", len(X_train_pneumonia_data))
print("Shape of PNEUMONIA Train Images: ", X_train_pneumonia_data[0].shape)
print("Output for PNEUMONIA Train Images: ", y_train_pneumonia_label[0])

print("+-----------------------+")

print("Total training images: ", (len(X_train_normal_data) + len(X_train_pneumonia_data)))

Total NORMAL Train Images:  1261
Shape of NORMAL Train Images:  (224, 224, 3)
Output for NORMAL Train Images:  0.0
+-----------------------+
Total PNEUMONIA Train Images:  3795
Shape of PNEUMONIA Train Images:  (224, 224, 3)
Output for PNEUMONIA Train Images:  1.0
+-----------------------+
Total training images:  5056


In [0]:
#concatenate output training data arrays to make one training array, consist of features and labels

train_input = np.concatenate((X_train_normal_data, X_train_pneumonia_data), axis=0)
train_output = np.concatenate((y_train_normal_label, y_train_pneumonia_label), axis=0)

print("Total output labels: ", len(train_output))
print("NORMAL output labels: ", train_output[len(X_train_normal_data) - 1])
print("PNEUMONIAL output labels: ", train_output[len(X_train_normal_data)])

Total output labels:  5056
NORMAL output labels:  0.0
PNEUMONIAL output labels:  1.0


In [0]:
#feed final training data array to keras.preprocessing.image ImageDataGenerator
train_data = data_gen.flow(x=train_input, y=train_output, batch_size=32)

In [0]:
#checking input shape
X, y = train_data.next()
print("Input data shape: ", X[0].shape)
print('Batch shape=%s, min=%.3f, max=%.3f' % (X.shape, X.min(), X.max()))
print("sample output: ", y[0])
print("sample output: ", y[1])
print("sample output: ", y[2])
print("sample output: ", y[3])
print("sample output: ", y[4])
print("sample output: ", y[5])
print("sample output: ", y[6])
print("sample output: ", y[8])
print("sample output: ", y[9])
print("sample output: ", y[10])
print("sample output: ", y[11])
print("sample output: ", y[12])
print("sample output: ", y[13])
print("sample output: ", y[14])

Input data shape:  (224, 224, 3)
Batch shape=(32, 224, 224, 3), min=0.000, max=255.000
sample output:  0.0
sample output:  0.0
sample output:  1.0
sample output:  1.0
sample output:  0.0
sample output:  1.0
sample output:  1.0
sample output:  1.0
sample output:  0.0
sample output:  1.0
sample output:  0.0
sample output:  1.0
sample output:  0.0
sample output:  1.0
