In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import imread
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub


In [None]:
# List the image file names of each class folder on the mounted Drive.
normal_list = os.listdir("/content/drive/My Drive/ChestXRays/NORMAL")
diseased_list = os.listdir("/content/drive/My Drive/ChestXRays/PNEUMONIA")

# One integer label per file: 0 for NORMAL, 1 for PNEUMONIA.
nzeros = [0 for _ in normal_list]
dones = [1 for _ in diseased_list]

# Concatenate both classes (normal first) and mirror that order in the labels.
xray_images = normal_list + diseased_list
classification_values = nzeros + dones
xrays = np.array(xray_images)
target = np.array(classification_values)

# Sanity check: both arrays must have the same length.
len(xrays), len(target)


(5216, 5216)

In [None]:
# Pair every file name with its 0/1 class label in a single table.
tablet = pd.DataFrame(dict(Xrays=xrays, target=target))
tablet.head()

Unnamed: 0,Xrays,target
0,NORMAL2-IM-0815-0001.jpeg,0
1,NORMAL2-IM-0819-0001.jpeg,0
2,NORMAL2-IM-0824-0001.jpeg,0
3,NORMAL2-IM-0832-0001.jpeg,0
4,NORMAL2-IM-0822-0001.jpeg,0


In [None]:
# Shuffle all rows so the two classes are interleaved, then renumber the index.
# A fixed random_state makes the shuffle reproducible across kernel restarts
# (same seed as the one passed to train_test_split in this notebook).
table = tablet.sample(frac=1, random_state=42).reset_index(drop=True)
table.head()

Unnamed: 0,Xrays,target
0,IM-0317-0001.jpeg,0
1,person1486_bacteria_3885.jpeg,1
2,IM-0413-0001.jpeg,0
3,NORMAL2-IM-0403-0001.jpeg,0
4,person502_bacteria_2118.jpeg,1


In [None]:
# Load a table of (Xrays, target) rows from Drive; this rebinds `table`,
# replacing the DataFrame produced by the in-memory shuffle.
# NOTE(review): presumably this CSV is a previously saved shuffle — confirm.
table = pd.read_csv("/content/drive/My Drive/ChestXRays/table.csv")
table.head()

Unnamed: 0,Xrays,target
0,person1224_virus_2074.jpeg,1
1,person354_bacteria_1634.jpeg,1
2,person472_bacteria_2008.jpeg,1
3,IM-0691-0001.jpeg,0
4,NORMAL2-IM-0589-0001.jpeg,0


In [None]:
# Absolute Drive path for every image listed in the table.
filenames = [
    "/content/drive/My Drive/ChestXRays/XRays/" + fname
    for fname in table["Xrays"]
]

# Integer class labels (0 or 1) as a NumPy array.
labels = table["target"].to_numpy()

# Two-element boolean encoding per sample: comparing the scalar label with
# [1, 0] broadcasts to [label == 1, label == 0].
binary_labels = [np.equal(label, [1, 0]) for label in labels]

# Spot-check one sample: raw label next to its encoded form.
labels[4], binary_labels[4]

(0, array([False,  True]))

In [None]:
# Features are the image paths; targets are the two-element boolean labels.
X, y = filenames, binary_labels

In [None]:
  # Decode one sample image as single-channel float32 to inspect its native size.
  image1 = tf.image.convert_image_dtype(
      tf.image.decode_jpeg(tf.io.read_file(X[2]), channels=1),
      tf.float32,
  )
  image1.shape

TensorShape([680, 1120, 1])

In [None]:
# Number of leading samples from X / y that are split into train and test sets.
# The trailing "#@param" annotation is the slider spec on this line; keep it intact.
NUM_IMAGES = 1000 #@param {type:"slider", min:1000, max:5000, step:500}

In [None]:
# Split the first NUM_IMAGES samples 80/20 with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X[:NUM_IMAGES],
    y[:NUM_IMAGES],
    test_size=0.2,
    random_state=42,
)


In [None]:
IMG_SIZE = 256
def tensorify(image_path):
  """
  Load the JPEG at `image_path` and return it as a float32 image tensor.

  The file is decoded with 3 channels, converted to float32 and resized to
  IMG_SIZE x IMG_SIZE.
  """
  raw_bytes = tf.io.read_file(image_path)
  decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
  as_float = tf.image.convert_image_dtype(decoded, tf.float32)
  return tf.image.resize(as_float, size=[IMG_SIZE, IMG_SIZE])


def get_image_label(image_path, label):
  """
  Return an (image tensor, label) pair: the path is loaded through
  `tensorify`, the label is passed through unchanged.
  """
  return tensorify(image_path), label



In [None]:

BATCH_SIZE = 32

# Create a function to turn data into batches


def create_data_batches(X, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
  """
  Creates batches of data out of image (X) and label (y) pairs.

  Args:
    X: sequence of image file paths.
    y: sequence of labels aligned with X; not needed when test_data=True.
    batch_size: number of samples per batch (defaults to BATCH_SIZE).
    valid_data: if True, build (image, label) batches without shuffling.
    test_data: if True, build image-only batches (no labels, no shuffling).
      Takes precedence over valid_data.

  Returns:
    A batched tf.data.Dataset. Training data (neither flag set) is shuffled
    before the images are loaded; validation and test data keep their order.

  Raises:
    ValueError: if labels are required (not test data) but y is None.
  """
  # If the data is a test dataset, we probably don't have labels
  if test_data:
    print("Creating test data batches...")
    data = tf.data.Dataset.from_tensor_slices(tf.constant(X)) # only filepaths (no labels)
    # tensorify is the path -> image loader already defined above this cell;
    # using it avoids depending on a helper that is only defined in a later cell.
    return data.map(tensorify).batch(batch_size)

  if y is None:
    raise ValueError("y (labels) must be provided unless test_data=True")

  # If the data is a valid dataset, we don't need to shuffle it
  if valid_data:
    print("Creating validation data batches...")
    data = tf.data.Dataset.from_tensor_slices((tf.constant(X), # filepaths
                                               tf.constant(y))) # labels
    return data.map(get_image_label).batch(batch_size)

  print("Creating training data batches...")
  # Turn filepaths and labels into Tensors
  data = tf.data.Dataset.from_tensor_slices((tf.constant(X),
                                             tf.constant(y)))
  # Shuffling pathnames and labels before mapping image processor function is faster than shuffling images
  data = data.shuffle(buffer_size=len(X))

  # Create (image, label) tuples (this also turns the image path into a preprocessed image)
  data = data.map(get_image_label)

  # Turn the training data into batches, honouring the requested batch size
  return data.batch(batch_size)
  

In [None]:
# Training batches are shuffled; the held-out split is used as validation data
# during fit, so build it through the non-shuffling validation branch.
train_data = create_data_batches(X_train, y_train)
test_data = create_data_batches(X_test, y_test, valid_data=True)

Creating training data batches...
Creating training data batches...


In [None]:
# Inspect the (image batch, label batch) shapes and dtypes of the dataset.
train_data.element_spec

(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 2), dtype=tf.bool, name=None))

In [None]:
# Model input shape: leading None, then IMG_SIZE x IMG_SIZE with 3 channels.
INPUT_SHAPE = [None, IMG_SIZE, IMG_SIZE, 3]
# Number of units in the output layer (one per element of the label encoding).
OUTPUT_SHAPE = 2
# TF-Hub handle of the feature-vector module used as the first layer.
MODEL_URL = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4"

In [None]:
def create_model(input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE, model_url=MODEL_URL):
  """
  Builds and compiles a two-layer Keras model: a TF-Hub layer followed by a
  sigmoid Dense output layer.

  Args:
    input_shape: shape passed to `model.build` (defaults to INPUT_SHAPE).
    output_shape: number of units in the Dense output layer.
    model_url: TF-Hub handle loaded with `hub.KerasLayer`.

  Returns:
    The compiled and built `tf.keras.Sequential` model.
  """
  # Use the arguments (not the module-level constants) so callers can
  # actually override the hub module, the output width and the input shape.
  print("Building model with:", model_url)

  model = tf.keras.Sequential([
    hub.KerasLayer(model_url), # Layer 1 (input layer)
    tf.keras.layers.Dense(units=output_shape,
                          activation="sigmoid") # Layer 2 (output layer)
  ])

  # Compile the model
  model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(),
      optimizer=tf.keras.optimizers.Adam(),
      # Area under the precision-recall curve; the metric name is what
      # callbacks refer to when monitoring it.
      metrics=[tf.keras.metrics.AUC(
        name='auc_precision_recall', curve='PR', num_thresholds=10000)]
  )

  # Build the model
  model.build(input_shape)

  return model

In [None]:
# Build a model with the default settings and print its layer/parameter summary.
model = create_model()
model.summary()

Building model with: https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_1 (KerasLayer)   (None, 2048)              23564800  
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 4098      
Total params: 23,568,898
Trainable params: 4,098
Non-trainable params: 23,564,800
_________________________________________________________________


In [None]:
# Upper bound on training epochs passed to model.fit.
# The trailing "#@param" annotation is the slider spec on this line; keep it intact.
NUM_EPOCHS = 30 #@param {type:"slider", min:10, max:100, step:10}

In [None]:
%load_ext tensorboard
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="auc_precision_recall",
                                                  patience=3)
def train_model():
  """
  Trains a given model and returns the trained version.
  """
  model = create_model()
  model.fit(x=train_data,
            epochs=NUM_EPOCHS,
            validation_data=test_data,
            validation_freq=1,
            callbacks=[early_stopping])
  return model

model = train_model()

Building model with: https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


In [None]:

# Create a function to save a model
def save_model(model, suffix=None):
  """
  Saves a given model as an .h5 file in the ChestXRays Drive folder.

  Args:
    model: object exposing a `save(path)` method.
    suffix: file name (without extension) to save under. When omitted,
      the name "model" is used instead of failing on string concatenation.

  Returns:
    The full path the model was saved to.
  """
  # The declared default is None, which cannot be concatenated into a path.
  if suffix is None:
    suffix = "model"

  model_path = f"/content/drive/My Drive/ChestXRays/{suffix}.h5"
  print(f"Saving model to: {model_path}...")
  model.save(model_path)
  return model_path

# Persist the trained model; the returned path is shown as the cell output.
save_model(model, suffix="PneumoniaChestXRayClassifier")

Saving model to: /content/drive/My Drive/ChestXRays/PneumoniaChestXRayClassifier.h5...


'/content/drive/My Drive/ChestXRays/PneumoniaChestXRayClassifier.h5'

In [None]:
# Image paths beyond the samples used for the train/test split.
# NOTE(review): the slice starts at NUM_IMAGES + 1, so X[NUM_IMAGES] is used
# neither for training nor here; the labels must be sliced with identical bounds.
ext_val1 = X[NUM_IMAGES + 1 : NUM_IMAGES + 1000]

def process_image(image_path, img_size=IMG_SIZE):
  """
  Takes an image file path and turns the image into a Tensor.

  Args:
    image_path: path of a JPEG file.
    img_size: side length of the square output image (defaults to IMG_SIZE).

  Returns:
    A float32 tensor of shape (img_size, img_size, 3).
  """
  # Read in an image file
  image = tf.io.read_file(image_path)
  # Turn the jpeg image into numerical Tensor with 3 colour channels (Red, Green, Blue)
  image = tf.image.decode_jpeg(image, channels=3)
  # Convert the colour channel values from 0-255 to 0-1 values
  image = tf.image.convert_image_dtype(image, tf.float32)
  # Resize to the requested size; use the argument rather than the global so
  # that a caller-supplied img_size actually takes effect
  image = tf.image.resize(image, size=[img_size, img_size])

  return image

# Batch the extra images as label-free test data (kept in their original order).
val1 = create_data_batches(ext_val1, test_data = True)









Creating test data batches...


In [None]:
# Two scores per image from the model.
r1 = model.predict(val1)

# Collapse each score pair to a class id: 1 when the first score is strictly
# larger than the second, otherwise 0 (first position corresponds to label == 1
# in the [label == 1, label == 0] encoding of the targets).
r11 = [1 if scores[0] > scores[1] else 0 for scores in r1]


In [None]:
# Ground-truth label pairs for the same slice bounds used for the extra images.
r2 = y[NUM_IMAGES + 1 : NUM_IMAGES + 1000]

# Collapse each boolean pair to a class id with the same rule as the predictions.
r21 = [1 if pair[0] > pair[1] else 0 for pair in r2]

print(r11)
print(r21)




[0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 

In [None]:
# Element-wise agreement between predicted and true class ids.
l1 = [r11[i] == r21[i] for i in range(len(r11))]

# Number of misclassified images (positions where the two disagree).
count = l1.count(False)

count


57