In [None]:
# hci4ai
# saving to disk 1000 jpeg x-ray images from the training set:
# - 500 classified as pneumonia
# - 500 classified as normal
# all 1000 images are selected so that the model (model__2023-04-26__22-42-58) correctly classifies them (true label = predicted label)

In [1]:
import re
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

###############################################################################

# Mount Google Drive inside the Colab VM so the project folder is reachable
# as a regular filesystem path.
from google.colab import drive
drive.mount('/content/drive')
# Switch to the project folder; the model below is loaded by a path relative
# to this working directory.
%cd '/content/drive/My Drive/marco_colab_data/hci4ai'

# Load the trained classifier used later to keep only correctly classified
# images. NOTE(review): presumably a Keras model saved under this name in the
# project folder -- confirm the on-disk format.
model_name = 'model__2023-04-26__22-42-58'
model = tf.keras.models.load_model(model_name)

###############################################################################

# Passed as `num_parallel_calls` when mapping `process_path` over the dataset.
AUTOTUNE = tf.data.AUTOTUNE
# Target size handed to `tf.image.resize` in `decode_img`.
# NOTE(review): presumably matches the input size the loaded model expects -- confirm.
IMAGE_SIZE = [180, 180]

###############################################################################

def get_label(file_path):
    """Derive the class label from an image's file path.

    The path is split on "/" and the component right before the file name
    (the class directory) is compared with "PNEUMONIA".

    Args:
        file_path: path of the image, using "/" as separator.

    Returns:
        The result of comparing the class-directory name with "PNEUMONIA"
        (true for pneumonia images, false otherwise).
    """
    path_components = tf.strings.split(file_path, "/")
    class_dir = path_components[-2]
    return class_dir == "PNEUMONIA"


def decode_img(img):
    """Decode JPEG bytes and resize the result to `IMAGE_SIZE`.

    Args:
        img: the JPEG-encoded image content.

    Returns:
        The decoded 3-channel image after `tf.image.resize` to `IMAGE_SIZE`.
    """
    # Force three channels regardless of how the JPEG was stored.
    decoded = tf.image.decode_jpeg(img, channels=3)
    resized = tf.image.resize(decoded, IMAGE_SIZE)
    return resized


def process_path(image, path):
    """Convert one (encoded image, file path) record into an (image, label) pair.

    Args:
        image: the JPEG-encoded image content (already read, not a path).
        path: the file path the image came from; only used to derive the label.

    Returns:
        A tuple `(img, label)` with the decoded, resized image from
        `decode_img` and the label from `get_label`.
    """
    return decode_img(image), get_label(path)

###############################################################################

# The training set is read from two TFRecord files: one with the encoded
# images and one with the corresponding file paths.
train_paths = tf.data.TFRecordDataset(
    "gs://download.tensorflow.org/data/ChestXRay2017/train/paths.tfrec"
)
train_images = tf.data.TFRecordDataset(
    "gs://download.tensorflow.org/data/ChestXRay2017/train/images.tfrec"
)

# Pair every image with its path by position, then turn each pair into a
# (decoded image, label) example.
ds = tf.data.Dataset.zip((train_images, train_paths)).map(
    process_path, num_parallel_calls=AUTOTUNE
)

###############################################################################

%cd '/content/drive/My Drive/marco_colab_data/hci4ai/export'

import PIL

def save_image(x, label, count):
  """Save one image as a grayscale JPEG named `<label>_<count>.jpg`.

  The file is written to the current working directory.

  Args:
    x: image convertible to a NumPy array and indexable as [row, column,
      channel]. Only channel 0 is written.
      NOTE(review): presumably values are already in the 0-255 range -- confirm.
    label: string prefix of the file name.
    count: running index appended to the file name.
  """
  # Import the submodule explicitly: a bare `import PIL` does not load
  # `PIL.Image`, so `PIL.Image` is only available if some other import
  # happened to load it first.
  from PIL import Image

  # Clip before the cast so out-of-range values cannot wrap around in uint8;
  # values already within 0-255 are converted exactly as before.
  pixels = np.clip(np.asarray(x), 0, 255).astype(np.uint8)
  first_channel = pixels[:, :, 0]
  Image.fromarray(first_channel).save(label + '_' + str(count) + '.jpg')

###############################################################################

def predicted_pneumonia(image):
  """Tell whether the loaded model classifies `image` as pneumonia.

  Args:
    image: a single image without batch dimension; a batch axis is added
      before it is passed to the model.

  Returns:
    The element-wise comparison `prediction > 0.5` for the model output of
    this image.
    NOTE(review): presumably a one-element boolean array (single output
    unit), which is what lets callers use it directly in an `if` -- confirm.
  """
  batch = tf.expand_dims(image, axis=0)
  # verbose=0: this function is called once per image, and the default
  # verbosity would print a progress bar for every single call.
  prediction = model.predict(batch, verbose=0)[0]
  return prediction > 0.5

###############################################################################

# Number of images saved so far for each class.
p_count = 0
n_count = 0

# Highest index to save per class: indices 0..tot, i.e. 500 images each.
tot = 500 - 1

for image, label in ds:
  if label:
    # True pneumonia: keep it only while the quota is open and the model agrees.
    if p_count <= tot:
      if predicted_pneumonia(image):
        save_image(image, 'p', p_count)
        p_count += 1
  elif n_count <= tot:
    # True normal: keep it only while the quota is open and the model agrees.
    if not predicted_pneumonia(image):
      save_image(image, 'n', n_count)
      n_count += 1
  # Stop as soon as both classes have reached their quota.
  if min(p_count, n_count) > tot:
    break
  
###############################################################################

# Flush pending writes and unmount Drive so the exported images are persisted
# before the session ends.
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')



Mounted at /content/drive
/content/drive/My Drive/marco_colab_data/hci4ai
/content/drive/My Drive/marco_colab_data/hci4ai/export
All changes made in this colab session should now be visible in Drive.
