<a href="https://colab.research.google.com/github/mateopolancecofficial/DeepLearningProjects/blob/main/ComputerVision/ImageClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [126]:
# TODO: add a requirements.txt that lists every package this notebook needs
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import PIL.Image
import pathlib
import shutil
import random
import imghdr

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [127]:
# Print the device each TensorFlow op is placed on.
tf.debugging.set_log_device_placement(True)

# Query the visible GPUs once and reuse the result below.
gpu_devices = tf.config.list_physical_devices('GPU')
gpu_count = len(gpu_devices)

if gpu_count:
  if gpu_count > 1:
    # `strategy` is only defined when more than one GPU is visible.
    strategy = tf.distribute.MirroredStrategy()
  print("Num GPUs Available: ", gpu_count)

Num GPUs Available:  1


In [128]:
# Download and unpack the flower_photos dataset (uncomment the lines below on the first run)
#!wget https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
#from shutil import unpack_archive
#unpack_archive('/content/flower_photos.tgz', '/content/')

In [129]:
# Data-loading configuration.

# Upper bounds (in pixels) used when choosing the size images are resized to.
MAX_WIDTH = 100
MAX_HEIGHT = 100

# Root directory for all dataset paths. abspath() resolves the notebook name
# against the current working directory, so this is the directory the kernel
# was started in.
ORIGIN = os.path.dirname(os.path.abspath("ImageClassification.ipynb"))
print(ORIGIN)

# Name of the folder (inside ORIGIN) that holds one sub-folder per class.
SOURCE = 'flower_photos'
SOURCE_PATH = os.path.join(ORIGIN, SOURCE)
print(SOURCE_PATH)

# Available color modes are "grayscale", "rgb", "rgba"
# (images will be converted to have 1, 3, or 4 channels).
COLOR_MODE = "rgb"

BATCH_SIZE = 32
# Fractions of the data held out for testing and for validation.
TEST_SPLIT = VALIDATION_SPLIT = 0.2
SEED = 100

# Image types (as reported by imghdr) that may be moved into the test set.
IMAGE_EXTENSIONS = 'png jpg jpeg tiff bmp gif'.split()


/content
/content/flower_photos


In [130]:
# Split the dataset into train and test folders unless it has already been split.

def train_test_split():
  """
  Move a TEST_SPLIT fraction of every class folder into a "test_data" folder.

  The source folder (SOURCE_PATH) is expected to contain one sub-folder per
  class. For each class a sample of round(len(files) * TEST_SPLIT) files is
  drawn and every sampled file that imghdr recognises as one of
  IMAGE_EXTENSIONS is moved to ORIGIN/test_data/<class>/.

  If ORIGIN/test_data already holds files, the existing split is reused and
  nothing is moved. Deleting and re-creating the test folder on every run would
  permanently discard the images moved there earlier and shrink the dataset
  with each execution. To force a new split, move the test files back into the
  source folder and delete test_data first.

  The sample is drawn from a sorted file list with a generator seeded by SEED,
  so the same folder contents always produce the same split.

  return train_path, test_path:  paths of the train and test source folders
  """

  train_path = os.path.join(ORIGIN, SOURCE_PATH)
  test_path = os.path.join(ORIGIN, "test_data")

  # Reuse an existing, non-empty split instead of destroying it.
  if os.path.isdir(test_path):
    for entry in os.listdir(test_path):
      entry_path = os.path.join(test_path, entry)
      if os.path.isdir(entry_path) and os.listdir(entry_path):
        return train_path, test_path

  os.makedirs(test_path, exist_ok=True)

  # Local generator: reproducible and independent of the global random state.
  rng = random.Random(SEED)

  # Sorted, because os.listdir returns entries in arbitrary order.
  for class_name in sorted(os.listdir(train_path)):
    class_dir = os.path.join(train_path, class_name)
    # Skip stray files (e.g. a licence file) that sit next to the class folders.
    if not os.path.isdir(class_dir):
      continue

    files = sorted(os.listdir(class_dir))
    if not files:
      continue

    test_class_dir = os.path.join(test_path, class_name)
    os.makedirs(test_class_dir, exist_ok=True)

    test_num = len(files) * TEST_SPLIT
    print(test_num)

    # Move the sampled files from the train dataset to the test dataset.
    for file_name in rng.sample(files, round(test_num)):
      source_file = os.path.join(class_dir, file_name)
      # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
      if os.path.isfile(source_file) and imghdr.what(source_file) in IMAGE_EXTENSIONS:
        os.replace(source_file, os.path.join(test_class_dir, file_name))

  return train_path, test_path


In [134]:
def get_min_img_size(train_path, test_path=None):
  """
  Get the smallest image dimensions found in the train (and test) folders.

  Each folder is expected to contain one sub-folder per class; entries that
  are not directories are ignored. Image sizes are read with PIL without
  converting the pixels to an array.

  NOTE on ordering: despite the names used in this notebook, the FIRST value
  returned is the minimum number of pixel rows (array axis 0, i.e. the image
  height) and the SECOND is the minimum number of pixel columns (axis 1, the
  width). This order is kept unchanged because the cell that calls this
  function swaps the two values before passing them on.

  param train_path:  path of a train source folder
  param test_path:   optional path of a test source folder
  return:            tuple (min size along axis 0, min size along axis 1),
                     or (None, None) when no image was found
  """

  def get_image_params(data_path):
    # Running minima of rows (axis 0) and columns (axis 1) for one folder.
    min_rows, min_cols = None, None
    for class_name in os.listdir(data_path):
      class_dir = os.path.join(data_path, class_name)
      if not os.path.isdir(class_dir):
        continue
      for file_name in os.listdir(class_dir):
        # The context manager closes the file handle right after the size is read.
        with PIL.Image.open(os.path.join(class_dir, file_name)) as image:
          cols, rows = image.size  # PIL reports (width, height)
        min_rows = rows if min_rows is None else min(min_rows, rows)
        min_cols = cols if min_cols is None else min(min_cols, cols)

    return min_rows, min_cols

  folder_minima = [get_image_params(train_path)]
  if test_path is not None:
    folder_minima.append(get_image_params(test_path))

  # Drop folders that contained no images so min() never compares None with int.
  folder_minima = [dims for dims in folder_minima if dims[0] is not None]
  if not folder_minima:
    return None, None

  return (min(dims[0] for dims in folder_minima),
          min(dims[1] for dims in folder_minima))

In [135]:
def set_resize_image_parameters(img_width, img_height):
  """
  Cap the given image dimensions at the configured maxima.

  param img_width:   scalar, limited to at most MAX_WIDTH
  param img_height:  scalar, limited to at most MAX_HEIGHT
  return:            tuple (img_height, img_width) of the capped values;
                     note that the height is returned first, i.e. in the
                     opposite order of the parameters
  """

  capped_width = min(img_width, MAX_WIDTH)
  capped_height = min(img_height, MAX_HEIGHT)

  return capped_height, capped_width



In [136]:
# Prepare the parameters needed to build the TensorFlow datasets:
# split the files, find the smallest image and derive the resize target.
train_path, test_path = train_test_split()

# get_min_img_size returns (min along axis 0, min along axis 1).
min_dims = get_min_img_size(train_path, test_path=test_path)

# The two values are passed in swapped order, and set_resize_image_parameters
# returns (second argument, first argument), so img_width ends up holding the
# capped axis-0 minimum and img_height the capped axis-1 minimum.
img_width, img_height = set_resize_image_parameters(min_dims[1], min_dims[0])
image_size = (img_width, img_height)
print(image_size)

71.60000000000001
81.80000000000001
64.8
65.60000000000001
91.80000000000001
(100, 100)


In [137]:
def get_image_dataset(data_path, image_size, subset = None):
  """
  Build a batched image dataset from a directory and report its class names.

  param data_path:   string, source folder path
  param image_size:  tuple, forwarded as image_size to
                     image_dataset_from_directory
  param subset:      optional, "training" or "validation"; when given, the
                     folder is split using VALIDATION_SPLIT and SEED, otherwise
                     the whole folder is loaded without a split or fixed seed
  return:            dict with keys "data" (the cached, prefetched dataset)
                     and "classNames" (the sorted list of class names)
  """

  # A split fraction and a seed are only supplied when a subset is requested.
  loader_options = dict(
      color_mode=COLOR_MODE,
      image_size=image_size,
      batch_size=BATCH_SIZE,
      seed=SEED if subset else None,
      validation_split=VALIDATION_SPLIT if subset else None,
      subset=subset,
  )

  dataset = tf.keras.preprocessing.image_dataset_from_directory(
      data_path, **loader_options
  )

  # Sort the dataset's own class-name list in place, as the caller gets this list.
  class_names = dataset.class_names
  class_names.sort()

  # Cache loaded batches and overlap loading with consumption.
  pipeline = dataset.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

  return {"data": pipeline, "classNames": class_names}

In [139]:

# Training and validation datasets are the two subsets of the train folder,
# created in this order.
training_ds, validation_ds = (
    get_image_dataset(train_path, image_size, subset=subset_name)
    for subset_name in ("training", "validation")
)

# The test dataset uses the whole test folder, without a split.
test_ds = get_image_dataset(test_path, image_size)


Found 1501 files belonging to 6 classes.
Using 1201 files for training.
Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Equal in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Equal in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op LogicalAnd in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op SelectV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op DummySeedGenerator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ShuffleDatasetV3 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op DummyMemoryCache in devi