<a href="https://colab.research.google.com/github/pcashman21/feral-cat-census/blob/main/src/notebooks/generate_cluster_test_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.image import imread
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Colab-only step: attach the user's Google Drive at /content/gdrive.
# The test-data folders used later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [8]:
# Pre-processing pipeline applied to every raw image before it is saved and
# before any random augmentation is generated from it:
#   1. resize to a fixed 244x244 square,
#   2. multiply pixel values by 1/255.
# NOTE(review): the stock Keras VGG models default to 224x224 input; confirm
# that 244 is the size the downstream VGG model is really built with.
transformer_nn = tf.keras.Sequential()
transformer_nn.add(layers.Resizing(height=244, width=244))
transformer_nn.add(layers.Rescaling(scale=1. / 255))

In [4]:
# Augmentation generator.  Later cells call image_gen.random_transform() on an
# image that has already been resized/rescaled, to synthesise extra pictures
# of the same cat for the clustering test data.
image_gen = ImageDataGenerator(
    rotation_range=20,        # random rotation of up to 20 degrees
    width_shift_range=0.10,   # horizontal shift of up to 10% of the image width
    height_shift_range=0.10,  # vertical shift of up to 10% of the image height
    shear_range=0.1,          # shear intensity (a shear angle, not a crop)
    zoom_range=0.1,           # zoom in or out by up to 10%
    horizontal_flip=True,     # allow random left-right mirroring
    fill_mode='nearest',      # fill pixels exposed by a transform with the nearest valid value
    # Pixel normalisation factor.
    # NOTE(review): random_transform() appears to apply only the geometric
    # transforms, so this factor likely has no effect in this notebook - confirm.
    rescale=1 / 255,
)

In [10]:
# Root folder (on the mounted Google Drive) that holds all cluster test data.
test_data_path = '/content/gdrive/My Drive/Cat images/cluster_test_data/'

# One sub-folder per test case.  The trailing slash is kept because later
# cells build file paths by plain string concatenation.
case_1_path = f'{test_data_path}case_1/'
case_2_path = f'{test_data_path}case_2/'
case_3_path = f'{test_data_path}case_3/'
case_4_path = f'{test_data_path}case_4/'
case_5_path = f'{test_data_path}case_5/'

# File names of the five source photos that the test cases are built from.
image_1, image_2, image_3, image_4, image_5 = (
    '00000500_001.jpg',
    '00000500_003.jpg',
    '00000500_005.jpg',
    '00000500_008.jpg',
    '00000500_010.jpg',
)

In [11]:
# Case 1: Five images, all different
#
# The source photos are already in the case_1 folder, so each one only has to
# be resized and rescaled; the result is written back over the original file.
case_1_1_file = case_1_path + image_1
case_1_1_image = transformer_nn(imread(case_1_1_file)).numpy()
plt.imsave(case_1_1_file, case_1_1_image)

In [14]:
# Remaining Case 1 images: read each file, resize/rescale it, and write the
# result back over the same file, in the order image_2 .. image_5.
case_1_processed = []
for case_1_name in (image_2, image_3, image_4, image_5):
  case_1_file = case_1_path + case_1_name
  case_1_pixels = transformer_nn(imread(case_1_file)).numpy()
  plt.imsave(case_1_file, case_1_pixels)
  case_1_processed.append(case_1_pixels)

# Bind the processed arrays to the same per-image names as before.
case_1_2_image, case_1_3_image, case_1_4_image, case_1_5_image = case_1_processed

In [16]:
# Case 2: Five images, all the same cat
#
# The first image is resized/rescaled and written back over its own file;
# four random augmentations of it are then saved next to it as
# '<name>_1<ext>' .. '<name>_4<ext>'.
case_2_1_image = transformer_nn(imread(case_2_path + image_1)).numpy()
plt.imsave(case_2_path + image_1, case_2_1_image)

# Split the file name once, outside the loop.  os.path.splitext only splits
# off the final extension, so a name containing extra dots is still handled.
image_1_stem, image_1_ext = os.path.splitext(image_1)

# Perform four transforms and save each one.  random_transform() is not
# seeded here, so the generated images differ from run to run.
for i in range(4):
  transformed_image = image_gen.random_transform(case_2_1_image)
  plt.imsave(case_2_path + f'{image_1_stem}_{i + 1}{image_1_ext}', transformed_image)

In [17]:
# Case 3: Ten images, five different cats, 6 images
# of first cat, one each of the rest
#
# First cat: resize/rescale the image in place, then save five random
# augmentations of it as '<name>_1<ext>' .. '<name>_5<ext>'.
case_3_1_image = transformer_nn(imread(case_3_path + image_1)).numpy()
plt.imsave(case_3_path + image_1, case_3_1_image)

# Split the file name once, outside the loop.  os.path.splitext only splits
# off the final extension, so a name containing extra dots is still handled.
image_1_stem, image_1_ext = os.path.splitext(image_1)

# random_transform() is not seeded here, so the generated images differ from
# run to run.
for i in range(5):
  transformed_image = image_gen.random_transform(case_3_1_image)
  plt.imsave(case_3_path + f'{image_1_stem}_{i + 1}{image_1_ext}', transformed_image)

# Remaining four cats: just resize/rescale each image in place.
case_3_others = []
for case_3_name in (image_2, image_3, image_4, image_5):
  case_3_pixels = transformer_nn(imread(case_3_path + case_3_name)).numpy()
  plt.imsave(case_3_path + case_3_name, case_3_pixels)
  case_3_others.append(case_3_pixels)

# Keep the per-image variable names available to any later cell.
case_3_2_image, case_3_3_image, case_3_4_image, case_3_5_image = case_3_others

In [18]:
# Case 4: Ten images, five different cats, 2 images of each
#
# Every source image is resized/rescaled in place and then gets exactly one
# randomly transformed companion, saved next to it as '<name>_1<ext>'.
#
# The companion suffix is a literal 1 rather than a loop counter: this cell
# has no counting loop of its own, and borrowing `i` from an earlier cell
# would raise NameError on a fresh kernel and otherwise make the file names
# depend on which cell happened to run last.
case_4_originals = []
for case_4_name in (image_1, image_2, image_3, image_4, image_5):
  # Resize/rescale the source image and overwrite the original file.
  case_4_pixels = transformer_nn(imread(case_4_path + case_4_name)).numpy()
  plt.imsave(case_4_path + case_4_name, case_4_pixels)

  # One random augmentation per cat (unseeded, so it differs between runs).
  transformed_image = image_gen.random_transform(case_4_pixels)
  image_stem, image_ext = os.path.splitext(case_4_name)
  plt.imsave(case_4_path + image_stem + '_1' + image_ext, transformed_image)

  case_4_originals.append(case_4_pixels)

# Keep the per-image variable names available to any later cell.
(case_4_1_image, case_4_2_image, case_4_3_image,
 case_4_4_image, case_4_5_image) = case_4_originals

In [19]:
# Case 5: Ten images, two different cats, 9 images
# of first cat, one of the other
#
# First cat: resize/rescale the image in place, then save eight random
# augmentations of it as '<name>_1<ext>' .. '<name>_8<ext>'.
case_5_1_image = transformer_nn(imread(case_5_path + image_1)).numpy()
plt.imsave(case_5_path + image_1, case_5_1_image)

# Split the file name once, outside the loop.  os.path.splitext only splits
# off the final extension, so a name containing extra dots is still handled.
image_1_stem, image_1_ext = os.path.splitext(image_1)

# random_transform() is not seeded here, so the generated images differ from
# run to run.
for i in range(8):
  transformed_image = image_gen.random_transform(case_5_1_image)
  plt.imsave(case_5_path + f'{image_1_stem}_{i + 1}{image_1_ext}', transformed_image)

# Second cat: just resize/rescale the image in place.
case_5_2_image = transformer_nn(imread(case_5_path + image_2)).numpy()
plt.imsave(case_5_path + image_2, case_5_2_image)