During augmentation, a random integer from 1 to 5 is drawn for each new image that is needed, and that integer determines which augmentation operation is applied to the selected source image. The augmentation options are:

1. Rotate 90 degrees clockwise.
2. Rotate 90 degrees counterclockwise.
3. Flip image vertically.
4. Flip image horizontally.
5. Flip image both vertically and horizontally. (Same as 180 degree rotation.)

In [None]:
# Mount Google Drive into the Colab runtime so the project folder is
# reachable under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

# To force a remount of Google Drive, uncomment the line below.
#drive.mount('/content/drive', force_remount = True)

# To unmount Google Drive from the notebook, uncomment the line below.
#drive.flush_and_unmount()

# Change into the project directory and list its contents (IPython magics)
# to confirm we are in the correct working directory.
%cd /content/drive/MyDrive/CX4041-Group-Project/
%ls\

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/CX4041-Group-Project
 [0m[01;34mCheckpoint[0m/                           Submission_Sample.xlsx
'CX4041 Group Project Report.docx'     Test_Ensemble.ipynb
 [01;34mModel-Figures[0m/                        [01;34mTest_Plant_Images[0m/
 [01;34mPreprocessed_Test_Images[0m/             [01;34mTrain_Plant_Images[0m/
 [01;34mPreprocessed_Train_Images[0m/            Train_Test_InceptionResNetV2.ipynb
 Preprocessing_Train_Images.ipynb      Train_Test_InceptionV3.ipynb
 Preprocess_Rename_Test_Images.ipynb   Train_Test_ResNet50.ipynb


In [None]:
# Import the required libraries.
import cv2
import os
import glob
from PIL import Image
from random import randint
import numpy as Numpy
from numpy import asarray

In [None]:
# Function to perform data augmentation for one category of images only.
# Images are saved to the target directory using this function.
def augment_one_category(train_images_list, classname, images_per_category=1000,
                         target_root='/content/drive/MyDrive/CX4041-Group-Project/Augmented_Train_Images/'):
  """Top up one category to a fixed image count and save every image as PNG.

  While the category holds fewer than `images_per_category` images, an
  original image is picked in round-robin order, transformed with one randomly
  chosen rotation or flip, and appended to `train_images_list`. All images
  (original + augmented) are then written to `<target_root>/<classname>/` as
  `<classname>_<n>.png`, numbered from 1.

  Args:
    train_images_list: List of images as numpy arrays. Presumably 3-channel
      BGR arrays as returned by `cv2.imread` (the save step converts
      BGR -> RGB) -- confirm against the caller. The list is extended IN
      PLACE with the augmented images.
    classname: Category name; used for the output subfolder and file prefix.
    images_per_category: Desired minimum number of images in the category.
      Categories that already hold at least this many images are saved
      unchanged (they are not trimmed).
    target_root: Directory under which the per-category subfolder is created.

  Returns:
    None. Results are written to disk and progress is printed.
  """
  # Create the per-category output directory if it does not exist yet.
  target_class_directory = os.path.join(target_root, classname)
  os.makedirs(target_class_directory, exist_ok=True)

  # The original count must be captured up front because the list grows as
  # augmented images are appended to it.
  original_class_count = len(train_images_list)
  print('Number of images read in for category', classname, 'is', original_class_count)

  # Without at least one source image there is nothing to augment or save;
  # bail out instead of dividing by zero in the round-robin selection below.
  if original_class_count == 0:
    print('No images available to augment for category', classname, '- skipping.')
    return

  # Augmentation operations, indexed by (random choice - 1).
  augmentations = (
      # 1: Rotate the image by 90 degrees clockwise.
      lambda img: cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE),
      # 2: Rotate the image by 90 degrees counterclockwise.
      lambda img: cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE),
      # 3: Vertical flip (flip code 0 mirrors around the x-axis).
      lambda img: cv2.flip(img, 0),
      # 4: Horizontal flip (flip code 1 mirrors around the y-axis).
      lambda img: cv2.flip(img, 1),
      # 5: Flip around both axes (flip code -1), same as a 180 degree rotation.
      lambda img: cv2.flip(img, -1),
  )

  additional_required_images = images_per_category - original_class_count
  if additional_required_images > 0:
    print('Additonal number of images needed for category', classname, 'is', additional_required_images)

  # Keep generating augmented images until the desired count is reached.
  while additional_required_images > 0:
    # The modulo over the ORIGINAL count cycles evenly through the original
    # images only, so already-augmented images are never augmented again.
    copied_image = train_images_list[additional_required_images % original_class_count]

    # Pick one of the five operations at random and apply it to the image
    # selected above.
    augment = augmentations[randint(1, 5) - 1]
    train_images_list.append(augment(copied_image))
    additional_required_images -= 1

  # Report the final number of images (original + augmented) in the category.
  print('Number of original + augmented images for category', classname, 'is', len(train_images_list))

  # Save every image to the target directory.
  for image_number, current_image in enumerate(train_images_list, start=1):
    # OpenCV images are in BGR channel order; PIL expects RGB.
    rgb_image = cv2.cvtColor(current_image, cv2.COLOR_BGR2RGB)

    # Convert the numpy array into a PIL image and save it under its
    # category subfolder, named '<classname>_<n>.png'.
    image_name = classname + '_' + str(image_number) + '.png'
    Image.fromarray(rgb_image).save(os.path.join(target_class_directory, image_name))

  print('Finished augmenting images for class:', classname)

In [None]:
# Read in a list of all the training image subfolders, sorted by name so the
# categories are processed in a stable order.
train_image_folders = sorted(glob.glob('/content/drive/MyDrive/CX4041-Group-Project/Train_Plant_Images/*'))

# Read in the training images from each of their subfolders.
for class_folder in train_image_folders:

  # Extract the category name from the subfolder path.
  classname = os.path.basename(class_folder)
  #print('Begin reading images from category:', classname)

  # List 'source_class_path_list' holds the paths of all the files for the
  # current category. Sorting makes the numbering of the saved images
  # reproducible, since glob returns paths in arbitrary order.
  source_class_path_list = sorted(glob.glob(os.path.join(class_folder, '*')))

  # List to hold all the images in numpy array format for one category.
  train_images_list = []

  # Read in all the images from that category and store them into the list.
  for image_path in source_class_path_list:
    current_image = cv2.imread(image_path)

    # cv2.imread returns None (it does not raise) when a file is missing,
    # corrupt or not an image. Skip such files here; otherwise they would
    # only fail later, inside the augmentation step.
    if current_image is None:
      print('Skipping unreadable image file:', image_path)
      continue

    # Append the numpy array image to the list of images for one category.
    train_images_list.append(current_image)

  # Perform error checking to ensure that all the read in images are of numpy data type.
  # No output should be produced by the code below if there are no errors.
  for image in train_images_list:
      if type(image).__module__ != 'numpy':
        print('Detected image not of numpy data type.')

  # Pass the list of images and the class name to the function for performing data augmentation.
  # Images are saved to the target directory from this function.
  augment_one_category(train_images_list, classname)
  print()

Number of images read in for category Black Grass is 263
Additonal number of images needed for category Black Grass is 737
Number of original + augmented images for category Black Grass is 1000
Finished augmenting images for class: Black Grass

Number of images read in for category Charlock is 390
Additonal number of images needed for category Charlock is 610
Number of original + augmented images for category Charlock is 1000
Finished augmenting images for class: Charlock

Number of images read in for category Cleavers is 287
Additonal number of images needed for category Cleavers is 713
Number of original + augmented images for category Cleavers is 1000
Finished augmenting images for class: Cleavers

Number of images read in for category Common Chickweed is 611
Additonal number of images needed for category Common Chickweed is 389
Number of original + augmented images for category Common Chickweed is 1000
Finished augmenting images for class: Common Chickweed

Number of images read in