In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
# load and display an image with Matplotlib
from matplotlib import image
from matplotlib import pyplot
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from numpy import asarray
import os
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input


# Task 1 - Getting the data in

# ift. opdelingen af test,train og validering har jeg simpelthen bare taget det manuelt fra den store mappe og opdelt det i 20% test og 20% af train til validering
# Dette betyder at vi fra det originale data har:
# test: 520 billeder, 260 af hver
# train: 1664 billeder, 832 af hver
# Validation: 416, 206 af hver

In [2]:
# goes through the folder containing the data and adds them to a list, generates labels and add data to validate labels against

def load_image_function(path):

    images = [] # empty list placeholder
    labels = [] # empty list placeholder
    animal_checker = 'cat' # used to create labels
    container = [] # container to validate correct labels

    for filename in os.listdir(path):

      container.append(filename) # add filename to container

      if animal_checker in filename:
        labels.append('cat') # check if cat is in filename. If so append cat to label
      elif animal_checker not in filename:
        labels.append('dog') # else add dog to label

      animal_photo = load_img(path + f'/{filename}', target_size = (150, 150)) # standardize photo size + loads
      animal_photo = img_to_array(animal_photo) # creates an array

      images.append(animal_photo) # append the photo to the images. The images list contains a list of arrays
    
    return asarray(images), asarray(labels), container; # not interested in list of arrays, but array containing lists. Asarray does this. Returns three arrays

In [27]:
train_data_cats, train_labels_cats, train_container_cats = load_image_function('./Train/cats') # load train data
train_data_cats.shape

train_data_dogs, train_labels_dogs, train_container_dogs = load_image_function('./Train/dogs') # load train data
train_data_dogs.shape

train_data = np.concatenate((train_data_cats,train_data_dogs))
train_labels = np.concatenate((train_labels_cats,train_labels_dogs))
train_container = np.concatenate((train_container_cats,train_container_dogs))
train_data.shape

(1664, 150, 150, 3)

In [28]:
print(f'The training labels is: {train_labels[:5]} and the imported is {train_container[:5]}') # check train data is correct loaded

The training labels is: ['cat' 'cat' 'cat' 'cat' 'cat'] and the imported is ['cat.1500.jpg' 'cat.1501.jpg' 'cat.1502.jpg' 'cat.1503.jpg'
 'cat.1504.jpg']


In [26]:
test_data_cats, test_labels_cats, test_container_cats = load_image_function('./Test/cats') # load train data
test_data_cats.shape

test_data_dogs, test_labels_dogs, test_container_dogs = load_image_function('./Test/dogs') # load train data
test_data_dogs.shape

test_data = np.concatenate((test_data_cats,test_data_dogs))
test_labels = np.concatenate((test_labels_cats,test_labels_dogs))
test_container = np.concatenate((test_container_cats,test_container_dogs))
test_data.shape

print(f'The test labels is: {test_labels[:5]} and the imported is {test_container[:5]}') # check test data is correct loaded

The test labels is: ['cat' 'cat' 'cat' 'cat' 'cat'] and the imported is ['cat.0.jpg' 'cat.1.jpg' 'cat.10.jpg' 'cat.100.jpg' 'cat.101.jpg']


In [29]:
val_data_cats, val_labels_cats, val_container_cats = load_image_function('./Validation/cats') # load train data
val_data_cats.shape

val_data_dogs, val_labels_dogs, val_container_dogs = load_image_function('./Validation/dogs') # load train data
val_data_dogs.shape

val_data = np.concatenate((val_data_cats,val_data_dogs))
val_labels = np.concatenate((val_labels_cats,val_labels_dogs))
val_container = np.concatenate((val_container_cats,val_container_dogs))
val_data.shape

print(f'The val labels is: {val_labels[:5]} and the imported is {val_container[:5]}') # check val data is correct loaded

The val labels is: ['cat' 'cat' 'cat' 'cat' 'cat'] and the imported is ['cat.230.jpg' 'cat.231.jpg' 'cat.232.jpg' 'cat.233.jpg' 'cat.234.jpg']


In [30]:
# checking if reshape is necessary. 

print(f'Train has shape: {train_data.shape} \nTest has shape: {test_data.shape} \nValidation has shape: {val_data.shape}')

# Since they all have (# rows, width, height, # of color channelse) reshaping is not necessary 

Train has shape: (1664, 150, 150, 3) 
Test has shape: (520, 150, 150, 3) 
Validation has shape: (416, 150, 150, 3)


# Task 2 - Create the datagenerators

Due to the high amount of data, you need to write a datagenerator to load the images. This requires you to use the _ImageDataGenerator_ (as shown in Keras Intro - 2) in which you must apply at least one on-the-fly data augmentation. Which type of data augmentation is up to you, but you need to justify your choice in your report. On top of the _ImagaDataGenerator_ you need to apply the _.flow_from_directory_ function to make it work with the directory tree in Task 1

We need to use (at least) one on-the-fly augmentation. Which is better to use? Zooming, flipping, shifting, changing brightness? Should we use more than one? 

ImageDataGenerator: 
https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
flow_from_directory: https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator#flow_from_directory

In [8]:
os.getcwd()

'C:\\Users\\Andreas Gahner\\Dropbox\\Egne dokumenter\\DS809 - Deep Learning (5 ECTS)\\Project Deep Learning\\Deep_learning_Data'

24-10-2021dd

In [42]:
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True) 
test_datagen = ImageDataGenerator(rescale=1./255) 

train_generator = train_datagen.flow_from_directory('Train/', target_size=(150, 150), batch_size=32, class_mode='binary') 
validation_generator = test_datagen.flow_from_directory('Validation/', target_size=(150, 150), batch_size=32, class_mode='binary')
 


Found 1664 images belonging to 2 classes.
Found 416 images belonging to 2 classes.


In [44]:
model = tf.keras.models.Sequential([tf.keras.layers.Dense(320, activation='relu', kernel_regularizer = tf.keras.regularizers.l2(0.001)),     
                                    tf.keras.layers.Dropout(.5, input_shape=(320,)),     
                                    tf.keras.layers.Dense(160, activation='relu'),     
                                    tf.keras.layers.Dense(80, activation='relu'),     
                                    tf.keras.layers.Dense(1, activation='softmax'),])   
model.compile(loss='SparseCategoricalCrossentropy', optimizer='Adam',metrics=['accuracy'],)

In [34]:
model.fit(train_generator, steps_per_epoch=2000, epochs=50,  validation_data=validation_generator, validation_steps=800)

ValueError: Asked to retrieve element 0, but the Sequence has length 0