In [1]:
#import data manipulation packages 
import pandas as pd 
import numpy as np 
import os
import cv2
import matplotlib.pyplot as plt
import warnings

In [2]:
#import deep learning tools 
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, Dropout, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image, image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow import keras
import tensorflow
from tensorflow.keras.applications.vgg19 import VGG19,preprocess_input

In [3]:
tensorflow.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
tensorflow.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
print("Num GPUs Available: ", len(tensorflow.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [6]:
# Set the seed value for experiment reproducibility.
seed = 1842
tensorflow.random.set_seed(seed)
np.random.seed(seed)
# Turn off warnings for cleaner looking notebook
warnings.simplefilter('ignore')

## Load in the Data

In [7]:
INPUT_SIZE=224

In [8]:
#define image dataset 
#why do we rescale?
image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2) #shear_range =.25, zoom_range =.2, horizontal_flip = True, rotation_range=20)     

#Train & Validation Split 
train_dataset = image_generator.flow_from_directory(batch_size=32,
                                                 directory='data_cleaned/Train',
                                                 shuffle=True,
                                                 target_size=(224, 224), 
                                                 subset="training",
                                                 class_mode='categorical')

validation_dataset = image_generator.flow_from_directory(batch_size=32,
                                                 directory='data_cleaned/Train',
                                                 shuffle=True,
                                                 target_size=(224, 224), 
                                                 subset="validation",
                                                 class_mode='categorical')

#Organize data for our predictions 
image_generator_submission = ImageDataGenerator(rescale=1/255) 
submission = image_generator_submission.flow_from_directory(
                                                 directory='data_cleaned/scraped_images',
                                                 shuffle=False,
                                                 target_size=(224, 224), 
                                                 class_mode=None)

Found 430 images belonging to 2 classes.
Found 106 images belonging to 2 classes.
Found 100 images belonging to 1 classes.


In [None]:
#show flowers for the first batch
batch_1_img = train_dataset[0]
for i in range(0,32):
    img = batch_1_img[0][i]
    lab = batch_1_img[1][i]
    plt.imshow(img)
    plt.title(lab)
    plt.axis('off')
    plt.show()

## Build Convolutional Neural Net

In [None]:
#experiment with Convolutional neural net
model = keras.models.Sequential([
    keras.layers.Conv2D(64, 3, activation='relu', input_shape = (224, 224,3)),
    keras.layers.MaxPooling2D(3,3),
    keras.layers.Conv2D(32, 3, activation='relu'),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(2, activation ='softmax')
])

In [None]:
model.compile(loss = 'binary_crossentropy',optimizer='adam', metrics = ['accuracy'])

callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                         patience=3,
                                         restore_best_weights=True)

In [None]:
model.fit(train_dataset, epochs=20, validation_data=validation_dataset, callbacks=callback)
#save model
#early stopping

In [None]:
loss, accuracy = model.evaluate(validation_dataset)
print("Loss:", loss)
print("Accuracy:", accuracy)

In [None]:
#model.predict(submission)

## Data Augmentation

In [None]:
["Recall","Precision"]

In [9]:
data_augmentation = keras.models.Sequential([
    keras.layers.RandomFlip("horizontal", input_shape = (224, 224,3)),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomContrast(0.1),
])



In [None]:
model_a = keras.Sequential([data_augmentation,model])

In [None]:
model_a.compile(loss = 'binary_crossentropy',optimizer='adam', metrics = ['accuracy'])

In [None]:
model_a.fit(train_dataset, epochs=20, validation_data=validation_dataset, callbacks=callback)
#save model
#early stopping

In [None]:
loss, accuracy = model_a.evaluate(validation_dataset)
print("Loss:", loss)
print("Accuracy:", accuracy)

## Hyperparameter tuning

In [10]:
import keras_tuner
from keras_tuner.tuners import RandomSearch

In [16]:
train_images = []
train_lables = []

for i in range(len(train_dataset)):
    batch = train_dataset[i]
    for j in range(len(batch[0])):
        train_images.append(batch[0][j])
        train_lables.append(batch[1][j])

test_images = []
test_lables = []

for i in range(len(validation_dataset)):
    batch = validation_dataset[i]
    for j in range(len(batch[0])):
        test_images.append(batch[0][j])
        test_lables.append(batch[1][j])
        
train_images = np.array(train_images)
train_lables = np.array(train_lables)
test_images = np.array(test_images)
test_lables = np.array(test_lables)

In [12]:
def build_model(hp):
    model = keras.Sequential()
    #model.add(data_augmentation)
    model.add(keras.layers.AveragePooling2D(4,2))

    for i in range(hp.Int("Conv Layers", min_value=0, max_value=3)):
        model.add(keras.layers.Conv2D(hp.Choice(f"layer_{i}_filters", [16,32,64]), 3 ,activation='relu'))
        model.add(keras.layers.MaxPool2D(2,2))
    

    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Flatten())

    model.add(keras.layers.Dense(hp.Choice("Dense layer", [64, 128, 256, 512, 1024]), activation='relu'))

    model.add(keras.layers.Dense(2, activation='softmax'))

    model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

    return model

tuner = RandomSearch(
    build_model,
    objective='val_loss',
    overwrite=True,
    max_trials=32,
)

In [13]:
tuner.search(train_images, train_lables, validation_data=(test_images, test_lables), epochs=10, batch_size=32)

Trial 32 Complete [00h 00m 08s]
val_loss: 0.16318279504776

Best val_loss So Far: 0.0909949541091919
Total elapsed time: 00h 09m 17s
INFO:tensorflow:Oracle triggered exit


In [14]:
best_model = tuner.get_best_models()[0]

In [18]:
best_model.evaluate(test_images, test_lables)



[0.0909949541091919, 0.9716981053352356]

In [23]:
loss, accuracy = best_model.evaluate(validation_dataset)
print("Loss:", loss)
print("Accuracy:", accuracy)

Loss: 0.0909949541091919
Accuracy: 0.9716981053352356


In [19]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 average_pooling2d (AverageP  (None, 111, 111, 3)      0         
 ooling2D)                                                       
                                                                 
 conv2d (Conv2D)             (None, 109, 109, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 54, 54, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 52, 52, 32)        18464     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 26, 26, 32)       0         
 2D)                                                             
                                                        

## Transfer Learning

In [24]:
vgg = VGG19(input_shape=(224,224,3),weights='imagenet',include_top=False)

In [25]:
for layer in vgg.layers:
    layer.trainable = False

In [28]:
x = Flatten()(vgg.output)
prediction = keras.models.Sequential([
    keras.layers.Dense(2, activation ='softmax')
])(x)

modelvgg = Model(inputs=vgg.input, outputs=prediction)

In [40]:
modelvgg.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [34]:
modelvgg.compile(
  loss='binary_crossentropy',
  optimizer="adam",
  metrics=['accuracy']
)
callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=3,
                                            restore_best_weights=True)

In [35]:
modelvgg.fit(train_dataset, epochs=10, validation_data=validation_dataset, callbacks=callback)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24b30debf70>

In [36]:
loss, accuracy = modelvgg.evaluate(validation_dataset)
print("Loss: ", loss)
print("Accuracy: ", accuracy)

Loss:  0.17076781392097473
Accuracy:  0.9528301954269409


In [41]:
onlyfiles = [f.split('.')[0] for f in os.listdir(os.path.join('data_cleaned/scraped_images/image_files')) if os.path.isfile(os.path.join(os.path.join('data_cleaned/scraped_images/image_files'), f))]
submission_df = pd.DataFrame(onlyfiles, columns =['images'])
submission_df[['la_eterna','other_flower']] = .5
submission_df.head()

Unnamed: 0,images,la_eterna,other_flower
0,img_00,0.5,0.5
1,img_01,0.5,0.5
2,img_02,0.5,0.5
3,img_03,0.5,0.5
4,img_04,0.5,0.5


In [42]:
submission_df.to_csv('submission_file_vgg.csv', index = False)