In [None]:
pip install gdown

In [None]:
import gdown

# Google Drive location of the zipped dataset and the local file name the
# archive is stored under (relative to the current working directory).
url = 'https://drive.google.com/uc?id=1ng9cI_stT2tMco3UFVotR549HAmGhofk'
output = 'train_test_zip'

# The call is the last expression of the cell, so its return value is displayed.
gdown.download(url=url, output=output)

In [None]:
# Extract the archive downloaded in the previous cell into the current
# working directory.
!unzip train_test_zip

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.keras as tfk
import tensorflow.keras.layers as tfkl
import random
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import shutil

In [None]:
# Single seed shared by every random number generator used in the notebook,
# so that runs are repeatable.
seed = 42

# Hash seed for Python, exported through the environment.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed the Python, NumPy and TensorFlow (v2 and v1-compat) generators.
for seed_fn in (random.seed,
                np.random.seed,
                tf.random.set_seed,
                tf.compat.v1.set_random_seed):
    seed_fn(seed)

In [None]:
# os.chdir('/')
# %cd 'gdrive/MyDrive/Colab Notebooks/AN2&DL competition/dataset'

In [None]:
# Work from the filesystem root: the dataset paths defined in this notebook
# are written relative to it.
os.chdir('/')

In [None]:
# Root folder of the extracted dataset. The path is absolute so that the
# training/validation folders resolve correctly regardless of the current
# working directory (the relative form only worked right after os.chdir('/')).
dataset_dir = os.path.join(os.sep, 'kaggle', 'working')

# One sub-folder per split; each is expected to contain one folder per class.
training_dir = os.path.join(dataset_dir, 'train')
val_dir      = os.path.join(dataset_dir, 'val')

# train_gen = train_data_gen.flow_from_directory(directory=training_dir,
#                                                color_mode='rgb',
#                                                classes=None,
#                                                batch_size=8,
#                                                shuffle=True,
#                                                seed=seed)

# test_gen  = test_data_gen.flow_from_directory(directory=test_dir,
#                                                color_mode='rgb',
#                                                classes=None,
#                                                batch_size=8,
#                                                shuffle=True,
#                                                seed=seed)

# val_gen = val_data_gen.flow_from_directory(directory=val_dir,
#                                                color_mode='rgb',
#                                                classes=None,
#                                                batch_size=8,
#                                                shuffle=True,
#                                                seed=seed)

In [None]:
# Move to the filesystem root and list its content (manual sanity check).
os.chdir('/')
!ls

In [None]:
# Class index -> class name. The index is the position of the name in the
# list below, i.e. 0 is 'Apple' and 13 is 'Tomato'.
labels = dict(enumerate([
    'Apple',
    'Blueberry',
    'Cherry',
    'Corn',
    'Grape',
    'Orange',
    'Peach',
    'Pepper',
    'Potato',
    'Raspberry',
    'Soybean',
    'Squash',
    'Strawberry',
    'Tomato',
]))

In [None]:
# Offline data augmentation: for every class except Tomato, one augmented copy
# of each training image is written next to the originals (file prefix 'aug').

# Random geometric transformations applied to each image.
# Integer shift ranges are interpreted by Keras as pixels, not fractions.
gen = ImageDataGenerator(rotation_range=30,
                         height_shift_range=50,
                         width_shift_range=50,
                         zoom_range=0.3,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='reflect',
                         )

# Augment every class except for Tomato (index 13 in `labels`).
classes_to_augment = [name for name in labels.values() if name != 'Tomato']

# training_dir may be expressed relative to the filesystem root.
os.chdir('/')

for class_name in classes_to_augment:
  class_dir  = os.path.join(training_dir, class_name)
  # flow_from_directory needs one sub-folder per class, so the images are
  # temporarily nested one level deeper, e.g. .../Apple/Apple.
  nested_dir = os.path.join(class_dir, class_name)

  # exist_ok lets the cell be resumed after an interrupted run instead of
  # crashing on a folder left behind by the previous attempt.
  os.makedirs(nested_dir, exist_ok=True)

  # move images from e.g. .../Apple to .../Apple/Apple
  for item in os.listdir(class_dir):
    src = os.path.join(class_dir, item)
    if os.path.isfile(src):
      shutil.move(src, os.path.join(nested_dir, item))

  # Augmented images are saved in the class folder itself (not in the nested
  # one), so they are never picked up again as inputs by this iterator.
  aug_iter = gen.flow_from_directory(directory = class_dir,
                                     target_size = (256, 256),
                                     color_mode = 'rgb',
                                     classes = None,
                                     class_mode = 'categorical',
                                     batch_size = 30,
                                     seed = seed,
                                     save_to_dir = class_dir,
                                     save_prefix = 'aug',
                                     save_format = 'jpg')

  # Drawing a batch is what triggers the save to disk; one full pass over the
  # iterator produces one augmented image per original image.
  for j in range(len(aug_iter)):
    next(aug_iter)
    # j is batch number, i.e. 1st batch, 2nd batch, ...
    print(j)

  # move images from e.g. .../Apple/Apple back to .../Apple
  for item in os.listdir(nested_dir):
    shutil.move(os.path.join(nested_dir, item), os.path.join(class_dir, item))

  # remove the temporary directory, e.g. .../Apple/Apple (empty at this point)
  os.rmdir(nested_dir)

In [None]:
# delete augmented images
for index in labels:
  path1 = os.path.join(training_dir, labels[index])
  for item in os.listdir(path1):
    if os.path.isfile(os.path.join(path1, item)):
      os.remove(os.path.join(path1, item))


In [None]:
os.chdir('working')
!ls

In [None]:
# Creating an instance of ImageDataGenerator without Data Augmentation
noaug_train_data_gen = ImageDataGenerator(preprocessing_function=tfk.applications.xception.preprocess_input)
valid_data_gen       = ImageDataGenerator(preprocessing_function=tfk.applications.xception.preprocess_input)
test_data_gen        = ImageDataGenerator(preprocessing_function=tfk.applications.xception.preprocess_input)

os.chdir('/')

noaug_train_gen = noaug_train_data_gen.flow_from_directory(directory=training_dir,
                                                           target_size=(256,256),
                                                           color_mode='rgb',
                                                           classes=None,
                                                           class_mode='categorical',
                                                           batch_size=64,
                                                           shuffle=True,
                                                           seed=seed)

valid_gen = valid_data_gen.flow_from_directory(directory=val_dir,
                                               target_size=(256,256),
                                               color_mode='rgb',
                                               classes=None,
                                               class_mode='categorical',
                                               batch_size=64,
                                               shuffle=False, # no need to shuffle validation set
                                               seed=seed)

In [None]:
# supernet = tfk.applications.VGG16(
#     include_top=False,
#     weights="imagenet",
#     input_shape=(256,256,3)
# )

# Pre-trained backbone: InceptionResNetV2 with ImageNet weights and without
# its classification head, fed with 256x256 RGB images.
supernet = tfk.applications.InceptionResNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(256, 256, 3),
)

In [None]:
# Use the supernet as feature extractor
supernet.trainable = True

inputs = tfk.Input(shape=(256,256,3))
# x = tfkl.Resizing(64, 64, interpolation="bicubic")(inputs)
x = supernet(inputs)
x = tfkl.GlobalAveragePooling2D(name="gl_avg_pool")(x)
# x = tfkl.Flatten(name='Flattening')(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
# x = tfkl.BatchNormalization()(x)
x = tfkl.Dense(
    256, 
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
# x = tfkl.BatchNormalization()(x)
x = tfkl.Dense(
    256, 
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
# x = tfkl.BatchNormalization()(x)
x = tfkl.Dense(
    256, 
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
# x = tfkl.BatchNormalization()(x)
x = tfkl.Dense(
    256, 
    activation='relu',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)
x = tfkl.Dropout(0.3, seed=seed)(x)
# x = tfkl.BatchNormalization()(x)
outputs = tfkl.Dense(
    14, 
    activation='softmax',
    kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)


# Connect input and output through the Model class
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Freeze first N layers, e.g., until 14th
for i, layer in enumerate(tl_model.get_layer('inception_resnet_v2').layers[:-14]):
  layer.trainable=False
for i, layer in enumerate(tl_model.get_layer('inception_resnet_v2').layers):
   print(i, layer.name, layer.trainable)


# Compile the model
tl_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(), metrics=[tfk.metrics.CategoricalAccuracy(), tfk.metrics.AUC()])

In [None]:
# Print the layers and parameter counts of the assembled model.
tl_model.summary()

In [None]:
# Mark the whole backbone as trainable.
# NOTE(review): Keras propagates `trainable` to the sublayers, so this
# presumably reverts the freezing of all-but-the-last-14 layers done when the
# model was built; it also runs after compile(). Confirm that this is
# intended and recompile the model if the change must take effect.
supernet.trainable = True
# # Freeze first N layers, e.g., until 14th
# for i, layer in enumerate(tl_model.get_layer('vgg16').layers[:14]):
#     layer.trainable=False
# for i, layer in enumerate(tl_model.get_layer('vgg16').layers):
#     print(i, layer.name, layer.trainable)

In [None]:
from sklearn.utils import compute_class_weight

# "Balanced" class weights computed from the integer class ids of the
# training generator, stored as a {class id: weight} dictionary.
# NOTE(review): the fit call visible in this notebook does not pass
# `class_weight`, so these weights are currently informational only -
# confirm whether they should be used during training.
train_class_ids = np.unique(noaug_train_gen.classes)
class_weights = dict(zip(
    train_class_ids,
    compute_class_weight(class_weight="balanced",
                         classes=train_class_ids,
                         y=noaug_train_gen.classes),
))
class_weights

In [None]:
# Train the model
os.chdir('/')
tl_history = tl_model.fit(
    x = noaug_train_gen,
    epochs = 150,
    validation_data = valid_gen,
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)]
).history


In [None]:
# Display the current working directory (manual sanity check).
os.getcwd()

In [None]:
# Save the trained model in the folder 'inc_rn_model_with_data_aug' inside
# /kaggle/working (reached by moving to the root first, then to the relative
# path).
import os
os.chdir('/')
os.chdir('kaggle/working')
tl_model.save('inc_rn_model_with_data_aug')

In [None]:
# Zip the saved model folder into 'inc_rn.zip' in the current working
# directory; the path of the created archive is displayed as the cell output.
shutil.make_archive(
    base_name="inc_rn",
    format="zip",
    root_dir="inc_rn_model_with_data_aug",
)

In [None]:
# List the current working directory (manual sanity check).
!ls

In [None]:
# Render a link to the zipped model in the notebook output.
from IPython.display import FileLink
FileLink(r'inc_rn.zip')

In [None]:
# Predict the test set with the CNN
os.chdir('/')
predictions = tl_model.predict(val_gen)
predictions.shape

In [None]:
# Turn the predicted probabilities into one-hot vectors: take the most likely
# class of each sample, then encode it over the 14 classes.
predicted_class_ids = np.argmax(predictions, axis=-1)
one_hot_preds = tfk.utils.to_categorical(predicted_class_ids, num_classes=14)
len(one_hot_preds)

In [None]:
# Ground-truth class ids of the evaluated images.
# They are read from the validation generator, i.e. the same generator the
# predictions are computed on (the previously referenced `test_gen` is never
# defined). `.classes` holds one integer id per image in directory order,
# which matches the prediction order because valid_gen does not shuffle.
# This also avoids re-loading every batch of images just to read its labels.
y_test = np.asarray(valid_gen.classes)

In [None]:
# Display the ground-truth class ids (manual sanity check).
y_test

In [None]:
# Compute the confusion matrix
cm = confusion_matrix(y_test, np.argmax(predictions, axis=-1))

labels = {
    0:'Apple',     
    1:'Blueberry', 
    2:'Cherry',    
    3:'Corn',      
    4:'Grape',     
    5:'Orange',    
    6:'Peach',     
    7:'Pepper',    
    8:'Potato',    
    9:'Raspberry', 
    10:'Soybean',   
    11:'Squash',     
    12:'Strawberry',
    13:'Tomato'
}

# Compute the classification metrics
accuracy = accuracy_score(y_test, np.argmax(predictions, axis=-1))
precision = precision_score(y_test, np.argmax(predictions, axis=-1), average='weighted')
recall = recall_score(y_test, np.argmax(predictions, axis=-1), average='weighted')
f1 = f1_score(y_test, np.argmax(predictions, axis=-1), average='weighted')
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# Plot the confusion matrix
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, xticklabels=list(labels.values()), yticklabels=list(labels.values()))
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

In [None]:
# Set all layers to True
ft_model.get_layer('vgg16').trainable = True
for i, layer in enumerate(ft_model.get_layer('vgg16').layers):
   print(i, layer.name, layer.trainable)

In [None]:
# Freeze first N layers, e.g., until 14th
for i, layer in enumerate(ft_model.get_layer('vgg16').layers[:14]):
  layer.trainable=False
for i, layer in enumerate(ft_model.get_layer('vgg16').layers):
   print(i, layer.name, layer.trainable)

In [None]:
# Compile the model
ft_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(1e-4), metrics='accuracy')

In [None]:
# Predict the test set with the CNN
predictions = ft_model.predict(X_test)
predictions.shape

# Compute the confusion matrix
cm = confusion_matrix(np.argmax(y_test, axis=-1), np.argmax(predictions, axis=-1))

# Compute the classification metrics
accuracy = accuracy_score(np.argmax(y_test, axis=-1), np.argmax(predictions, axis=-1))
precision = precision_score(np.argmax(y_test, axis=-1), np.argmax(predictions, axis=-1), average='macro')
recall = recall_score(np.argmax(y_test, axis=-1), np.argmax(predictions, axis=-1), average='macro')
f1 = f1_score(np.argmax(y_test, axis=-1), np.argmax(predictions, axis=-1), average='macro')
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# Plot the confusion matrix
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, xticklabels=list(labels.values()), yticklabels=list(labels.values()))
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()