In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sn
import os
import pathlib

from PIL import Image
from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from numpy import asarray
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
# Cells that list several bare `.shape` expressions rely on this setting.
InteractiveShell.ast_node_interactivity = "all"

# List the physical devices TensorFlow can see (a GPU entry shows up here when one is usable)
print(tf.config.list_physical_devices())

# See TensorFlow version
print(tf.__version__)

In [None]:
# Root of the Fruits-360 dataset, relative to the notebook location.
base_dir = '../data/fruits-360/'

# The dataset ships with one folder per split directly under the root.
test_dir, train_dir = (
    os.path.join(base_dir, split_folder) for split_folder in ('Test', 'Training')
)

In [None]:
def _count_files(root):
    """Return how many files exist anywhere below ``root`` (0 if nothing is walked)."""
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(root):
        total += len(filenames)
    return total


train_img_count = _count_files(train_dir)
test_img_count = _count_files(test_dir)

print(f'Number of files in the training folder: {train_img_count}')
print(f'Number of files in the test folder: {test_img_count}\n')

# Every non-hidden entry of the training folder is treated as a class name;
# the names are ordered case-insensitively.
nr_classes = [entry for entry in os.listdir(train_dir) if not entry.startswith(".")]
nr_classes.sort(key=str.lower)

print(f'Number of classes: {len(nr_classes)}')
print(f'Names of classes: {nr_classes}')

In [None]:
# Switch to pathlib so each class folder can be globbed directly.
train_dir = pathlib.Path(train_dir)
test_dir = pathlib.Path(test_dir)

# Six classes to preview, one training image each.
fruit6 = ['Kiwi', 'Pear', 'Peach', 'Avocado', 'Blueberry', 'Tomato 3']

# A 2x3 grid holds the six previews exactly (no empty row).
plt.figure(figsize=(10, 7))

for i, fruit in enumerate(fruit6):
    ax = plt.subplot(2, 3, i + 1)

    # Sort the matches so the same image is shown on every run; the order in
    # which glob yields directory entries is not guaranteed.
    fruit_list = sorted(train_dir.glob(fruit + '/*'))
    if not fruit_list:
        raise FileNotFoundError(f'No images found for class {fruit!r} in {train_dir}')

    # The context manager releases the file handle once the pixels are drawn.
    with Image.open(str(fruit_list[0])) as img:
        plt.imshow(img)
    plt.title(fruit)
    plt.axis("off")

In [None]:
# Loading the dataset

def load_fruits360(path):
    """Index an image folder that contains one sub-directory per class.

    Only file paths and labels are collected. ``load_content=False`` stops
    scikit-learn from reading the raw bytes of every image into memory,
    which the callers never use (images are decoded later from the paths).

    Parameters
    ----------
    path : str or path-like
        Directory whose immediate sub-folders are the class folders.

    Returns
    -------
    files : np.ndarray
        Paths of the files found, in the order produced by ``load_files``.
    target_fruits : np.ndarray
        Integer class index for each entry of ``files``.
    target_labels_fruits : np.ndarray
        Class names; ``target_labels_fruits[k]`` is the name of class index ``k``.
    """
    data_loading = load_files(path, load_content=False)
    files = np.array(data_loading['filenames'])
    target_fruits = np.array(data_loading['target'])
    target_labels_fruits = np.array(data_loading['target_names'])

    return files, target_fruits, target_labels_fruits

# Index both splits. At this point X_* hold file paths (not pixels) and y_* hold
# integer class indices; the class names returned for the test split are discarded.
X_train, y_train, target_labels = load_fruits360(train_dir)
X_test, y_test, _ = load_fruits360(test_dir)

In [None]:
# Number of distinct labels actually present in the training targets
print(f'Number of classes: {len(np.unique(y_train))}')

# Bare expressions: every shape is echoed as cell output because
# InteractiveShell.ast_node_interactivity is set to "all" in the setup cell.
X_train.shape
y_train.shape
X_test.shape
y_test.shape

In [None]:
# Disabled alternative: split the test set into validation and test parts by index

#X_test, X_valid = X_test[11344:],X_test[:11344]
#y_test, y_valid = y_test[11344:],y_test[:11344]

# Hold out 20% of the training set as the validation set (random_state fixed so the split is repeatable).
# NOTE: X_train / y_train are overwritten here, so re-running this cell without reloading
# the data splits the already-reduced training set again.

X_train, X_valid, y_train, y_valid  = train_test_split(X_train, y_train, test_size = 0.20, random_state = 1)

In [None]:
def convert_image_to_nparray(files):
    """Decode image files into NumPy arrays.

    Parameters
    ----------
    files : iterable of str or path-like
        Paths of the images to decode.

    Returns
    -------
    list of np.ndarray
        One array per input path, in input order. Every array has three
        colour channels so the results can be stacked into a single batch.
    """
    images = []
    for file in files:
        # The context manager closes the underlying file as soon as the pixels
        # have been copied, instead of leaving one open handle per image.
        with Image.open(file) as img:
            # The network consumes 3-channel input; normalise any grayscale,
            # palette or RGBA image so all arrays share the same shape.
            if img.mode != 'RGB':
                img = img.convert('RGB')
            images.append(asarray(img))
    return images

def _decode_split(paths, split_name):
    """Decode the images at ``paths`` into one stacked array and report its shape."""
    stacked = np.array(convert_image_to_nparray(paths))
    print(f'{split_name} set shape: {stacked.shape}')
    return stacked


# Each X_* variable is replaced in place: file paths in, pixel arrays out.
X_train = _decode_split(X_train, 'Training')

X_valid = _decode_split(X_valid, 'Validation')

X_test = _decode_split(X_test, 'Test')


In [None]:
# Size of the output layer: one unit per class name returned by the loader
num_classes = len(target_labels)

# Spatial size (in pixels) of the images fed to the network
img_height, img_width = 100, 100

In [None]:
# Baseline CNN: rescale pixels to [0, 1], three Conv2D + MaxPooling2D stages
# with 16/32/64 filters, dropout, then a dense head that emits one logit per class.

feature_extractor = []
for n_filters in (16, 32, 64):
    feature_extractor.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    feature_extractor.append(layers.MaxPooling2D())

model = Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    *feature_extractor,
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes),
])

In [None]:
# Compile the model

# from_logits=True: the loss receives the raw outputs of the last Dense layer.
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer='adam',
    loss=logits_loss,
    metrics=['accuracy'],
)

In [None]:
# Model summary

# Print an overview of the model that was just compiled
model.summary()

In [None]:
# Train the model

# Training settings kept together so they are easy to spot and tweak.
fit_options = dict(
    batch_size=32,
    epochs=10,
    validation_data=(X_valid, y_valid),  # Validation images & labels
)

# Positional arguments are the training images and their labels.
history = model.fit(X_train, y_train, **fit_options)


In [None]:
# Accuracy on test set

# evaluate() returns [loss, accuracy] because the model is compiled with metrics=['accuracy']
accuracy = model.evaluate(X_test, y_test) # Test the model
print(f'Test accuracy: {accuracy[1]:.3f}')

# Per-epoch curves recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history instead of repeating the epoch
# count, so the plot stays valid if the number of epochs changes.
epochs_range = range(len(acc))

plt.figure(figsize=(16, 8))

# Left panel: accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss (fills the second slot of the 1x2 grid)
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

In [None]:
# Save model

# Persist the baseline model (trained with the validation split taken from the training set).
# NOTE(review): newer Keras releases may require a `.keras` or `.h5` suffix on this path — confirm for the installed version.
model.save('../models/cnn-split-train-val')

In [None]:
# Build a TFLite converter from the in-memory Keras model and run the conversion
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to models.tflite in the current working directory
with open('models.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Predictions

# To predict with the previously saved baseline instead of the in-memory model, reload it first:
# model = keras.models.load_model('../models/cnn-split-train-val') # Load model

# One row of raw class scores (logits) per test image
predictions = model.predict(X_test)

In [None]:
# Show 16 distinct, randomly chosen test images with the predicted class and its score

fig = plt.figure(figsize=(16, 9))
sample_indices = np.random.choice(X_test.shape[0], size=16, replace=False)

for position, idx in enumerate(sample_indices, start=1):
    ax = fig.add_subplot(4, 4, position, xticks=[], yticks=[])
    ax.imshow(np.squeeze(X_test[idx]))

    # The model outputs logits; softmax turns them into scores that sum to 1.
    logits = predictions[idx]
    pred_idx = np.argmax(logits)
    score = tf.nn.softmax(logits)

    caption = "{}, Confidence: {:.2f}".format(target_labels[pred_idx], 100 * np.max(score))
    ax.set_title(caption)

In [None]:
# Data augmentation to reduce overfitting: random flips, rotations and zooms
# applied to the input images before they reach the network.

augmentation_input_shape = (img_height, img_width, 3)

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical", input_shape=augmentation_input_shape),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
])

# Second network: the augmentation pipeline in front of the same CNN layout
# (rescaling, three conv/pool stages, dropout, dense head emitting logits).
# Note that `model` is rebound here and no longer refers to the baseline network.

conv_blocks = []
for n_filters in (16, 32, 64):
    conv_blocks += [
        layers.Conv2D(n_filters, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
    ]

model = Sequential([
    data_augmentation,
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    *conv_blocks,
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, name="outputs"),
])

# The output layer has no activation, hence from_logits=True.
augmented_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=augmented_loss, metrics=['accuracy'])

model.summary()

In [None]:
# Train the augmented model (batch size 32, 10 epochs, progress output enabled)

augmented_fit_options = {
    'batch_size': 32,
    'epochs': 10,
    'validation_data': (X_valid, y_valid),
    'verbose': 1,
}

# `history` is overwritten with the curves of this second training run.
history = model.fit(X_train, y_train, **augmented_fit_options)


In [None]:
# Persist the model trained with data augmentation, separately from the baseline save.
# NOTE(review): newer Keras releases may require a `.keras` or `.h5` suffix on this path — confirm for the installed version.
model.save('../models/cnn-data-augmented') # Save model