# Deep Fruits - Image Recognition

### General Modules

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import cv2
import os

## Load Training and Validation Data

In [2]:
# Data downloaded from:
# https://www.kaggle.com/moltean/fruits/data
# unziped and "fruits-360" folder placed on root (same folder as this notebook)

# Define paths to both training and validation data
train_path = 'fruits-360/Training/*'
valid_path = 'fruits-360/Validation/*'

In [5]:
def loadData(path):
    ''' Load the images and labels to raw numpy arrays
    Args: 
        path (str), path to folder to parse
        images (list), empty list where array of images will be stored
        labels (list), empty list where lables will be stored
    return:
        images (list, numpy array), contains the images loaded
        labels (list, numpy array), contains the labels for each image
    '''
    
    images = []
    labels = []
    
    # Count the unique labels in the set
    unique_labels = []
    
    for folder in glob.glob(path):
        label = folder.split('/')[-1]
        
        for file in glob.glob(os.path.join(folder, '*.jpg')):
            image = cv2.imread(file, cv2.IMREAD_COLOR)
            # imread loads an image from the specified file and returns it
            image = cv2.resize(image, (100, 100))
            # resize resizes the image to the specified size
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # converts an input image from one color space to another
            
            images.append(image)
            labels.append(label)
        
        # Append to unique labels in the set
        unique_labels.append(label)
    
    images = np.array(images)
    labels = np.array(labels)
    
    print('%s has %s unique labels, as follows %s' %(path, len(unique_labels), unique_labels))
    print(' ')
    print('%s contains %s images and %s labels' %(path, len(images), len(labels)))
    
    return images, labels

In [6]:
# TRAINING Data labels and description
train_images, train_labels = loadData(train_path)

fruits-360/Training/* has 60 unique labels, as follows ['Training\\Apple Braeburn', 'Training\\Apple Golden 1', 'Training\\Apple Golden 2', 'Training\\Apple Golden 3', 'Training\\Apple Granny Smith', 'Training\\Apple Red 1', 'Training\\Apple Red 2', 'Training\\Apple Red 3', 'Training\\Apple Red Delicious', 'Training\\Apple Red Yellow', 'Training\\Apricot', 'Training\\Avocado', 'Training\\Avocado ripe', 'Training\\Banana', 'Training\\Banana Red', 'Training\\Cactus fruit', 'Training\\Carambula', 'Training\\Cherry', 'Training\\Clementine', 'Training\\Cocos', 'Training\\Dates', 'Training\\Granadilla', 'Training\\Grape Pink', 'Training\\Grape White', 'Training\\Grape White 2', 'Training\\Grapefruit Pink', 'Training\\Grapefruit White', 'Training\\Guava', 'Training\\Huckleberry', 'Training\\Kaki', 'Training\\Kiwi', 'Training\\Kumquats', 'Training\\Lemon', 'Training\\Lemon Meyer', 'Training\\Limes', 'Training\\Litchi', 'Training\\Mandarine', 'Training\\Mango', 'Training\\Maracuja', 'Training\\

In [7]:
# VALIDATION Data labels and description
valid_images, valid_labels = loadData(valid_path)

fruits-360/Validation/* has 60 unique labels, as follows ['Validation\\Apple Braeburn', 'Validation\\Apple Golden 1', 'Validation\\Apple Golden 2', 'Validation\\Apple Golden 3', 'Validation\\Apple Granny Smith', 'Validation\\Apple Red 1', 'Validation\\Apple Red 2', 'Validation\\Apple Red 3', 'Validation\\Apple Red Delicious', 'Validation\\Apple Red Yellow', 'Validation\\Apricot', 'Validation\\Avocado', 'Validation\\Avocado ripe', 'Validation\\Banana', 'Validation\\Banana Red', 'Validation\\Cactus fruit', 'Validation\\Carambula', 'Validation\\Cherry', 'Validation\\Clementine', 'Validation\\Cocos', 'Validation\\Dates', 'Validation\\Granadilla', 'Validation\\Grape Pink', 'Validation\\Grape White', 'Validation\\Grape White 2', 'Validation\\Grapefruit Pink', 'Validation\\Grapefruit White', 'Validation\\Guava', 'Validation\\Huckleberry', 'Validation\\Kaki', 'Validation\\Kiwi', 'Validation\\Kumquats', 'Validation\\Lemon', 'Validation\\Lemon Meyer', 'Validation\\Limes', 'Validation\\Litchi', '

In [None]:
# Is the Data balanced?
plt.subplot(121)
plt.hist(train_labels, bins=60)
plt.title('Labels on Training Data')
plt.xlabel('Labels')
plt.ylabel('Occurences')
plt.subplot(122)
plt.hist(valid_labels, bins=60)
plt.title('Labels on Validation Data')
plt.show()

In [None]:
# Show a random TRAINING image and its label
rmd=np.random.randint(0,len(train_images))
plt.title(train_labels[rmd])
plt.imshow(np.asarray(train_images[rmd],dtype="uint8"),interpolation="bicubic")

In [None]:
# Show a random VALIDATION image and its label
rmd=np.random.randint(0,len(valid_images))
plt.title(valid_labels[rmd])
plt.imshow(np.asarray(valid_images[rmd],dtype="uint8"),interpolation="bicubic")

In [None]:
# Training and validation data distribution
labels = 'Training', 'Validation'
sizes = [len(train_images), len(valid_images)]

fig1, ax1 = plt.subplots()
plt.title('Training vs Validation data distribution')
ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()

## Training and Validation Data Normalization

In [None]:
#ToDo

### Convert to one hot
There are 60 different classes (labels) possible

In [None]:
def convertToOneHot(vector, num_classes=None):
    result = np.zeros((len(vector), num_classes), dtype='int32')
    result[np.arange(len(vector)), vector] = 1
    return result

In [None]:
train_labels[2000]

In [None]:
# Need to first convert the list of string labels to an int for the label, i.e. from 0 to 59
# Something like results = list(map(int, results))? or a loop?

In [None]:
train_labels_onehot = convertToOneHot(train_labels, num_classes=59)

## Define Network

### Network Modules

In [None]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras import backend as K
from keras.layers.normalization import BatchNormalization

### Layers

In [None]:
# Variables
'Training input has %s, training labels has %s' %(train_images.shape, train_labels.shape)

In [None]:
# Input shapes
inp_col = train_images.shape[1]
inp_row = train_images.shape[2]
inp_cha = 3
# Variables are train_images, train_labels and valid_images, valid_labels

In [None]:
'Input shape is %s cols by %s rows with %s channels' %(inp_col, inp_row, inp_cha)

### Model 1: First run with a Fully Connected

In [None]:
# Image flattening for the FC network

train_flatten=np.zeros([len(train_images), 45*45*3])
for i in range(0, len(train_images)):
    train_flatten[i]=np.reshape(train_images[i], newshape=((45*45*3),))
    
valid_flatten=np.zeros([len(valid_images), 45*45*3])
for i in range(0, len(valid_images)):
    valid_flatten[i]=np.reshape(valid_images[i], newshape=((45*45*3),))

'After flattening, training input has %s and valisation input has %s' %(train_flatten.shape, valid_flatten.shape)

In [None]:
batch_size = 128
nb_classes = 59  # Labels
nb_epoch = 30

In [None]:
# Simple Fully Connected Network
# No normalization, no dropout

model = Sequential()
name = 'simple_fc'

model.add(Dense(300, input_dim=(inp_col*inp_row*inp_cha)))
#model.add(BatchNormalization())
model.add(Activation('relu'))
#model.add(Dropout(0.5))

model.add(Dense(300))
#model.add(BatchNormalization())
model.add(Activation('relu'))
#model.add(Dropout(0.5))

model.add(Dense(300))
#model.add(BatchNormalization())
model.add(Activation('relu'))
#model.add(Dropout(0.5))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

### Model 1 Summary

In [None]:
model.summary()

### Model 1: Forward Pass of untrained network

In [None]:
model.evaluate(train_flatten, train_labels)

## Train

In [None]:
#ToDo

## Evaluate