# 1. Install Dependencies and Setup

We now have new and improved documentation at:

https://docs.google.com/document/d/1Nbtx1lg2J6yfMFdQdcewOzdcUaUSZ7qScDBjyGNJVQE/edit?usp=sharing

^^^ Do all of this stuff first before proceeding here! ^^^

Note: The first time you do this in Visual Studio Code, it may ask you which Python environment to use. Select the one you previously initialized.

In [None]:
import tensorflow as tf
import os

In [None]:
# Avoid OOM errors by setting GPU Memory Consumption Growth
# This was in a tutorial. Not sure if it's really necessary but run just in case..
# If the machine has no GPU, the list below is empty and the loop does nothing.

## IMPORTANT: If you are on Windows, uncomment this line: ##
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth has to be set before the GPUs are initialized. If this cell is
    # re-run later in the session, report the problem instead of stopping the notebook.
    print(err)

In [None]:
# Shows us which GPUs our system has access to. It's okay if you don't have any
# (the displayed list will simply be empty).
tf.config.list_physical_devices('GPU')

# 2. Check out our training set

In [None]:
import cv2
from PIL import Image

In [None]:
# TODO: Change 'data' to the name of your training set directory
# The last line lists the directory's contents: you should see a list of classes
data_dir = 'data'
os.listdir(data_dir)

# 3. Load Data

In [None]:
import numpy as np
from matplotlib import pyplot as plt

In [None]:
# This formats our data: it builds a shuffled dataset of (images, class labels)
# batches from the class folders inside data_dir.
# TODO: Ensure the image size is kept at (100, 100). Later cells resize images to
# 100x100 before predicting, so the training images need to match.
# Hint: look at the `image_size` argument (NOTE: the default is presumably not
# 100x100 -- check the Keras docs for your version).
data = tf.keras.utils.image_dataset_from_directory(data_dir, shuffle=True)

In [None]:
# Creates an iterator over our dataset.
# Each time we ask this iterator for its next item, it gives us a new batch of data
data_iterator = data.as_numpy_iterator()

In [None]:
# Grab one batch: batch[0] holds the images and batch[1] holds their class labels.
# Expected shape: 32 images per batch, 100x100, 3 channels (R, G, B)
batch = next(data_iterator)
batch[0].shape

# 4. Scale Data

1. Our tensorflow model works with values between 0 and 1.
2. Our images give us pixel R, G, B values from 0-255.

Thus, we need to scale our input data down.

In [None]:
# Hint: Pixel values range from 0-255. We want to scale x to range between 0-1.
# x represents our data, and y represents our class. Therefore, we shouldn't worry about y

# TODO: Uncomment + complete the following statement:
# data = data.map(lambda x,y: (x, y))

In [None]:
# This will now give us an iterator with our SCALED data!
# (assuming the scaling TODO in the previous cell has been completed)
scaled_iterator = data.as_numpy_iterator()
batch = next(scaled_iterator)


In [None]:
# Once previous TODOs are complete, you should see 4 100x100 images here (of fruits, hopefully)
# Each image is titled with its class label from the same batch.
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
images, labels = batch[0], batch[1]
for axis, image, label in zip(ax, images[:4], labels):
    axis.imshow(image)
    axis.title.set_text(label)

# 5. Include Test Data

1. Now, it's your turn. Using the '+ Code' button, complete the same steps. Except this time, with the testing directory...

**Note: for functionality, you'll only need to pattern-match some of the lines.**

# 6. Create Validation Set

Steal from our training set to form our validation set. This is not usually optimal, but it should work for our purposes, especially since our data is now shuffled when we pull it from our Fruits Training directory.

In [None]:
# Split our training batches roughly 70% / 30% into training and validation.
# int() rounds down, so the validation size is computed as "whatever is left over".
# That way no batch is silently dropped (e.g. 11 batches -> 7 train + 4 val,
# where int(11 * 0.7) + int(11 * 0.3) would only cover 7 + 3 = 10).
train_size = int(len(data) * 0.7)
val_size = len(data) - train_size
# Leave our test data alone

# TODO: put in the name of your test_data here
test_size = len(test_data)

# TODO: Make sure train_size + val_size + test_size lines up with the total size of your data...

In [None]:
# Notice how we separate the training + validation data...
# train gets the first train_size batches; val skips those and takes the next val_size batches.
train = data.take(train_size)
val = data.skip(train_size).take(val_size)
# The test set comes from its own dataset, so we take all test_size batches of it
test = test_data.take(test_size)

# 7. Build Deep Learning Model
(We will get into this more the second week)

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.regularizers import l2

In [None]:
# Start with an empty Sequential model; the layers get added in the next cell
model = Sequential()

In [None]:
# TODO: Add in all your layers here...

In [None]:
# Configure training: the 'adam' optimizer, sparse categorical cross-entropy loss,
# and accuracy as the metric to report.
# NOTE(review): this loss presumably expects integer class labels -- confirm against how your dataset labels are produced
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Print a summary of the model we just built, as a quick check before training
model.summary()

# 8. Train

In [None]:
# Directory name for training logs (used by the TensorBoard callback in the next cell)
logdir='logs'

In [None]:
# TensorBoard callback that logs into logdir; it only takes effect once it is handed to model.fit()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
# TODO: Complete the arguments for model.fit() You should now be able to train your model!
# Hints: train on `train` and validate on `val` (the plots in section 9 read 'val_loss' and
# 'val_accuracy' from hist.history), and pass `tensorboard_callback` if you want logs.
# As written, model.fit() is given no data, so fill in the arguments before running this cell.
hist = model.fit()

# 9. Plot Performance

In [None]:
# Plot the training loss and the validation loss recorded in hist (both should be decreasing)
fig = plt.figure()
for series, colour in (('loss', 'teal'), ('val_loss', 'orange')):
    plt.plot(hist.history[series], color=colour, label=series)
fig.suptitle('Loss', fontsize=20)
plt.legend(loc="upper left")
plt.show()

In [None]:
# Plot the training accuracy and the validation accuracy recorded in hist (both should be increasing)
fig = plt.figure()
for series, colour in (('accuracy', 'teal'), ('val_accuracy', 'orange')):
    plt.plot(hist.history[series], color=colour, label=series)
fig.suptitle('Accuracy', fontsize=20)
plt.legend(loc="upper left")
plt.show()

# 10. Throw in our own image

In [None]:
# TODO: put an image of a fruit into the local directory, and replace 'pomelo.png' with the file name
image_path = 'pomelo.png'

# The colors here will probably show up unexpected. This is because cv2 loads images
# in B, G, R channel order instead of R, G, B...
# Let's fix that in the next cell
import cv2
img = cv2.imread(image_path)
# cv2.imread gives back None (instead of raising) when the file is missing or unreadable,
# so stop here with a clear message rather than a confusing error further down.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")
plt.imshow(img)
plt.show()

In [None]:
# Resize the image to 100x100
resize = tf.image.resize(img, (100, 100))

# Convert BGR (the channel order cv2 loads) to RGB by reversing the channel axis
resize_rgb = tf.reverse(resize, axis=[-1])

# Display the corrected RGB image (converted to a NumPy array of ints for plotting)
plt.imshow(resize_rgb.numpy().astype(int))
plt.show()

# 11. Predict our image

In [None]:
# model.predict works on a batch of images, so we wrap our single image in an extra
# leading dimension (a batch of one). We also divide by 255 to scale the pixel
# values down, matching the scaling step from section 4.
image_batch = np.expand_dims(resize_rgb/255, 0)
yhat = model.predict(image_batch)

# yhat[0] holds the model's outputs for our one image.
# This takes the argmax (shows us our predicted class)
predicted_class_index = np.argmax(yhat[0])
print(predicted_class_index)


In [None]:
# However, we want more than just the class number... we want its name so we can use it in the API!

# This is the (kind of) gross and inefficient way we found of doing it...
# Feel free to implement this differently if you like- perhaps by outputting the dictionary then hardcoding it

# TODO: Replace "Test" with the name of your testing or training directory
class_dir = "Test"

# Goes through your directory and sorts each sub-directory name alphabetically.
# Tensorflow will sort your directory alphabetically, so this matches names to the previous cell's number
directory_names = sorted(
    name for name in os.listdir(class_dir)
    if os.path.isdir(os.path.join(class_dir, name))
)

# Maps each class number to its directory (class) name
fruits_dict = dict(enumerate(directory_names))

fruit = fruits_dict[predicted_class_index]

print(fruit)

# 12. Save the Model
So we can later use it in our API!

In [None]:
from tensorflow.keras.models import load_model

# Save the model and put it into a file called fruitclassifier.keras
# After running this cell, you should have a black-box like predictor file!
model.save('fruitclassifier.keras')
# Load the file straight back in (presumably as a check that the saved model can be read)
new_model = load_model('fruitclassifier.keras')