# Multiclass image classification

## Get the data

In [None]:
import os
import urllib.request
import zipfile
from os.path import exists

# Fetch the 10-class food image archive once, then unpack it into ./datasets.
ARCHIVE_URL = "https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_all_data.zip"
ARCHIVE_PATH = "./datasets/10_food_classes_all_data.zip"

# urlretrieve does not create missing parent directories, so make sure the target folder exists.
os.makedirs("./datasets", exist_ok=True)

if not exists(ARCHIVE_PATH):
    print("Downloading archive...")
    urllib.request.urlretrieve(ARCHIVE_URL, ARCHIVE_PATH)
else:
    print("Archive already downloaded, unzipping...")

# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(ARCHIVE_PATH) as zip_ref:
    zip_ref.extractall("./datasets")

In [None]:
import os

# Report the number of sub-directories and files found at every level of the dataset tree
dataset_tree = os.walk("datasets/10_food_classes_all_data")
for dirpath, dirnames, filenames in dataset_tree:
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in {dirpath}")

In [None]:
# Locations of the training and test splits inside the extracted archive
train_dir, test_dir = (
    "datasets/10_food_classes_all_data/train/",
    "datasets/10_food_classes_all_data/test/",
)

In [None]:
# Get the subdirectories (the class names)

import pathlib
import numpy as np

# Derive the class names from the sub-directory names of the training folder.
data_dir = pathlib.Path(train_dir)
# Only directories count as classes: glob("*") also returns stray files
# (e.g. .DS_Store), which would otherwise show up as bogus class names.
class_names = np.array(sorted(item.name for item in data_dir.glob("*") if item.is_dir()))

print(class_names)

In [None]:
# Visualize the data

from image import view_random_image
import random
import matplotlib.pyplot as plt

# Pick one class at random from the discovered class names.
cls = random.choice(class_names)
# view_random_image comes from the project-local `image` module; presumably it loads a random
# image of class `cls` from train_dir and returns it as an array — TODO confirm against image.py.
img = view_random_image(target_dir=train_dir, target_class=cls)
plt.imshow(img)
plt.axis(False)  # hide the axes around the image
# The title shows the chosen class followed by the image's shape.
plt.title(f"{cls} {img.shape}")



## Preprocess the data

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both generators only multiply pixel values by 1/255; no augmentation at this stage.
train_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

# Iterate over each split in batches of 32 images resized to 244x244 with "categorical" labels.
train_data = train_datagen.flow_from_directory(train_dir, target_size=(244, 244), batch_size=32, class_mode="categorical")
# The test iterator is built from test_datagen (not train_datagen) so that any later change to
# the training pipeline cannot silently leak into the test data.
test_data = test_datagen.flow_from_directory(test_dir, target_size=(244, 244), batch_size=32, class_mode="categorical")

In [None]:
# Create a baseline

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, MaxPool2D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

# Baseline CNN: two (conv, conv, max-pool) stages followed by a 10-way softmax classifier.
model_1 = Sequential([
    Conv2D(filters=10, kernel_size=3, activation="relu", input_shape=(244, 244, 3)),
    Conv2D(filters=10, kernel_size=3, activation="relu"),
    MaxPool2D(),
    Conv2D(filters=10, kernel_size=3, activation="relu"),
    Conv2D(filters=10, kernel_size=3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    # Softmax because this is a multi-class problem
    Dense(units=10, activation="softmax"),
])

model_1.compile(
    loss=CategoricalCrossentropy(),
    metrics=["accuracy"],
    optimizer=Adam(),
)

# Train for 5 epochs, running through every training batch and every test batch each epoch.
history_1 = model_1.fit(
    train_data,
    epochs=5,
    steps_per_epoch=len(train_data),
    validation_data=test_data,
    validation_steps=len(test_data),
)

## Evaluate the model

In [None]:
# Evaluate the baseline on the test batches (loss plus the "accuracy" metric set at compile time).
model_1.evaluate(test_data)

In [None]:
from plot import plot_loss_curve

# plot_loss_curve is a project-local helper from the `plot` module; presumably it plots the
# training/validation curves stored in the History object — TODO confirm against plot.py.
plot_loss_curve(history_1)

The baseline performs poorly because it is overfitting the training data. Let's improve it.

## Improve the model

In [None]:
# Recreate the generators, this time requesting shuffling explicitly.
# NOTE: shuffle=True is already the default of flow_from_directory, so passing it only makes the
# intent visible; it does not change how batches are drawn.
train_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

train_data = train_datagen.flow_from_directory(train_dir, target_size=(244, 244), batch_size=32, class_mode="categorical", shuffle=True)
# Built from test_datagen (not train_datagen) so the test pipeline stays independent of training.
test_data = test_datagen.flow_from_directory(test_dir, target_size=(244, 244), batch_size=32, class_mode="categorical", shuffle=True)

In [None]:
# Simplified model: one convolution per stage instead of two, to reduce capacity.
model_2 = Sequential([
    Conv2D(filters=10, kernel_size=3, activation="relu", input_shape=(244, 244, 3)),
    MaxPool2D(),
    Conv2D(filters=10, kernel_size=3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    # Softmax because this is a multi-class problem
    Dense(units=10, activation="softmax"),
])

model_2.compile(
    loss=CategoricalCrossentropy(),
    metrics=["accuracy"],
    optimizer=Adam(),
)

# Same training schedule as the baseline: 5 epochs over all training and test batches.
history_2 = model_2.fit(
    train_data,
    epochs=5,
    steps_per_epoch=len(train_data),
    validation_data=test_data,
    validation_steps=len(test_data),
)

In [None]:
# Evaluate the simplified model on the test batches (loss plus the "accuracy" metric set at compile time).
model_2.evaluate(test_data)

The simplified model still performs poorly and is still overfitting.

## Augmented data

In [None]:
# Augment ONLY the training images: random rotations, zooms, shifts and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1/255,
    # rotation_range is expressed in degrees; the previous value of 0.2 allowed at most
    # 0.2 degrees of rotation, i.e. effectively none.
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)
# Test images are only rescaled: randomly distorting evaluation data makes the reported
# metrics noisy and not comparable between runs or models.
test_datagen = ImageDataGenerator(rescale=1/255)

train_data_augmented = train_datagen.flow_from_directory(
    train_dir,
    target_size=(244, 244),
    batch_size=32,
    class_mode="categorical",
    shuffle=True
)
# The variable name is kept so later cells keep working, but this iterator is built from
# test_datagen and therefore yields un-augmented (rescaled only) test images.
test_data_augmented = test_datagen.flow_from_directory(
    test_dir,
    target_size=(244, 244),
    batch_size=32,
    class_mode="categorical",
    shuffle=True
)

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, MaxPool2D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

# Same simplified architecture as before, this time trained on the augmented training data.
model_3 = Sequential([
    Conv2D(10, 3, activation="relu", input_shape=(244, 244, 3)),
    MaxPool2D(),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    Dense(10, activation="softmax") # Softmax for multi class problem
])
model_3.compile(loss=CategoricalCrossentropy(), metrics=["accuracy"], optimizer=Adam())

# Keep the History object (as done for history_1 and history_2) so the training curves of this
# run can be inspected afterwards instead of being discarded.
history_3 = model_3.fit(
    train_data_augmented,
    epochs=5,
    steps_per_epoch=len(train_data_augmented),
    validation_data=test_data,
    validation_steps=len(test_data)
)

In [None]:
# Evaluate on the same test iterator that was used as validation data during training and for
# the previous models, so the reported metrics are directly comparable.
model_3.evaluate(test_data)