# Multiclass classification of stork nest images

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sschmutz/stork-net/blob/master/scripts/06_multiclass-classification.ipynb)  
To use Google Colab, click on the link above and then change the Runtime type to Python 3 under "Runtime" - "Change runtime type". And for faster computation select GPU under "Hardware accelerator".

Code is adapted from the [TensorFlow Tutorial on Image classification](https://www.tensorflow.org/tutorials/images/classification).

The goal is to classify images of a stork nest into four categories according to how many storks are present (0-3). The images were collected from a publicly available [webcam](https://www.berner-storch.ch/webcam/) and manually labeled.

In [None]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import pathlib
import numpy as np
import matplotlib.pyplot as plt

## Load images
Labeled images are already split and [available on GitHub](https://github.com/sschmutz/stork-net-dataset).
The full dataset is downloaded with the following command; this enables us to use this notebook in Google Colab.

In [None]:
# Fetch the dataset archive from GitHub; extract=True asks Keras to unpack it too.
archive_path = tf.keras.utils.get_file(
    fname="stork-net-dataset-master.zip",
    origin="https://github.com/sschmutz/stork-net-dataset/archive/master.zip",
    extract=True,
)
# Strip the file extension from the returned path; the result is used as the
# dataset root (presumably the folder the archive was extracted to).
data_dir = pathlib.Path(os.path.splitext(archive_path)[0])

# Sub-folders holding the training, validation and test images.
train_dir = data_dir / "2019_train" / "train"
validation_dir = data_dir / "2019_train" / "validation"
test_dir = data_dir / "2019_test"

In [None]:
def _count_images(directory, pattern):
    """Return how many paths below *directory* match the glob *pattern*."""
    return sum(1 for _ in directory.glob(pattern))


# Number of labeled images per class (0-3 storks) in each data split.
num_0_stork_train = _count_images(train_dir, "0_stork/*.jpg")
num_1_stork_train = _count_images(train_dir, "1_stork/*.jpg")
num_2_stork_train = _count_images(train_dir, "2_stork/*.jpg")
num_3_stork_train = _count_images(train_dir, "3_stork/*.jpg")

num_0_stork_val = _count_images(validation_dir, "0_stork/*.jpg")
num_1_stork_val = _count_images(validation_dir, "1_stork/*.jpg")
num_2_stork_val = _count_images(validation_dir, "2_stork/*.jpg")
num_3_stork_val = _count_images(validation_dir, "3_stork/*.jpg")

num_0_stork_test = _count_images(test_dir, "0_stork/*.jpg")
num_1_stork_test = _count_images(test_dir, "1_stork/*.jpg")
num_2_stork_test = _count_images(test_dir, "2_stork/*.jpg")
num_3_stork_test = _count_images(test_dir, "3_stork/*.jpg")

# Total number of images per split, over all class folders.
total_train = _count_images(train_dir, "*/*.jpg")
total_val = _count_images(validation_dir, "*/*.jpg")
total_test = _count_images(test_dir, "*/*.jpg")

# Class names are the sub-folder names of the training directory.
# Directory listings come back in arbitrary, filesystem-dependent order, so
# sort them to get a reproducible class-index mapping, and keep only
# directories so stray files (e.g. ".DS_Store") are not treated as classes.
class_names = np.array(
    sorted(item.name for item in train_dir.iterdir() if item.is_dir())
)

In [None]:
# Training hyper-parameters and input image geometry.
# NOTE: it is unclear whether the dataset sizes have to be divisible by the
# batch size.
batch_size, epochs = 64, 15
img_height, img_width = 480, 640
channels = 3  # use 1 instead when working with greyscale images

Data augmentation can be defined already inside ***ImageDataGenerator()***, see the respective section on the [keras website](https://keras.io/api/preprocessing/image/).  
I've tried data augmentation (if applied, only do it on the training data) as described in [this tutorial](https://www.tensorflow.org/tutorials/images/classification). It didn't improve the model.

In [None]:
# Multiplying by 1/255 turns uint8 pixel values into floats in the range [0, 1].
# Each split gets its own generator; none of them applies augmentation.
pixel_scale = 1./255
train_image_generator = ImageDataGenerator(rescale=pixel_scale)
validation_image_generator = ImageDataGenerator(rescale=pixel_scale)
test_image_generator = ImageDataGenerator(rescale=pixel_scale)

Should we change the color-images to grayscale? This way one can maybe use images from the infrared camera at night.
This could be done in ***flow_from_directory()***, just define following parameter: ***color_mode="grayscale"*** (default is "rgb").  
I've tried doing this. Didn't really improve the model.

Since this is a multiclass classification problem, we can define ***class_mode="categorical"***. Labels will then automatically be 2D one-hot encoded.

In [None]:
# Read shuffled training batches from disk, resized to the model input size,
# with one-hot ("categorical") labels following the order of class_names.
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    classes=list(class_names),
    class_mode="categorical",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Same settings as the training iterator, but reading the validation folder.
val_data_gen = validation_image_generator.flow_from_directory(
    directory=validation_dir,
    classes=list(class_names),
    class_mode="categorical",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Same settings as the training iterator, but reading the held-out test folder.
test_data_gen = test_image_generator.flow_from_directory(
    directory=test_dir,
    classes=list(class_names),
    class_mode="categorical",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Pull one batch (images and one-hot labels) from each split.
sample_training_images, sample_training_labels = next(train_data_gen)
sample_validation_images, sample_validation_labels = next(val_data_gen)
sample_test_images, sample_test_labels = next(test_data_gen)

# Turn the one-hot label rows into integer class indices.
sample_training_labels = tf.argmax(sample_training_labels, axis=1)
sample_validation_labels = tf.argmax(sample_validation_labels, axis=1)
sample_test_labels = tf.argmax(sample_test_labels, axis=1)

# Preview the training batch in a 5 x 5 grid. The grid holds at most 25
# images; a batch can be smaller than that (small batch_size or small
# dataset), so never index past the end of the batch.
n_preview = min(25, len(sample_training_images))

plt.figure(figsize=(10,10))
for i in range(n_preview):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(sample_training_images[i], cmap=plt.cm.binary)
    # Caption each image with its class name.
    plt.xlabel(class_names[sample_training_labels[i]])
plt.show()

## Create and train model

In [None]:
# Small CNN: three convolution + max-pooling stages followed by a dense head.
# 10% dropout is applied after the first and after the last max-pooling layer.
# The final layer has four softmax outputs, one per class.
model = Sequential([
    Conv2D(
        filters=16,
        kernel_size=3,
        padding="same",
        activation="relu",
        input_shape=(img_height, img_width, channels),
    ),
    MaxPooling2D(),
    Dropout(rate=0.1),
    Conv2D(filters=32, kernel_size=3, padding="same", activation="relu"),
    MaxPooling2D(),
    Conv2D(filters=64, kernel_size=3, padding="same", activation="relu"),
    MaxPooling2D(),
    Dropout(rate=0.1),
    Flatten(),
    Dense(units=64, activation="relu"),
    Dense(units=4, activation="softmax"),
])

In [None]:
# Adam optimizer with categorical cross-entropy (the generators yield one-hot
# labels); accuracy is tracked as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Print an overview of the model's layers.
model.summary()

In [None]:
# Train on the training iterator and validate on the validation iterator
# after every epoch.
# len() of a directory iterator is the number of batches needed to cover the
# whole split (the last batch may be smaller). Floor division by batch_size
# would instead skip the final partial batch and give 0 steps for a split
# smaller than one batch.
history = model.fit(
    train_data_gen,
    steps_per_epoch=len(train_data_gen),
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=len(val_data_gen),
)

In [None]:
# Save the trained model to the file "stork_net_v2.h5" (relative path, so it
# lands in the current working directory).
model.save("stork_net_v2.h5")

In [None]:
# Per-epoch training and validation curves recorded by model.fit().
acc, val_acc = history.history["accuracy"], history.history["val_accuracy"]
loss, val_loss = history.history["loss"], history.history["val_loss"]

epochs_range = range(epochs)

# One panel per metric, side by side: (subplot position, training curve,
# validation curve, training label, validation label, legend location, title).
curve_panels = (
    (1, acc, val_acc, "Training Accuracy", "Validation Accuracy",
     "lower right", "Training and Validation Accuracy"),
    (2, loss, val_loss, "Training Loss", "Validation Loss",
     "upper right", "Training and Validation Loss"),
)

plt.figure(figsize=(8, 8))
for position, train_curve, val_curve, train_label, val_label, legend_loc, title in curve_panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
# Evaluate on the whole test split by passing the test iterator.
# The sampled batch cannot be used here: sample_test_labels has already been
# converted to integer class indices, while the model is compiled with
# categorical cross-entropy, which expects one-hot labels. A single batch
# would also give only a rough estimate of the test performance.
test_loss, test_acc = model.evaluate(test_data_gen, verbose=2)

print('\nTest loss:', test_loss)
print('\nTest accuracy:', test_acc)

Make predictions using the trained model. Here it's only done on the test data.

In [None]:
# Run the model on the sampled test batch; each output row holds one softmax
# score per class.
class_probabilities = model.predict(sample_test_images)

# Keep, for every image, the index of the highest-scoring class.
predictions = tf.argmax(class_probabilities, axis=1)

In [None]:
# Show predicted and true class indices of the sampled test batch side by side.
print("Predictions:", predictions)
print("Labels:", sample_test_labels)

In [None]:
# Plot the misclassified images of the sampled test batch in a 5 x 5 grid,
# each captioned with its predicted and its true class name.
max_panels = 25  # capacity of the 5 x 5 subplot grid

plt.figure(figsize=(10,10))
n_misclassified = 0

# Iterate over the actual batch length instead of a hard-coded 64, since a
# batch can hold fewer images than batch_size.
for i in range(len(sample_test_images)):
    prediction = class_names[predictions[i]]
    label = class_names[sample_test_labels[i]]

    if prediction == label:
        continue

    n_misclassified += 1
    if n_misclassified > max_panels:
        # The grid is full: keep counting, but do not request a subplot
        # position outside the 5 x 5 grid (matplotlib would raise).
        continue

    plt.subplot(5,5,n_misclassified)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(sample_test_images[i], cmap=plt.cm.binary)
    plt.xlabel("prediction: %s \n label: %s" % (prediction, label))

plt.show()