In [None]:
# Import Libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import PIL
import tensorflow as tf

import pathlib # To import dataset and working with paths

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# Locate the dataset folder.
# The location can be overridden with the GHO_KIRA_DATA_DIR environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default.
data_url = os.environ.get(
    'GHO_KIRA_DATA_DIR',
    '/home/kayd/cs/projects/gho-kira-detection-ml/gho-kira-images'
)
data_dir = pathlib.Path(data_url)

# Count every entry that sits one level below a sub-folder (<folder>/<entry>)
image_count = len(list(data_dir.glob('*/*')))
print('Image count: ', image_count)

In [None]:
# Peek at one sample: collect everything under the 'kira' folder and
# display the entry at index 4
kiras = [sample_path for sample_path in data_dir.glob('kira/*')]
# print(kiras)
PIL.Image.open(kiras[4])

### To-Do
- [ ] Research which batch size to use
- [ ] Research which image size to use

In [None]:
# Build the training split with the Keras directory loader

# Loader settings
batch_size = 14
img_height = 240
img_width = 240

# 80/20 split: reserve 20% for validation and request the 'training' subset
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    subset='training',
    validation_split=0.2,
    seed=111,
    batch_size=batch_size,
    image_size=(img_height, img_width),
)

In [None]:
# Build the validation split: same directory, split fraction and seed as the
# training loader, but requesting the 'validation' subset
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    subset='validation',
    validation_split=0.2,
    seed=111,
    batch_size=batch_size,
    image_size=(img_height, img_width),
)

In [None]:
# Class labels exposed by the training dataset's `class_names` attribute
class_names = train_ds.class_names
print('Class names: ', class_names)

# Visualize Dataset

In [None]:
# Draw the first nine images of one training batch in a 3x3 grid,
# each titled with its class name
plt.figure(figsize=(10, 10))

for images, label in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # Convert the image tensor to uint8 so imshow renders it as 0-255 pixels
        ax.imshow(images[i].numpy().astype('uint8'))
        ax.set_title(class_names[label[i]])
        ax.axis('off')

In [None]:
# Manually iterate the dataset and print the shapes of the first batch
for image_batch, labels_batch in train_ds:
    print('Image batch: ', image_batch.shape)
    # Use this loop's own `labels_batch`; `label` only exists if the
    # visualization cell happened to run first (hidden notebook state).
    print('Lables: ', labels_batch.shape)
    break

In [None]:
# Configure the datasets for performance
AUTOTUNE = tf.data.AUTOTUNE

# Training data: cache, shuffle with a 1000-element buffer, then prefetch
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
# Validation data: cache and prefetch only (no shuffling)
val_ds = (
    val_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Standardize the data

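There are two ways to standardize the pixel values:
1. Apply the rescaling to the dataset itself (with `Dataset.map`)
2. Do it on the fly with a `Rescaling` layer inside the model - we will use this approach

Example of what the first approach would look like: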
```python
normalization_layer = layers.Rescaling(1./255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))
# Output: 0.0 1.0
```

# Create the Model

In [None]:
# One output unit per class
num_classes = len(class_names)

# Baseline CNN: rescale pixels to [0, 1], three Conv2D + MaxPooling2D stages
# with 16/32/64 filters, then a dense head that emits one raw score per class
model = tf.keras.Sequential()
model.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
for filters in (16, 32, 64):
    model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes))

# Compile the Model

In [None]:
# from_logits=True: the loss is told to expect raw scores, not probabilities
model.compile(
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model
model.summary()

In [None]:
# Train the baseline model, evaluating on the validation set during training
epochs = 100
history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)

In [None]:
# Debug check: show the type of the object returned by model.fit
print(type(history))

# Visualize Training Results

The results here will tend towards overfitting because the sample size is small.
So, we use data augmentation and other methods to train the model and get better results.
After we train the new model, we visualize the results again.

Finally, we will create a function that takes a new image and prints the model's prediction.

**To-Do:**
- [ ] Research visualizing data
- [ ] Correct the error


In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history instead of the global `epochs`:
# the plot then still works if `epochs` was changed by another cell or
# training ended before the requested number of epochs.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()


# Data Augmentation

**To-Do**
- [ ] Add more augmentations, such as color changes

In [None]:
# Augmentation pipeline: random horizontal flip, then random rotation
# and random zoom (factor 0.1 each)
data_augmentation = keras.Sequential()
data_augmentation.add(
    layers.RandomFlip('horizontal', input_shape=(img_height, img_width, 3))
)
data_augmentation.add(layers.RandomRotation(0.1))
data_augmentation.add(layers.RandomZoom(0.1))

# Heavier alternative, kept for reference (not active):
# data_augmentation = keras.Sequential([
#     layers.RandomFlip('horizontal', input_shape=(img_height, img_width, 3)),
#     layers.RandomRotation(0.1),
#     layers.RandomZoom(0.1),
#     layers.RandomTranslation(0.2, 0.2),
#     layers.RandomHeight(0.1),
#     layers.RandomWidth(0.1),
#     layers.RandomContrast(0.5)
# ])

In [None]:
# Run the augmentation pipeline nine times on one training batch and show
# the first image of each result in a 3x3 grid
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i + 1)
        ax.imshow(augmented_images[0].numpy().astype('uint8'))
        ax.axis('off')

# Dropout

In [None]:
# Second model: augmentation first, then the same rescale + three
# Conv2D/MaxPooling2D stages (16/32/64 filters), with Dropout(0.2)
# inserted before the dense head
conv_stages = [
    stage_layer
    for filters in (16, 32, 64)
    for stage_layer in (
        layers.Conv2D(filters, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
    )
]

model = tf.keras.Sequential([
    data_augmentation,
    layers.Rescaling(1./255),
    *conv_stages,
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes),
])

# Compile the Model

In [None]:
# from_logits=True: the loss is told to expect raw scores, not probabilities
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model
model.summary()

# Train the Model

In [None]:
# Train the augmented/dropout model; verbose=2 selects the per-epoch log style
epochs = 10
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    verbose=2,
)

# Visualizing Training Results

**To-Do**
- [ ] Same as before: add the visualization logic.

In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history instead of the global `epochs`:
# the plot then still works if `epochs` was changed by another cell or
# training ended before the requested number of epochs.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# Predict New Data

In [None]:
# new_data_url = '/home/kayd/cs/projects/gho-kira-detection-ml/gho-kira-images/test-tibetan-1.jpg'
# new_data_path = pathlib.Path(new_data_url)

# img = tf.keras.utils.load_img(
#     new_data_path, target_size = (img_width, img_height)
# )

# img_array = tf.keras.utils.img_to_array(img)
# img_array = tf.expand_dims(img_array, 0) # Creating a batch

# predictions = model.predict(img_array)
# score = tf.nn.softmax(predictions[0])

# print(
#     "This image most likely belongs to {} with a {:.2f} percent confidence."
#     .format(class_names[np.argmax(score)], 100 * np.max(score))
# )

def predict_image(url):
    """Classify one image file with the trained model and print the result.

    Args:
        url: Path to the image file (anything `pathlib.Path` accepts).

    Returns:
        None. The predicted class name and its softmax confidence are printed.
    """
    new_data_path = pathlib.Path(url)

    # Keras expects target_size as (height, width), matching the
    # image_size=(img_height, img_width) used when loading the datasets.
    img = tf.keras.utils.load_img(
        new_data_path, target_size=(img_height, img_width)
    )

    img_array = tf.keras.utils.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis

    # The model is compiled with from_logits=True, so its outputs are raw
    # scores; softmax converts them to probabilities.
    predictions = model.predict(img_array)
    score = tf.nn.softmax(predictions[0])

    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[np.argmax(score)], 100 * np.max(score))
    )

# The test images sit directly inside the dataset directory, so build their
# paths from `data_dir` instead of repeating the absolute path for each file.
test_gho1 = str(data_dir / 'test-gho-1.jpg')
test_gho2 = str(data_dir / 'test-gho-2.jpeg')
test_gho3 = str(data_dir / 'test-gho-3.jpg')
test_gho4 = ''  # placeholder: no fourth gho test image yet, so it is not predicted

test_kira1 = str(data_dir / 'test-kira-1.jpg')
test_kira2 = str(data_dir / 'test-kira-2.jpeg')
test_kira3 = str(data_dir / 'test-kira-3.jpg')
test_kira4 = str(data_dir / 'test-kira-4.jpg')

# Run the classifier on every available test image, gho images first
for test_image in (
    test_gho1, test_gho2, test_gho3,
    test_kira1, test_kira2, test_kira3, test_kira4,
):
    predict_image(test_image)