# Setup

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals


import os
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from PIL import Image
import pathlib

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Configure a TF1-compat session whose GPU options have allow_growth enabled
# (GPU memory is requested as needed instead of being reserved up front).
config = ConfigProto()
config.gpu_options.allow_growth = True
# Uncomment to close a session left over from a previous run of this cell.
#session.close()
# Create the interactive session with the configuration above.
session = InteractiveSession(config=config)

In [None]:
# Sentinel passed as `num_parallel_calls` / prefetch `buffer_size` further down,
# letting tf.data choose those values at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
# Display the installed TensorFlow version.
tf.__version__

In [None]:
# Display whether this TensorFlow build was compiled with CUDA support.
tf.compat.v2.test.is_built_with_cuda()

## Load dataset

In [None]:
# Root of the labeled images: one sub-directory per class.
data_dir = pathlib.Path('ressources/mapillary_raw/labeled/')
# Count the JPEG files one level below the root without building a list.
DATASET_SIZE = sum(1 for _ in data_dir.glob('*/*.jpg'))
DATASET_SIZE

In [None]:
#data_dir_validation = pathlib.Path('ressources/mapillary_raw/labeled/')
#image_count_val = len(list(data_dir_validation.glob('*/*.jpg')))
#image_count_val

In [None]:
# Class names are the entries directly under data_dir, minus bookkeeping files.
# They are sorted because glob() yields entries in filesystem-dependent order;
# a fixed order keeps the index -> class mapping (and therefore the layout of
# the one-hot labels and of the model outputs) identical on every run.
CLASS_NAMES = np.array(sorted(
    item.name
    for item in data_dir.glob('*')
    if item.name not in {"LICENSE.txt", ".DS_Store"}
))
CLASS_NAMES

In [None]:
# Print "<class>: <number of .jpg files in that class directory>" for each class.
for path in CLASS_NAMES:
    print(f"{path}: {sum(1 for _ in data_dir.glob(path + '/*.jpg'))}")

In [None]:
# Preview the first three files found in the 'asphalt' class directory.
# NOTE(review): the list is named `dirt` although it globs 'asphalt/*' — confirm intent.
dirt = list(data_dir.glob('asphalt/*'))

# Open each file with PIL and render it inline in the notebook.
for image_path in dirt[:3]:
    display.display(Image.open(str(image_path)))


In [None]:
# Number of images per batch.
BATCH_SIZE = 32
# Image size used for resizing and for the model input shape.
# (An earlier 224x224 setting was immediately overwritten, so only the
# effective 160x160 values are kept.)
IMG_HEIGHT = 160
IMG_WIDTH = 160
# Batches needed to cover the whole dataset once, rounded up. Converted to a
# plain int because step counts are integers; np.ceil alone returns a float.
STEPS_PER_EPOCH = int(np.ceil(DATASET_SIZE / BATCH_SIZE))


In [None]:
# List only JPEG files so this dataset matches DATASET_SIZE (which counts
# '*/*.jpg') and no stray non-image file reaches the JPEG decoder.
# shuffle=False keeps the listing order fixed: by default list_files shuffles
# the file names again on every iteration, which makes any take()/skip() split
# built on top of it unstable. Shuffling is done explicitly where the split is made.
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*.jpg'), shuffle=False)
#list_ds_val = tf.data.Dataset.list_files(str(data_dir_validation/'*/*'))


In [None]:
# 70 % / 15 % / 15 % split sizes (rounded down).
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

full_dataset = list_ds
# Shuffle ONCE with a fixed permutation. With the default
# reshuffle_each_iteration=True the order changes every time the dataset is
# iterated (i.e. every epoch), so the take()/skip() subsets below would contain
# different files each epoch and test/validation images would leak into training.
# NOTE(review): the split is only fully stable if `list_ds` itself yields the
# files in a fixed order (tf.data.Dataset.list_files(..., shuffle=False)).
full_dataset = full_dataset.shuffle(DATASET_SIZE,
                                    seed=42,
                                    reshuffle_each_iteration=False)
# First `train_size` files are for training ...
train_dataset = full_dataset.take(train_size)
# ... the next `test_size` files are the test set, and everything after that
# (at least `val_size` files; rounding leftovers land here) is validation.
# Taking and skipping the same count keeps test and validation disjoint.
holdout_dataset = full_dataset.skip(train_size)
test_dataset = holdout_dataset.take(test_size)
val_dataset = holdout_dataset.skip(test_size)

In [None]:
# Print the first five elements of the training split (file paths as bytes).
for f in train_dataset.take(5):
  print(f.numpy())


In [None]:
def get_label(file_path):
  """Return the one-hot class label for an image path.

  The class is the name of the file's parent directory; it is compared
  element-wise against CLASS_NAMES.

  Args:
    file_path: string tensor holding the path of one image file.

  Returns:
    A boolean tensor with one entry per element of CLASS_NAMES, true where the
    parent directory name equals that class name.
  """
  # Split on the platform's path separator rather than a hard-coded '/', so the
  # parent directory is also found where paths use a different separator.
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last component is the class directory.
  return parts[-2] == CLASS_NAMES


def decode_img(img):
  """Decode raw JPEG bytes into a resized float image.

  Args:
    img: scalar string tensor with the encoded JPEG file contents.

  Returns:
    A float32 tensor of shape (IMG_HEIGHT, IMG_WIDTH, 3).
  """
  # Decode the compressed string to a 3-channel uint8 tensor.
  img = tf.image.decode_jpeg(img, channels=3)
  # `convert_image_dtype` converts to floats in the [0,1] range.
  img = tf.image.convert_image_dtype(img, tf.float32)
  # tf.image.resize expects the size as [new_height, new_width]; the previous
  # [IMG_WIDTH, IMG_HEIGHT] order only worked because both values are equal and
  # would not match IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3) for non-square sizes.
  return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])

def process_path(file_path):
  """Turn an image file path into an (image, label) pair.

  The file contents are read as a raw string and passed to `decode_img`; the
  label comes from `get_label` applied to the same path.
  """
  raw_bytes = tf.io.read_file(file_path)
  return decode_img(raw_bytes), get_label(file_path)

In [None]:
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = train_dataset.map(process_path, num_parallel_calls=AUTOTUNE)
labeled_ds_test = test_dataset.map(process_path, num_parallel_calls=AUTOTUNE)
labeled_ds_val = val_dataset.map(process_path, num_parallel_calls=AUTOTUNE)

for image, label in labeled_ds.take(1):
  print("Image shape: ", image.numpy().shape)
  print("Label: ", label.numpy())

In [None]:
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
  """Build the training input pipeline: cache, shuffle, repeat, batch, prefetch.

  Args:
    ds: dataset to prepare.
    cache: falsy to skip caching; a non-empty string to cache to that file
      (for data that does not fit in memory); any other truthy value to cache
      in memory.
    shuffle_buffer_size: buffer size passed to `shuffle`.

  Returns:
    An endlessly repeating dataset of BATCH_SIZE batches with prefetching.
  """
  if cache:
    ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

  # shuffle -> repeat forever -> batch -> prefetch batches in the background
  # while the model is training.
  return (
      ds.shuffle(buffer_size=shuffle_buffer_size)
        .repeat()
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
  )


In [None]:
def prepare_for_validation(ds, cache=True, shuffle_buffer_size=1000):
  """Build the evaluation input pipeline: cache, shuffle, batch, prefetch.

  Same steps as the training pipeline except that the dataset is NOT repeated,
  so it ends after one pass.

  Args:
    ds: dataset to prepare.
    cache: falsy to skip caching; a non-empty string to cache to that file;
      any other truthy value to cache in memory.
    shuffle_buffer_size: buffer size passed to `shuffle`.

  Returns:
    A single-pass dataset of BATCH_SIZE batches with prefetching.
  """
  if cache:
    ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

  # shuffle -> batch -> prefetch batches in the background.
  return (
      ds.shuffle(buffer_size=shuffle_buffer_size)
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
  )

In [None]:
def show_batch(image_batch, label_batch):
  """Plot up to 25 images of a batch in a 5x5 grid, titled with their class.

  Args:
    image_batch: indexable batch of images that `plt.imshow` can render.
    label_batch: per-image label rows aligned with CLASS_NAMES; the entry equal
      to 1 (True) selects the class name used as the title.
  """
  plt.figure(figsize=(10,10))
  # Cap the count at the batch length so a batch with fewer than 25 images
  # (small BATCH_SIZE or a final partial batch) does not raise IndexError.
  for n in range(min(25, len(image_batch))):
    plt.subplot(5,5,n+1)
    plt.imshow(image_batch[n])
    plt.title(CLASS_NAMES[label_batch[n]==1][0].title())
    plt.axis('off')


In [None]:
# Final input pipelines: the training pipeline repeats forever, while the
# test and validation pipelines make a single pass.
train_ds = prepare_for_training(labeled_ds)
test_ds = prepare_for_validation(labeled_ds_test)
validation_ds = prepare_for_validation(labeled_ds_val)

In [None]:
# Fetch one training batch and plot it.
image_batch, label_batch = next(iter(train_ds))

# show_batch indexes the batches directly, so convert the tensors to NumPy arrays.
show_batch(image_batch.numpy(), label_batch.numpy())

# Create model

In [None]:
# Print the dataset object (its repr) as a quick check of the pipeline.
print(train_ds)


In [None]:
# Pull a single batch from train_ds; after the loop image_batch / label_batch
# hold that batch and are reused by the cells below.
for image_batch, label_batch in train_ds.take(1):
   pass

# Display the shape of the image batch.
image_batch.shape

In [None]:
# Model input shape: (height, width, 3 channels).
IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

# Create the base model from the pre-trained model MobileNet V2.
# include_top=False leaves out the network's original classification head;
# weights='imagenet' loads the ImageNet pre-trained weights.
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')


In [None]:
from tensorflow.python.platform import build_info as tf_build_info
#print(tf_build_info.cuda_version_number)
#print(tf_build_info.cudnn_version_number)


In [None]:

# Run the sample batch through the base model and print the shape of the
# resulting feature tensor.
feature_batch = base_model(image_batch)
print(feature_batch.shape)


In [None]:
# Freeze the base model so its weights are not updated in the first training phase.
base_model.trainable = False

In [None]:
# Let's take a look at the base model architecture
# Print the layer-by-layer summary of the base model.
base_model.summary()

In [None]:
# Pooling layer that averages each feature map over its spatial dimensions.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
# Apply it to the sample features and print the pooled shape.
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# Classification head: one output unit per class. No activation is given, so
# the layer outputs raw scores (logits), not probabilities.
prediction_layer = tf.keras.layers.Dense(len(CLASS_NAMES))
# Apply it to the pooled sample features and print the output shape.
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
# Full model: base network -> global average pooling -> dense classification head.
model = tf.keras.Sequential([
  base_model,
  global_average_layer,
  prediction_layer
])


In [None]:
base_learning_rate = 0.001
# Loss: the labels are one-hot vectors with exactly one class per image, and
# prediction_layer is a Dense layer without activation, so the model outputs
# logits. The 'binary_crossentropy' string treats the outputs as probabilities,
# which yields a wrong loss on logits; categorical cross-entropy with
# from_logits=True matches both the labels and the outputs.
# Metric: categorical accuracy (argmax match), named 'accuracy' so the
# history keys stay 'accuracy' / 'val_accuracy'.
# Optimizer: `learning_rate` replaces the deprecated `lr` keyword.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])

In [None]:
# Print the summary of the assembled model.
model.summary()

## Training process

In [None]:
# Number of epochs for the first training phase.
initial_epochs = 10
# NOTE(review): computed from the full DATASET_SIZE, while the fit calls visible
# in this notebook pass a literal steps_per_epoch=50 — confirm whether this
# value is still needed.
steps_per_epoch = round(DATASET_SIZE)//BATCH_SIZE
# Number of batches used for the baseline evaluation below.
validation_steps = 20

# Baseline: evaluate the model on `validation_steps` batches of test_ds before
# any training of the new head.
loss0,accuracy0 = model.evaluate(test_ds, steps = validation_steps)


In [None]:
# Report the baseline metrics with two decimals.
print(f"initial loss: {loss0:.2f}")
print(f"initial accuracy: {accuracy0:.2f}")


In [None]:
# First training phase (base model frozen).
# - epochs uses `initial_epochs` instead of a duplicated literal, so the
#   "start fine tuning" marker in the later plots stays in sync.
# - validation runs on validation_ds; test_ds is kept as a held-out set and is
#   not used for per-epoch monitoring.
# - validation runs every epoch (no validation_freq), so the 'val_*' history
#   lists have the same length as the training lists and the curves plotted
#   later line up on the epoch axis.
history = model.fit(train_ds,
                    epochs=initial_epochs,
                    validation_data=validation_ds,
                    steps_per_epoch=50,
                    validation_steps=5)


In [None]:
# Per-epoch metrics recorded by the first fit call.
# Note: the 'val_*' lists hold one entry per validation run, so they are
# shorter than the training lists whenever fit validates less often than
# every epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Top panel: training vs. validation accuracy.
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatic lower limit but force the upper limit to 1.
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

# Bottom panel: training vs. validation loss.
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
#plt.ylim([0,2.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()


In [None]:
# Unfreeze the base model in preparation for fine-tuning.
base_model.trainable = True

In [None]:
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))

# Fine tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False


In [None]:
# Recompile so the changed `trainable` flags take effect, with a 10x lower
# learning rate for fine-tuning.
# Loss: the labels are one-hot (one class per image) and the Dense head has no
# activation, i.e. the model outputs logits. 'binary_crossentropy' as a string
# assumes probabilities; categorical cross-entropy with from_logits=True
# matches both the labels and the outputs.
# Metric: categorical accuracy, named 'accuracy' so the history keys stay
# 'accuracy' / 'val_accuracy'. `learning_rate` replaces the deprecated `lr`.
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')])

In [None]:
# Print the model summary again after changing which layers are trainable.
model.summary()

In [None]:
# Display how many variables will be updated during fine-tuning.
len(model.trainable_variables)


In [None]:
# Number of additional epochs for the fine-tuning phase.
fine_tune_epochs = 20
total_epochs =  initial_epochs + fine_tune_epochs

# Fine-tuning phase.
# - initial_epoch / epochs continue the epoch numbering after the first phase:
#   training runs from epoch index `initial_epochs` up to `total_epochs`, i.e.
#   exactly `fine_tune_epochs` epochs (previously `total_epochs` was unused and
#   the numbering restarted at 0).
# - validation runs on validation_ds every epoch; test_ds stays held out.
history_fine = model.fit(train_ds,
                    epochs=total_epochs,
                    initial_epoch=initial_epochs,
                    validation_data=validation_ds,
                    steps_per_epoch=50,
                    validation_steps=5)


In [None]:
# Display the type of the model object. (The original `type(model.)` was an
# unfinished attribute access and a syntax error.)
type(model)

In [None]:
# Concatenate the metrics of both training phases into NEW lists.
# The previous in-place `+=` extended the very lists stored inside
# `history.history`, and re-running the cell appended the fine-tuning values
# again; building fresh lists makes the cell safe to re-run.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']


In [None]:
# Top panel: accuracy over both training phases.
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
# The accuracy axis is fixed to [0.8, 1]; values below 0.8 fall outside the plot.
plt.ylim([0.8, 1])
# Vertical line at the last epoch index of the first phase.
plt.plot([initial_epochs-1,initial_epochs-1],
          plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Bottom panel: loss over both training phases.
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
#plt.ylim([0, 1.0])
# Same vertical marker, spanning the automatically chosen y-range.
plt.plot([initial_epochs-1,initial_epochs-1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()


In [None]:
# Export the trained model in SavedModel format to ./ressources/models/v1.
tf.saved_model.save(model, "./ressources/models/v1")

## Create predictions