<a href="https://colab.research.google.com/github/paolanustes/thesis/blob/main/MobileNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [None]:
# Mount Google Drive at /content/drive; the input GeoTIFF folders and every
# output path used later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -q rasterio

In [None]:
import os
import re
import random
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
import rasterio
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sns

# Import input dataset images (GeoTIFF)

In [None]:
# Global run configuration.
SEED = 1009       # seed passed to every sampling / shuffling step in this notebook
name = 'model_x'  # run identifier used in checkpoint, log and figure file names

# Seed the global generators as well, so operations that do not take an
# explicit seed (weight initialisation, dropout) are repeatable across runs.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Collect the GeoTIFF tiles of both classes. glob() yields paths in an
# arbitrary, filesystem-dependent order, so sort them: otherwise the seeded
# shuffle below would produce a different table from one runtime to the next.
files_dams = sorted(str(f) for f in pathlib.Path('/content/drive/MyDrive/Dam_CA_tif/').glob('*.tif'))
files_nodams = sorted(str(f) for f in pathlib.Path('/content/drive/MyDrive/No_dam_CA_tif/').glob('*.tif'))

files = files_dams + files_nodams
labels = [1] * len(files_dams) + [0] * len(files_nodams)  # 1 = dam, 0 = no dam

# One row per image, shuffled once with a fixed seed so both classes are
# interleaved before the examples are written out.
df = (
    pd.DataFrame({'file': files, 'label': labels})
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

In [None]:
# Shuffle buffer size used for the tf.data pipeline: one slot per input image.
BUFFER = len(files)

In [None]:
# Read the band descriptions of the first image; they are used to look up band
# positions by name. The context manager closes the file handle afterwards.
# NOTE(review): assumes every GeoTIFF shares this band order -- confirm.
with rasterio.open(files[0]) as src:
  band_names = src.descriptions
print(band_names)

In [None]:
# Bands fed to the model, and the position of each one inside a GeoTIFF
# (looked up by name in the band descriptions read above).
UseBands = ['R', 'G', 'B', 'occurrence', 'slope']
indexBands = list(map(band_names.index, UseBands))

In [None]:
def _bytes_feature(value):
  """Wrap a string / bytes value in a tf.train.Feature holding a bytes_list."""
  eager_tensor_type = type(tf.constant(0))
  # BytesList cannot unpack an EagerTensor, so extract its raw value first.
  raw = value.numpy() if isinstance(value, eager_tensor_type) else value
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))

def _float_feature(value):
  """Wrap a float / double in a tf.train.Feature holding a float_list."""
  as_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=as_list)

def _int64_feature(value):
  """Wrap a bool / enum / int / uint in a tf.train.Feature holding an int64_list."""
  as_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=as_list)

In [None]:
SIZE = 220  # side length (pixels) each band is cropped to before serialisation

def serialize_example(path, label):
  """Serialise one GeoTIFF and its label into a tf.train.Example byte string.

  Args:
    path: Path of the GeoTIFF to read.
    label: Integer class label, stored under the 'label' key.

  Returns:
    The serialised tf.train.Example. Every band listed in UseBands is stored
    under its own name as the raw bytes of its cropped float32 array.
  """
  # Open through a context manager so the file handle is released for every
  # image instead of staying open until garbage collection.
  with rasterio.open(path) as src:
    image = src.read()

  image = image[:, :SIZE, :SIZE] # crop, sometimes images are of a different size?
  # NOTE(review): a tile smaller than SIZE x SIZE is not padded here and would
  # fail the fixed [SIZE, SIZE] reshape in _parse_image_function -- confirm
  # that all inputs are at least SIZE pixels on each side.
  image[np.isnan(image)] = 0
  image = image.astype(np.float32)

  feature = {}

  for i, band_name in zip(indexBands, UseBands):

    im = image[i]

    if band_name in ['R','G','B']: # Normalize
      im = im / 255.

    feature[band_name] = _bytes_feature(im.tobytes())

  feature['label'] = _int64_feature(label)

  return tf.train.Example(features=tf.train.Features(feature=feature)).SerializeToString()

# Write TFRecord

In [None]:
# Parsing schema for the TFRecords: a scalar int64 label plus one serialised
# byte string per band in UseBands.
tfrecord_format = {
  'label': tf.io.FixedLenFeature(shape=[], dtype=tf.int64),
  **{band: tf.io.FixedLenFeature(shape=[], dtype=tf.string) for band in UseBands},
}

def _parse_image_function(example):
  """Decode a serialised example into a (SIZE, SIZE, n_bands) image and its label."""
  parsed = tf.io.parse_example(example, tfrecord_format)

  # Every band is stored as raw float bytes; turn each back into a 2-D plane.
  planes = [
    tf.reshape(tf.io.decode_raw(parsed[band], out_type=tf.float32), [SIZE, SIZE])
    for band in UseBands
  ]

  # Stack the planes along a new last axis, in UseBands order.
  return tf.stack(planes, 2), parsed['label']

In [None]:
!rm *.tfrec

In [None]:
# Peek at ten rows of the shuffled file table (path and label).
for entry in df.iloc[10:20].itertuples(index=False):
  print(entry.file, entry.label)

In [None]:
# Write the shuffled examples into TFRecord shards of at most `count` examples.
count = 20       # maximum number of examples per shard
total = len(df)

for i, start in enumerate(tqdm(range(0, total, count))):
  stop = min(start + count, total) # make sure we're not over max number of rows
  # Size of this shard (the last one may be smaller). Kept in its own name so
  # that `count`, the step of the range above, is not overwritten mid-loop.
  n_examples = stop - start
  filename = f'file_dams-{i:02d}-{n_examples:02d}.tfrec'

  with tf.io.TFRecordWriter(filename) as writer:
    for index, row in df.iloc[start:stop].iterrows():
      writer.write(serialize_example(row.file, row.label))

In [None]:
# List the working directory, where the .tfrec shards are written.
!ls

# Split and visualize training, validation, and test datasets

In [None]:
BATCH_SIZE = 32

# Sort the shard list: glob() order is filesystem-dependent, and the
# take()/skip() split further down depends on a stable element order.
tfrec_files = sorted(str(f) for f in pathlib.Path('./').glob('*.tfrec'))

ds = tf.data.TFRecordDataset(tfrec_files)
ds = ds.map(_parse_image_function)

# reshuffle_each_iteration=False freezes the shuffled order. With the default
# (True) the dataset is reshuffled on every pass, so the train / validation /
# test subsets carved out with take()/skip() would contain different batches
# each epoch and held-out data would leak into training.
ds = ds.shuffle(buffer_size=BUFFER, seed=SEED, reshuffle_each_iteration=False)
ds = ds.batch(BATCH_SIZE)
ds = ds.prefetch(tf.data.AUTOTUNE)


In [None]:
# example = next(ds.as_numpy_iterator())

In [None]:
# example[0].shape

In [None]:
# N = 3
# iter = ds.as_numpy_iterator()
# fig, ax = plt.subplots(N, 3, figsize=(12, 3*N))

# for i in range(N):
#   example = next(iter)
#   image = example[0][0] #np.transpose(example[0][0], (1, 2, 0))
#   print(f'Label: {example[1][0]}')
#   ax[i,0].imshow(image[:, :, :3])
#   ax[i,0].set_title('RGB')
#   ax[i,1].imshow(image[:, :, 4], cmap='Greys')
#   ax[i,1].set_title('ndwi')
#   ax[i,2].imshow(image[:, :, 4])
#   ax[i,2].set_title('elevation');

# plt.tight_layout()
# # fig, ax = plt.subplots(1, 7, figsize=(25, 3))
# # image = np.transpose(example[0][0], (1, 2, 0))
# # print(f'Label: {example[1][0]}')
# # ax[0].imshow(image[:, :, :3] / 255.0)
# # ax[0].set_title('RGB')
# # ax[1].imshow(image[:, :, [3, 3, 1]] / 255.0)
# # ax[1].set_title('NNG')
# # ax[2].imshow(image[:, :, 5], cmap='Greys')
# # ax[2].set_title('NDWI')
# # ax[3].imshow(image[:, :, 4])
# # ax[3].set_title('occurrence')
# # ax[4].imshow(image[:, :, 6])
# # ax[4].set_title('aspect')
# # ax[5].imshow(image[:, :, 7]) 
# # ax[5].set_title('slope')
# # ax[6].imshow(image[:, :, 8])
# # ax[6].set_title('elevation');

# plt.tight_layout()

In [None]:
# Count the elements of `ds` with one full pass. `ds` is already batched, so
# this is the number of batches rather than the number of individual examples.
records_n = 0
for _batch in ds:
  records_n += 1
print("records_n = {}".format(records_n))

In [None]:
# Split the batched dataset roughly 70 / 20 / 10 into train / validation / test.
# Sizes are counted in batches, because `records_n` counts batches.
train_size = int(0.7 * records_n)
val_size = int(0.20 * records_n)
# The test split is whatever remains after the two take() calls below, so
# compute it as the remainder: int(0.10 * records_n) under-counts it after
# rounding and can even be 0.
test_size = records_n - train_size - val_size

# NOTE(review): take()/skip() only give disjoint splits if `ds` yields its
# elements in the same order on every iteration -- verify the upstream shuffle.
train_dataset = ds.take(train_size)
remainder = ds.skip(train_size)
val_dataset = remainder.take(val_size)
test_dataset = remainder.skip(val_size)

# shuffle() rejects a buffer size of 0, so clamp each buffer to at least 1 to
# keep very small datasets from raising here.
train_dataset = train_dataset.shuffle(buffer_size=max(train_size, 1), seed=SEED)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

val_dataset = val_dataset.shuffle(buffer_size=max(val_size, 1), seed=SEED)
val_dataset = val_dataset.prefetch(tf.data.AUTOTUNE)

test_dataset = test_dataset.shuffle(buffer_size=max(test_size, 1), seed=SEED)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
# for image, label in train_dataset:
#   image, label = next(train_dataset.as_numpy_iterator())

#   plt.figure(figsize=(20, 10))
#   for i in range(32):
#     ax=plt.subplot(4, 8, i+1)
#     plt.imshow(image[i])
#     plt.title(label[i])
#     plt.axis("off")

In [None]:
# for image, label in test_dataset:
#   image, label = next(test_dataset.as_numpy_iterator())

#   plt.figure(figsize=(20, 10))
#   for i in range(32):
#     ax=plt.subplot(4, 8, i+1)
#     plt.imshow(image[i])
#     plt.title(label[i])
#     plt.axis("off")

In [None]:
# for image, label in val_dataset:
#   image, label = next(val_dataset.as_numpy_iterator())

#   plt.figure(figsize=(20, 10))
#   for i in range(32):
#     ax=plt.subplot(4, 8, i+1)
#     plt.imshow(image[i])
#     plt.title(label[i])
#     plt.axis("off")

# Model set-up

In [None]:
import keras
from keras import layers

CHANNELS = len(UseBands)           # one input channel per selected band
# Reuse SIZE (the crop size used when serialising and parsing the images) so
# the model input shape cannot drift from the data shape.
IMG_SIZE = (SIZE, SIZE, CHANNELS)
NUM_CLASSES = 2                    # dam / no dam

def get_model(IMG_SIZE, NUM_CLASSES):
  """Build a binary classifier on top of a frozen, ImageNet-pretrained MobileNetV2.

  Args:
    IMG_SIZE: Input shape passed to the Keras Input layer.
    NUM_CLASSES: Accepted for interface compatibility; not used in the body,
      which always ends in a single sigmoid unit.

  Returns:
    An uncompiled tf.keras.Model mapping the input image to one sigmoid output.
  """
  inputs = tf.keras.layers.Input(shape=IMG_SIZE)

  # Learned projection from the multi-band input down to the 3 channels fed to
  # the backbone. The kernel size is the module-level CHANNELS value.
  three_channel = tf.keras.layers.Conv2D(3, CHANNELS, padding='same')(inputs)

  # Pre-trained MobileNet V2 feature extractor without its classification head;
  # its weights are frozen.
  backbone = tf.keras.applications.MobileNetV2(include_top=False,
                                               weights='imagenet')
  backbone.trainable = False

  features = backbone(three_channel)
  features = tf.keras.layers.GlobalAveragePooling2D()(features)
  features = tf.keras.layers.Dropout(0.2)(features)
  outputs = tf.keras.layers.Dense(1, activation='sigmoid')(features)

  return tf.keras.Model(inputs, outputs)

base_learning_rate = 0.001

# Free up RAM in case the model definition cells were run multiple times
keras.backend.clear_session()

# Build model
model = get_model(IMG_SIZE, NUM_CLASSES)

# get_model() ends in Dense(1, activation='sigmoid'), so the network outputs
# probabilities. The loss must therefore use from_logits=False; with True the
# sigmoid would effectively be applied twice and the loss would be wrong.
# `learning_rate` replaces the deprecated `lr` alias.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0.01),
              metrics=['accuracy'])

model.summary()

In [None]:

checkpoint_path = f"/content/drive/MyDrive/Thesis/SA/{name}/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
csv_log_path = f"/content/drive/MyDrive/Thesis/Results/{name}.csv"

# Create the output folders up front so a missing directory cannot abort
# training when the callbacks first try to write.
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(os.path.dirname(csv_log_path), exist_ok=True)

# Training callbacks: save the model's weights, and log per-epoch metrics to CSV.
cp_callback = [
              #  tf.keras.callbacks.EarlyStopping(patience=5),
               tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1),
               tf.keras.callbacks.CSVLogger(csv_log_path, separator=",", append=False)
]


In [None]:
epochs = 150  # maximum number of training epochs
# model.load_weights("/content/drive/MyDrive/Thesis/Results/MbNET_t4/cp.ckpt")

# `cp_callback` is already a list of callbacks, so pass it directly instead of
# wrapping it in a second list.
history = model.fit(
                    train_dataset,
                    validation_data=val_dataset,
                    epochs=epochs,
                    callbacks=cp_callback
                    )


## Learning curves

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs actually recorded: if training ends before `epochs`
# (e.g. through early stopping), range(epochs) would no longer match the
# length of the metric lists and plotting would fail.
epochs_range = range(len(acc))

plt.figure(figsize=(10, 5), dpi=200)
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training')
plt.plot(epochs_range, val_acc, label='Validation')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim(0, 1)

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training')
plt.plot(epochs_range, val_loss, label='Validation')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.ylim(0, 0.8)

# Apply the layout before saving so the file on disk matches what is displayed.
plt.tight_layout()
plt.savefig(f'/content/drive/MyDrive/Thesis/Graphs/{name}.png')
plt.show()


## Model predictions

In [None]:
cm_predictions = []
cm_correct_labels = []

GRID_COLS = 8  # images per row in the preview figure

# Walk the test set exactly once. Use the batch produced by the loop itself:
# pulling a batch from a fresh iterator inside the loop would evaluate only
# the first batch of a new pass each time, not the whole test set.
for image_batch, label_batch in test_dataset.as_numpy_iterator():
  # The model ends in a sigmoid, so these are probabilities; threshold at 0.5.
  probabilities = model.predict_on_batch(image_batch).flatten()
  predictions = np.where(probabilities < 0.5, 0, 1)

  cm_predictions.append(predictions)
  cm_correct_labels.append(label_batch)

  # Size the grid from the actual batch: the last batch can hold fewer than
  # BATCH_SIZE images, and indexing up to BATCH_SIZE would then fail.
  n_images = len(predictions)
  n_rows = max(4, -(-n_images // GRID_COLS))  # ceiling division, at least 4 rows

  plt.figure(figsize=(20, 2.5 * n_rows))
  for i in range(n_images):
    ax = plt.subplot(n_rows, GRID_COLS, i + 1)
    image = image_batch[i]
    plt.imshow(image[:, :, :3])  # first three channels (R, G, B)
    correct = (predictions[i] == label_batch[i])
    title = (r'Pred: {} Label: [{}] '.format(predictions[i], label_batch[i]))
    plt.title(title, fontsize=12, color='black' if correct else 'red')
    plt.axis("off")
  plt.show()  # render this batch's figure now instead of keeping all of them open

# Concatenate rather than asarray(...).flatten(): batches may differ in length.
cm_predictions = np.concatenate(cm_predictions) if cm_predictions else np.array([], dtype=int)
cm_correct_labels = np.concatenate(cm_correct_labels) if cm_correct_labels else np.array([], dtype=int)

print("Correct   labels: \n", cm_correct_labels.shape, cm_correct_labels)

print('Predictions:\n', cm_predictions.shape, cm_predictions)

In [None]:
# confusion matrix
# labels=[0, 1] pins the result to 2x2 even when one class is absent from the
# test labels or predictions; the reshape and 4-way unpacking below need that.
cf_matrix = confusion_matrix(cm_correct_labels, cm_predictions, labels=[0, 1])

group_names = ['True Neg','False Pos','False Neg','True Pos']
group_counts = ["{0:0.0f}".format(value) for value in
                cf_matrix.flatten()]
group_percentages = ["{0:.2%}".format(value) for value in
                     cf_matrix.flatten()/np.sum(cf_matrix)]
# Named `annot_labels` so the module-level `labels` list built when the input
# files were collected is not overwritten.
annot_labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
                zip(group_names,group_counts,group_percentages)]
annot_labels = np.asarray(annot_labels).reshape(2,2)
ax = sns.heatmap(cf_matrix, annot=annot_labels, fmt='', cmap='Blues')
# sklearn's confusion matrix has true labels on rows, predictions on columns.
ax.set(xlabel='Predicted label', ylabel='True label')

plt.savefig(f'/content/drive/MyDrive/Thesis/SA/{name}-cf.png', dpi=200)

# outcome values order in sklearn
tn, fp, fn, tp = cf_matrix.reshape(-1)
print('Outcome values : \n', 'TP :', tp, '\n FN :', fn, '\n FP :', fp, '\n TN :', tn)

# classification report for precision, recall f1-score and accuracy
matrix = classification_report(cm_correct_labels, cm_predictions)
print('Classification report : \n',matrix)

# Same report as a table, saved next to the figures.
report = pd.DataFrame(classification_report(cm_correct_labels, cm_predictions,
                                            output_dict=True)).transpose()
report.to_csv(f'/content/drive/MyDrive/Thesis/Graphs/{name}-report.csv')