# `Land Cover` Classification

Data source: EuroSAT: Land Use and Land Cover Classification with Sentinel-2 <br>
[Download to local](https://madm.dfki.de/files/sentinel/EuroSAT.zip)

- Anomaly detection with a convolutional variational autoencoder (CVAE)
- Classification with a Multi Convolutional Neural Network (MCNN) and a Transfer Learning Pretrained Model (TLPM)

[Citation](#citation)

In [None]:
import tensorflow as tf

print(tf.__version__)
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive inside the Colab runtime; the dataset paths used by this
# notebook all start with /content/drive.
from google.colab import drive

drive.mount("/content/drive")

# Anomaly Detection with CVAE

## CVAE Model Construction

In [None]:
def encoder_block(encoder_inputs_shape, latent_dim):
    """Build the convolutional encoder of the CVAE.

    Three stride-2 convolutions (32, 64 and 64 filters), each followed by a
    LeakyReLU, feed a flatten layer and two dense heads of size
    ``latent_dim``: the mean and the log-variance of the latent
    distribution.  A latent sample ``z`` is drawn from them with the
    reparameterization trick.

    Args:
        encoder_inputs_shape: Shape of one input sample, passed to
            ``tf.keras.Input``.
        latent_dim: Size of the latent vector.

    Returns:
        A tuple ``(mean, log_var, inputs, encoder)``: the symbolic outputs of
        the two dense heads, the symbolic input tensor, and a
        ``tf.keras.Model`` mapping ``inputs`` to ``[mean, log_var, z]``.
    """
    inputs = tf.keras.Input(shape=encoder_inputs_shape, name='encode_input_layer')
    x = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=2, padding='same', name='encode_conv_layer_1')(inputs)
    x = tf.keras.layers.LeakyReLU(name='encode_lrelu_1')(x)
    x = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same', name='encode_conv_layer_2')(x)
    x = tf.keras.layers.LeakyReLU(name='encode_lrelu_2')(x)
    x = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same', name='encode_conv_layer_3')(x)
    x = tf.keras.layers.LeakyReLU(name='encode_lrelu_3')(x)
    x = tf.keras.layers.Flatten(name='encode_flatten_layer')(x)
    mean = tf.keras.layers.Dense(latent_dim)(x)
    log_var = tf.keras.layers.Dense(latent_dim)(x)

    class Sampling(tf.keras.layers.Layer):
        """Draw ``z = mean + sigma * epsilon`` from ``[mean, log_var]``."""

        def call(self, inputs):
            mean, log_var = inputs
            # NOTE(review): the noise is drawn with stddev 0.1 rather than 1.0;
            # kept as in the original -- confirm this is intentional.
            epsilon = tf.keras.backend.random_normal(
                shape=(tf.keras.backend.shape(mean)[0], latent_dim),
                mean=0.0,
                stddev=0.1,
            )
            # ``log_var`` is a log-variance (create_model's KL term uses
            # exp(log_var) as the variance), so the standard deviation is
            # exp(0.5 * log_var), not exp(log_var).
            return mean + tf.keras.backend.exp(0.5 * log_var) * epsilon

    z = Sampling()([mean, log_var])
    encoder = tf.keras.Model(
        inputs=inputs, outputs=[mean, log_var, z], name='encoder_module'
    )
    return mean, log_var, inputs, encoder


def decoder_block(decoder_inputs_shape):
    """Build the transposed-convolution decoder of the CVAE.

    A dense layer expands the latent vector to ``8 * 8 * 64`` units, which
    are reshaped to ``(8, 8, 64)`` and passed through three stride-2
    transposed convolutions (64, 64 and 32 filters), each followed by a
    LeakyReLU.  A final sigmoid transposed convolution emits 3 channels.

    Args:
        decoder_inputs_shape: Length of the latent vector fed to the decoder.

    Returns:
        A tuple ``(outputs, decoder)``: the symbolic output tensor and the
        ``tf.keras.Model`` wrapping the decoder.
    """
    latent_in = tf.keras.Input(
        shape=(decoder_inputs_shape,), name='decode_input_layer'
    )

    # Project the latent vector onto the coarsest feature map.
    feature_map = tf.keras.layers.Dense(
        units=8 * 8 * 64, name='decode_dense_layer'
    )(latent_in)
    feature_map = tf.keras.layers.LeakyReLU(name='decode_lrelu_1')(feature_map)
    feature_map = tf.keras.layers.Reshape(
        target_shape=(8, 8, 64), name='decode_reshape_layer'
    )(feature_map)

    # Three upsampling stages; LeakyReLU numbering continues from the dense one.
    for stage, n_filters in enumerate((64, 64, 32), start=1):
        feature_map = tf.keras.layers.Conv2DTranspose(
            filters=n_filters,
            kernel_size=3,
            strides=2,
            padding='same',
            name=f'decode_conv2t_layer_{stage}',
        )(feature_map)
        feature_map = tf.keras.layers.LeakyReLU(
            name=f'decode_lrelu_{stage + 1}'
        )(feature_map)

    reconstruction = tf.keras.layers.Conv2DTranspose(
        filters=3,
        kernel_size=3,
        activation='sigmoid',
        padding='same',
        name='decode_output_layer',
    )(feature_map)

    decoder = tf.keras.Model(
        inputs=latent_in, outputs=reconstruction, name='decoder_module'
    )
    return reconstruction, decoder


def create_model(input_shape, latent_dim):
    """Assemble and compile the CVAE from its encoder and decoder.

    The loss attached with ``add_loss`` is the mean of a reconstruction term
    (binary cross-entropy summed over axes 1 and 2, averaged over the batch)
    plus the KL term computed from the encoder's mean and log-variance.

    Args:
        input_shape: Shape of one input sample.
        latent_dim: Size of the latent vector.

    Returns:
        The compiled ``tf.keras.Model`` named ``'vae_model'``, using Adam.
    """
    mean, log_var, encoder_inputs, encoder = encoder_block(
        encoder_inputs_shape=input_shape, latent_dim=latent_dim
    )
    _, decoder = decoder_block(decoder_inputs_shape=latent_dim)

    # The third encoder output is the sampled latent vector.
    latent_sample = encoder(encoder_inputs)[2]
    model_outputs = decoder(latent_sample)
    vae = tf.keras.Model(
        inputs=encoder_inputs, outputs=model_outputs, name='vae_model'
    )

    per_pixel_bce = tf.keras.losses.binary_crossentropy(
        encoder_inputs, model_outputs
    )
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(per_pixel_bce, axis=(1, 2))
    )

    kl_terms = (
        1
        + log_var
        - tf.keras.backend.square(mean)
        - tf.keras.backend.exp(log_var)
    )
    kl_loss = -0.5 * tf.keras.backend.sum(kl_terms, axis=1)

    vae.add_loss(tf.keras.backend.mean(reconstruction_loss + kl_loss))
    vae.compile(optimizer=tf.keras.optimizers.Adam())
    return vae

## Helper

In [None]:
def plot_result(history, epochs):
    """Plot the training loss stored in a Keras ``History`` object.

    Args:
        history: Object whose ``history['loss']`` holds one loss value per
            completed epoch.
        epochs: Number of epochs that were requested.  Kept for backward
            compatibility; the x-axis is sized from the recorded losses.
    """
    loss = history.history['loss']
    # Size the axis from what was actually recorded so the x and y lengths
    # always match, even if fewer epochs ran than were requested.
    epochs_range = range(len(loss))

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(epochs_range, loss, label='Training Loss')
    ax.legend(loc='upper right')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Reconstruction + KL Loss')
    # Only the training curve is drawn, so the title no longer mentions validation.
    ax.set_title('Training Loss')
    plt.show()

In [None]:
def test_cvae(model, image_generator):
    """Show each image of the first batch next to its reconstruction.

    Args:
        model: Model whose ``predict`` returns a reconstructed image batch.
        image_generator: Iterable whose items hold the image batch at index 0.
    """
    first_batch = next(iter(image_generator))
    for original in first_batch[0]:
        _, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(10, 5))

        # Add a leading batch axis so a single image can be predicted.
        model_input = tf.keras.utils.img_to_array(original).astype('float32')
        model_input = tf.expand_dims(model_input, 0)
        reconstruction = model.predict(model_input, steps=1)[0]

        left_ax.imshow(original)
        right_ax.imshow(reconstruction)
        plt.show()

## Preprocessing

In [None]:
# Settings for the CVAE anomaly-detection stage.
# Root folder of the dataset, and a glob matching the entries two levels below it.
DATASET_DIR = '/content/drive/MyDrive/land_cover_datasets/2750/'
DATASET_DIR_DEPTH = DATASET_DIR + '*/*'

# Images are resized to IMAGE_SIZE x IMAGE_SIZE with 3 channels.
IMAGE_SIZE = 64
INPUT_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

# Training hyperparameters.
BATCH_SIZE = 64
EPOCHS = 20
LATENT_DIM = 1000

In [None]:
def get_class(path):
    """Return whatever follows the sixth ``/`` in ``path``.

    Despite the name, for a path such as
    ``/content/drive/MyDrive/land_cover_datasets/2750/River/River_1.jpg`` the
    result is ``River/River_1.jpg``; the notebook stores it in the ``id``
    column.  If ``path`` has fewer than six slashes, its last component is
    returned.
    """
    *_, remainder = path.split('/', 6)
    return remainder

def get_id(path):
    """Return the seventh ``/``-separated component of ``path``.

    Despite the name, for a path such as
    ``/content/drive/MyDrive/land_cover_datasets/2750/River/River_1.jpg`` the
    result is ``River``; the notebook stores it in the ``class`` column.

    Raises:
        IndexError: If ``path`` has fewer than seven components.
    """
    segments = path.split('/')
    return segments[6]

# Index every file matched by the dataset glob: its full path, the value
# returned by get_class (stored as 'id') and the value returned by get_id
# (stored as 'class').
image_df = pd.DataFrame({'path': glob.glob(DATASET_DIR_DEPTH)})
image_df['id'] = image_df['path'].map(get_class)
image_df['class'] = image_df['path'].map(get_id)
display(image_df.head())

## Training

In [None]:
def anomaly_detection(image_df, class_label):
    """Train a CVAE on one class and return the files it flags as outliers.

    A fresh model is built and trained on the images whose ``class`` column
    equals ``class_label``.  All images are then reconstructed; the median
    and standard deviation of every reconstructed value define the band
    ``median +/- std``.  An image is flagged when the median of its own
    reconstruction falls outside that band.

    The same augmenting generator (random flips and rotation) feeds both
    training and scoring.

    Args:
        image_df: DataFrame with ``id`` (file name relative to
            ``DATASET_DIR``) and ``class`` columns.
        class_label: Value of the ``class`` column selecting the images.

    Returns:
        List of the generator's file names for all flagged images, across
        every batch.
    """
    print('\nCreating model...')
    model = create_model(input_shape=INPUT_SHAPE, latent_dim=LATENT_DIM)

    subset_df = image_df[image_df['class'] == class_label]

    image_gen = tf.keras.preprocessing.image.ImageDataGenerator(
        horizontal_flip=True, vertical_flip=True, rotation_range=0.2, rescale=1.0 / 255
    )

    print('Processing image...')
    image_generator = image_gen.flow_from_dataframe(
        dataframe=subset_df,
        directory=DATASET_DIR,
        x_col='id',
        y_col='class',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        class_mode='input',
        batch_size=BATCH_SIZE,
        seed=7,
        shuffle=False,
    )

    print('Training model...')
    history = model.fit(image_generator, epochs=EPOCHS, verbose=2)

    print('PLotting result...')
    plot_result(history=history, epochs=EPOCHS)

    # Acceptance band derived from all reconstructions of this class.
    res = model.predict(image_generator)
    center = np.median(res)
    spread = np.std(res)
    upb = center + spread  # np.percentile(a=res, q=90)
    lwb = center - spread  # np.percentile(a=res, q=10)

    print('Anomaly detecting...')
    # Collected across ALL batches; it must not be reset per batch.
    outliers_image_file_list = []
    batch_size = image_generator.batch_size

    # Index the batches explicitly so each one is visited exactly once and
    # the file-name slice lines up with it (the generator uses shuffle=False).
    for batch_idx in range(len(image_generator)):
        images = image_generator[batch_idx][0]
        start = batch_idx * batch_size
        current_batch_file_list = image_generator.filenames[
            start : start + len(images)
        ]

        # One predict call per batch instead of one per image.
        reconstructions = model.predict(images, verbose=0)
        for file_name, reconstruction in zip(
            current_batch_file_list, reconstructions
        ):
            if not lwb <= np.median(reconstruction) <= upb:
                outliers_image_file_list.append(file_name)

    return outliers_image_file_list

In [None]:
# Run the anomaly detection once per class (each call trains its own model)
# and gather every flagged file name in a single list.
anomaly_list = []

for class_label in image_df['class'].unique():
    anomaly_list += anomaly_detection(image_df=image_df, class_label=class_label)

In [None]:
# Draw every flagged image in a grid of 5 columns.
ncols = 5
# Round up so that a final, partially filled row is still drawn; flooring
# would drop up to four images and show nothing for fewer than five.
nrows = (len(anomaly_list) + ncols - 1) // ncols
axes = []
fig = plt.figure(figsize=(50, 50))

for i, anomaly_file in enumerate(anomaly_list):
    axes.append(fig.add_subplot(nrows, ncols, i + 1))
    axes[-1].set_title(f'{str(anomaly_file)}')
    img_array = tf.keras.preprocessing.image.load_img(DATASET_DIR + anomaly_file)
    img_array = tf.keras.utils.img_to_array(img_array)
    # imshow expects 0-255 integers here, since the loaded pixels are not rescaled.
    plt.imshow(img_array.astype(np.uint8))
    plt.axis('off')
plt.show()

In [None]:
import shutil

# Move every flagged image out of the dataset into ./anomaly_/ so the
# classification stage does not see it.
anomaly_path = os.getcwd() + "/anomaly_/"
os.makedirs(anomaly_path, exist_ok=True)

for file in anomaly_list:
    from_ = DATASET_DIR + file
    to_ = anomaly_path + file.split('/')[-1]
    # Handle each file on its own so one missing file does not stop the rest.
    try:
        # shutil.move falls back to copy-and-delete when source and target
        # are on different filesystems, where os.replace may fail
        # (presumably the case for a Drive mount vs. the local working
        # directory -- TODO confirm).
        shutil.move(from_, to_)
    except FileNotFoundError:
        print('File not found or moved previously')

# Classification

## Multi Convolutional Neural Network (MCNN) Model Building Block

In [None]:
def augmentation_layer(x):
    """Apply random horizontal flip, rotation (0.1) and zoom (0.1) to ``x``.

    Args:
        x: Tensor to augment.

    Returns:
        The output of the last augmentation layer.
    """
    augmentations = (
        tf.keras.layers.RandomFlip('horizontal'),
        tf.keras.layers.RandomRotation(0.1),
        tf.keras.layers.RandomZoom(0.1),
    )
    for augment in augmentations:
        x = augment(x)
    return x


def mlp(x, filters, idx):
    """Append one conv / ReLU / max-pool / dropout stage to ``x``.

    Args:
        x: Input tensor of the stage.
        filters: Number of filters of the 3x3 ``'same'`` convolution.
        idx: Suffix used in the names of the four layers.

    Returns:
        The output tensor of the dropout layer (rate 0.2).
    """
    stage_layers = (
        tf.keras.layers.Conv2D(
            filters=filters, kernel_size=3, padding='same', name=f'conv_layer_{idx}'
        ),
        tf.keras.layers.Activation('relu', name=f'relu_{idx}'),
        tf.keras.layers.MaxPooling2D(
            pool_size=(2, 2), padding='valid', name=f'pooling_layer_{idx}'
        ),
        tf.keras.layers.Dropout(0.2, name=f'dropout_layer_{idx}'),
    )
    for layer in stage_layers:
        x = layer(x)
    return x


def create_mcnn_model(input_shape, num_classes, augmentation):
    """Build and compile the MCNN classifier.

    Inputs are optionally augmented, rescaled by 1/255, passed through four
    ``mlp`` stages (16, 32, 64 and 64 filters), flattened, and fed to a
    128-unit ReLU dense layer followed by a ``num_classes``-unit output
    layer that emits logits.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of output units.
        augmentation: When truthy, ``augmentation_layer`` is applied before
            rescaling.

    Returns:
        The compiled ``tf.keras.Model`` named ``'mcnn_model'`` (Adam,
        sparse categorical cross-entropy on logits, accuracy metric).
    """
    inputs = tf.keras.Input(shape=input_shape, name='input_layer')

    features = augmentation_layer(inputs) if augmentation else inputs
    features = tf.keras.layers.Rescaling(
        scale=1.0 / 255, name='rescaling_layer'
    )(features)

    for stage, n_filters in enumerate((16, 32, 64, 64), start=1):
        features = mlp(features, n_filters, stage)

    features = tf.keras.layers.Flatten(name='flatten_layer')(features)
    features = tf.keras.layers.Dense(
        units=128, activation='relu', name='dense_layer'
    )(features)
    logits = tf.keras.layers.Dense(units=num_classes, name='output_layer')(features)

    model = tf.keras.Model(inputs=inputs, outputs=logits, name='mcnn_model')
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return model


def train_mcnn_model(input_shape, num_classes, augmentation, train_ds, val_ds, epochs):
    """Create, train and save the MCNN classifier.

    Training uses three callbacks: a learning-rate schedule that keeps the
    rate for the first half of the epochs and multiplies it by ``exp(-0.1)``
    per epoch afterwards, early stopping on ``val_loss`` (patience of half
    the epochs, best weights restored), and a weights-only checkpoint
    written to ``/MCNN_checkpoint/`` after each epoch.  The trained model is
    saved to ``/MCNN_model/MCNN_model.h5``.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of output classes.
        augmentation: Whether the model includes the augmentation layers.
        train_ds: Training data passed to ``model.fit``.
        val_ds: Validation data passed to ``model.fit``.
        epochs: Maximum number of training epochs.

    Returns:
        A tuple ``(model, model_history)``.
    """
    import math

    model = create_mcnn_model(
        input_shape=input_shape, num_classes=num_classes, augmentation=augmentation
    )

    # Plain Python numbers: these are scalar hyperparameters, not graph
    # values, and the schedule is expected to hand back a float.
    half_epochs = math.ceil(epochs / 2)
    decay_factor = math.exp(-0.1)

    def scheduler(epoch, lr):
        """Keep ``lr`` for the first half of training, then decay it."""
        if epoch < half_epochs:
            return lr
        return lr * decay_factor

    lr_callback_ = tf.keras.callbacks.LearningRateScheduler(scheduler)

    early_stopping_ = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0.001, patience=half_epochs, restore_best_weights=True
    )

    checkpoint_filepath = "/MCNN_checkpoint/cp-{epoch:04d}.ckpt"
    model_checkpoint_ = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath, save_weights_only=True, monitor='val_accuracy'
    )

    all_callbacks = [lr_callback_, early_stopping_, model_checkpoint_]

    model_history = model.fit(
        train_ds, validation_data=val_ds, epochs=epochs, callbacks=all_callbacks
    )

    model.save("/MCNN_model/MCNN_model.h5", save_format='h5')

    return model, model_history

## Transfer Learning Pretrained Model (TLPM) Building Block

In [None]:
def create_tlpm_model(input_shape, num_classes):
    """Build and compile the transfer-learning classifier on NASNetMobile.

    The ImageNet-pretrained base (without its top) has the first 80% of its
    layers frozen and is called with ``training=False``.  Its output goes
    through global average pooling, dropout (0.2) and a ``num_classes``-unit
    dense layer that emits logits.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of output units.

    Returns:
        The compiled ``tf.keras.Model`` named ``'tl_model'`` (Adam with
        learning rate 0.0001, sparse categorical cross-entropy on logits,
        accuracy metric).
    """
    backbone = tf.keras.applications.NASNetMobile(
        input_shape=input_shape, include_top=False, weights='imagenet'
    )

    # Freeze the first 80% of the backbone layers; the rest stay trainable.
    frozen_count = int(len(backbone.layers) * 0.8)
    for frozen_layer in backbone.layers[:frozen_count]:
        frozen_layer.trainable = False

    inputs = tf.keras.Input(shape=input_shape, name='input_layer')
    features = tf.keras.applications.nasnet.preprocess_input(inputs)
    features = backbone(features, training=False)
    features = tf.keras.layers.GlobalAveragePooling2D(
        name='global_avg_pool_layer'
    )(features)
    features = tf.keras.layers.Dropout(rate=0.2, name='dropout_layer')(features)
    logits = tf.keras.layers.Dense(units=num_classes, name='dense_layer')(features)

    model = tf.keras.Model(inputs=inputs, outputs=logits, name='tl_model')
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return model


def train_tlpm_model(input_shape, num_classes, train_ds, val_ds, epochs):
    """Create, train and save the transfer-learning classifier.

    Training uses early stopping on ``val_loss`` (patience ``epochs // 3``,
    best weights restored) and a weights-only checkpoint written to
    ``/TLPM_checkpoint/``.  The trained model is saved to
    ``/TLPM_model/TLPM_model.h5``.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of output classes.
        train_ds: Training data passed to ``model.fit``.
        val_ds: Validation data passed to ``model.fit``.
        epochs: Maximum number of training epochs.

    Returns:
        A tuple ``(model, model_history)``.
    """
    model = create_tlpm_model(input_shape=input_shape, num_classes=num_classes)

    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            min_delta=0.01,
            patience=epochs // 3,
            restore_best_weights=True,
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath="/TLPM_checkpoint/cp-{epoch:04d}.ckpt",
            save_weights_only=True,
            monitor='val_accuracy',
        ),
    ]

    model_history = model.fit(
        train_ds, validation_data=val_ds, epochs=epochs, callbacks=callbacks
    )
    model.save("/TLPM_model/TLPM_model.h5", save_format='h5')

    return model, model_history

## Helper

In [None]:
def show_samples(ds):
    """Draw the first nine images of the first batch of ``ds`` in a 3x3 grid.

    Each tile is titled with the ``CLASS_NAMES`` entry for its label.

    Args:
        ds: Dataset yielding ``(images, labels)`` batches.
    """
    plt.figure(figsize=(10, 10))
    for images, labels in ds.take(1):
        for position in range(1, 10):
            sample = position - 1
            plt.subplot(3, 3, position)
            plt.imshow(images[sample].numpy().astype('uint8'))
            plt.title(CLASS_NAMES[labels[sample]])
            plt.axis('off')

In [None]:
def show_predict(model, ds, return_labels=False):
    """Predict the first batch of ``ds`` and show each image with its labels.

    For every image of the batch the actual and predicted class names are
    printed and the image is displayed.

    Args:
        model: Model whose ``predict`` returns one score vector per image.
        ds: Dataset yielding ``(images, labels)`` batches.
        return_labels: When true, return the collected labels.

    Returns:
        ``(actual_label_list, predict_label_list)`` when ``return_labels`` is
        true, otherwise ``None``.
    """
    actual_label_list, predict_label_list = [], []
    for images, labels in ds.take(1):
        predicted = model.predict(images)
        # Use the batch's real length rather than the global BATCH_SIZE, so
        # a batch holding fewer images does not raise an IndexError.
        for i in range(len(predicted)):
            actual = labels[i].numpy()
            actual_label_list.append(actual)
            predict = predicted[i].argmax()
            predict_label_list.append(predict)
            print(f'Actual: {CLASS_NAMES[actual]}')
            print(f'Predicted: {CLASS_NAMES[predict]}')
            plt.imshow(images[i].numpy().astype('uint8'))
            plt.axis('off')
            plt.show()
    if return_labels:
        return actual_label_list, predict_label_list

In [None]:
def plot_results(history, epochs, title):
    """Plot training/validation accuracy and loss side by side.

    Args:
        history: Keras ``History`` object holding the ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss`` series.
        epochs: Number of epochs that were requested.  Kept for backward
            compatibility; the x-axis is sized from the recorded series.
        title: Suffix appended to the title of the loss plot.
    """
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Early stopping can end training before ``epochs`` is reached; sizing
    # the axis from the recorded values keeps the x and y lengths equal.
    epochs_range = range(len(loss))

    fig, ax = plt.subplots(1, 2, figsize=(10, 4))
    ax[0].plot(epochs_range, acc, label='Training Accuracy')
    ax[0].plot(epochs_range, val_acc, label='Validation Accuracy')
    ax[0].legend(loc='lower right')
    ax[0].set_xlabel('Epochs')
    ax[0].set_title('Training and Validation Accuracy')
    ax[1].plot(epochs_range, loss, label='Training Loss')
    ax[1].plot(epochs_range, val_loss, label='Validation Loss')
    ax[1].legend(loc='upper right')
    ax[1].set_xlabel('Epochs')
    ax[1].set_title(f'Training and Validation Loss_{title}')
    plt.show()

## Preprocessing

In [None]:
# Option 1: Tf.data.dataset, Memory leak possible
def loading_data_1(
    dataset_dir, total_size, train_size, image_size, batch_size, class_names
):
    """Build training and validation datasets with ``tf.data``.

    Files matching ``dataset_dir`` are listed, shuffled once, and split into
    the first ``train_size`` files (training) and the remainder
    (validation).  Each file is decoded as JPEG and resized; its label is
    the index in ``class_names`` of its parent directory name.

    Args:
        dataset_dir: File pattern passed to ``tf.data.Dataset.list_files``.
        total_size: Buffer size of the initial file-list shuffle.
        train_size: Number of files taken for the training split.
        image_size: Side length the images are resized to.
        batch_size: Batch size of the returned datasets.
        class_names: Sequence of class (directory) names defining the labels.

    Returns:
        A tuple ``(train_ds, val_ds)`` of cached, shuffled, batched and
        prefetched datasets.
    """
    list_ds = tf.data.Dataset.list_files(dataset_dir, shuffle=False)
    list_ds = list_ds.shuffle(total_size, reshuffle_each_iteration=False)

    train_ds = list_ds.take(train_size)
    val_ds = list_ds.skip(train_size)

    # Count the samples before batching: after ``batch`` the length is the
    # number of batches, and multiplying it by ``batch_size`` overstates the
    # size whenever the last batch is partial.
    train_count = len(train_ds)
    val_count = len(val_ds)

    def parse_image(image_path):
        """Load one image and derive its integer label from its folder."""
        image = tf.io.read_file(image_path)
        image = tf.io.decode_jpeg(image)
        image = tf.image.resize(image, [image_size, image_size])
        label = tf.strings.split(image_path, os.sep)
        # Element-wise comparison yields a one-hot mask over class_names.
        one_hot = label[-2] == class_names
        return image, tf.argmax(one_hot)

    train_ds = train_ds.map(parse_image, num_parallel_calls=AUTOTUNE)
    val_ds = val_ds.map(parse_image, num_parallel_calls=AUTOTUNE)

    def configure_for_performance(ds):
        """Cache, shuffle over the full dataset, batch and prefetch."""
        ds = ds.cache()
        ds = ds.shuffle(buffer_size=len(ds))
        ds = ds.batch(batch_size)
        ds = ds.prefetch(buffer_size=AUTOTUNE)
        return ds

    train_ds = configure_for_performance(train_ds)
    val_ds = configure_for_performance(val_ds)

    print(f"Total size: {len(list_ds)}")
    print(f"Train size: {train_count}")
    print(f"Validation size: {val_count}")

    return train_ds, val_ds


# Option 2: Tf.keras.utils
def loading_data_2(dataset_dir, batch_size, image_size):
    """Build training and validation datasets from a class-per-folder tree.

    Both subsets come from ``tf.keras.utils.image_dataset_from_directory``
    with the same seed and an 80/20 split.

    Args:
        dataset_dir: Directory containing one sub-directory per class.
        batch_size: Batch size of both datasets.
        image_size: Side length the images are resized to.

    Returns:
        A tuple ``(train_ds, val_ds, class_names)``; both datasets are
        prefetched and ``class_names`` is taken from the training dataset.
    """
    # Identical arguments for both subsets; in particular the validation
    # images must use the ``image_size`` argument, not the global IMAGE_SIZE.
    common_args = dict(
        validation_split=0.2,
        shuffle=True,
        seed=7,
        batch_size=batch_size,
        image_size=(image_size, image_size),
    )

    train_ds = tf.keras.utils.image_dataset_from_directory(
        dataset_dir, subset="training", **common_args
    )
    val_ds = tf.keras.utils.image_dataset_from_directory(
        dataset_dir, subset="validation", **common_args
    )

    # Read the class names before prefetch wraps the dataset.
    class_names = train_ds.class_names
    train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

    return train_ds, val_ds, class_names

In [None]:
# Settings for the classification stage (these replace the CVAE values of
# IMAGE_SIZE, BATCH_SIZE and DATASET_DIR defined earlier in the notebook).
AUTOTUNE = tf.data.AUTOTUNE
IMAGE_SIZE = 224
BATCH_SIZE = 64

# 1: hand-written tf.data pipeline, 2: image_dataset_from_directory.
loading_data_option = 2

if loading_data_option == 1:
    DATASET_DIR = "/content/drive/MyDrive/land_cover_datasets/2750/*/*"
    CLASS_DIR = "/content/drive/MyDrive/land_cover_datasets/2750/"
    TOTAL_SIZE = len(glob.glob(DATASET_DIR))
    TRAIN_SIZE = int(TOTAL_SIZE * 0.8)
    # os.listdir returns entries in arbitrary order and also lists plain
    # files; keep only directories and sort them so the label indices are
    # reproducible between runs.
    CLASS_NAMES = sorted(
        entry
        for entry in os.listdir(CLASS_DIR)
        if os.path.isdir(os.path.join(CLASS_DIR, entry))
    )
    train_ds, val_ds = loading_data_1(
        dataset_dir=DATASET_DIR,
        total_size=TOTAL_SIZE,
        train_size=TRAIN_SIZE,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_names=CLASS_NAMES,
    )
elif loading_data_option == 2:
    DATASET_DIR = "/content/drive/MyDrive/land_cover_datasets/2750/"
    train_ds, val_ds, CLASS_NAMES = loading_data_2(
        dataset_dir=DATASET_DIR, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE
    )
else:
    # Fail here rather than later with an undefined train_ds / val_ds.
    raise ValueError(f"Unsupported loading_data_option: {loading_data_option!r}")

## Training

In [None]:
# Hyperparameters shared by both classifiers.
EPOCHS = 10
NUM_CLASSES = 10
# Square RGB input matching the size the datasets were resized to.
INPUT_SHAPE = (IMAGE_SIZE, IMAGE_SIZE, 3)

In [None]:
# Train the MCNN with augmentation enabled, then plot its learning curves.
MCNN, MCNN_history = train_mcnn_model(
    train_ds=train_ds,
    val_ds=val_ds,
    input_shape=INPUT_SHAPE,
    num_classes=NUM_CLASSES,
    augmentation=True,
    epochs=EPOCHS,
)

plot_results(
    title='Multi Convolutional Neural Network', history=MCNN_history, epochs=EPOCHS
)

In [None]:
# Train the transfer-learning model, then plot its learning curves.
TLPM, TLPM_history = train_tlpm_model(
    train_ds=train_ds,
    val_ds=val_ds,
    input_shape=INPUT_SHAPE,
    num_classes=NUM_CLASSES,
    epochs=EPOCHS,
)

plot_results(
    title='Transfer Learning with Pretrained Model', history=TLPM_history, epochs=EPOCHS
)

## Predicting

In [None]:
# Reload both classifiers from the HDF5 files written by the training functions.
loaded_MCNN, loaded_TLPM = (
    tf.keras.models.load_model(model_file)
    for model_file in ("/MCNN_model/MCNN_model.h5", "/TLPM_model/TLPM_model.h5")
)

In [None]:
# Predict one validation batch with the reloaded MCNN and display the
# confusion matrix of that batch (last expression of the cell).
actual_label_list, predict_label_list = show_predict(
    loaded_MCNN, val_ds, return_labels=True
)
tf.math.confusion_matrix(
    labels=actual_label_list,
    predictions=predict_label_list,
    num_classes=NUM_CLASSES,
)

In [None]:
# Predict one validation batch with the reloaded transfer-learning model and
# display the confusion matrix of that batch (last expression of the cell).
actual_label_list, predict_label_list = show_predict(
    loaded_TLPM, val_ds, return_labels=True
)
tf.math.confusion_matrix(
    labels=actual_label_list,
    predictions=predict_label_list,
    num_classes=NUM_CLASSES,
)

# Citation<a id='citation'></a>

```bibtex
@article{helber2019eurosat,
  title={Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification},
  author={Helber, Patrick and Bischke, Benjamin and Dengel, Andreas and Borth, Damian},
  journal={IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing},
  year={2019},
  publisher={IEEE}
}
```