In [30]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as rx
import rioxarray as rix
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard


In [31]:
# JUST FOR PLOTTING
def norm_diff(x,y):
    return (x-y)/(x+y)

child = "S2A_MSIL2A_20170701T093031_19_10"

def plot_folder(folder,rgb=True):
    parent = "data/"
    if rgb:
        plt.figure(figsize=(6, 6))
        blue = rix.open_rasterio(parent+f"{folder}/{folder}_B02.tif")
        green = rix.open_rasterio(parent+f"{folder}/{folder}_B03.tif")
        red = rix.open_rasterio(parent+f"{folder}/{folder}_B04.tif")
        print(blue.attrs)
        rgb_img = rx.concat([red, green, blue], dim='band')
        # rgb_img = (red+green+blue).squeeze()
        rgb_img.plot.imshow(robust=True)
        plt.title("RGB image")
        plt.show()

    elif not rgb:
        plt.figure(figsize=(6, 6))
        nir = rix.open_rasterio(parent+f"{folder}/{folder}_B8A.tif")
        swir = rix.open_rasterio(parent+f"{folder}/{folder}_B11.tif")
        avg_ndmi = np.mean(norm_diff(nir.values,swir.values).squeeze())
        ndmi_img = norm_diff(nir,swir).squeeze()
        print(avg_ndmi)
        # plt.imshow(ndmi)
        # plt.colorbar(label='Pixel Values')
        # plt.title('GeoTIFF Image')
        # plt.show()
        ndmi_img.plot.imshow(robust=True)
        plt.title(f"NDMI image. Average NDMI:{avg_ndmi:.2f}")
        plt.show()

# plot_folder(child,rgb=True)
# plot_folder(child,rgb=False)


In [32]:
import os

def list_folders(directory):
    folders = np.array([d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))])
    return folders
all_folders = list_folders("data/")
print(all_folders)
print(len(all_folders))


['S2A_MSIL2A_20170701T093031_25_48' 'S2A_MSIL2A_20170701T093031_16_58'
 'S2A_MSIL2A_20170701T093031_78_27' ... 'S2B_MSIL2A_20170801T095029_11_32'
 'S2B_MSIL2A_20170906T101020_78_48' 'S2B_MSIL2A_20170802T092029_10_37']
7343


In [33]:

# Read GeoTIFF file
def norm_diff(x,y):
    try:
        return (x-y)/(x+y)
    except Exception as e:
        print(e)
        return None

# testchild = "S2A_MSIL2A_20171101T094131_1_38"


def get_input_label(folder):
    try:
        base_path=f"data/{folder}/{folder}"

        blue = rix.open_rasterio(f"{base_path}_B02.tif")
        green = rix.open_rasterio(f"{base_path}_B03.tif")
        red = rix.open_rasterio(f"{base_path}_B04.tif")     

        rgb = np.concatenate([red.values,green.values,blue.values], axis=0)

        # print(rgb)
        nir = rix.open_rasterio(f"{base_path}_B8A.tif")
        swir = rix.open_rasterio(f"{base_path}_B11.tif")

        ndmi = norm_diff(nir.values.squeeze(),swir.values.squeeze())
        label = np.mean(ndmi)
        
        return rgb, label
    except Exception as e:
        print(e)
        return None, None

In [34]:
folders = all_folders.copy()

input = np.zeros((len(folders), 3, 120, 120))
labels = np.zeros((len(folders), 1))

for j, folder in enumerate(folders):
    rgb, label = get_input_label(folder)
    # print(label)
    input[j] = rgb
    labels[j] = label

data/S2B_MSIL2A_20170802T092029_18_24/S2B_MSIL2A_20170802T092029_18_24_B03.tif: No such file or directory


  return (x-y)/(x+y)


data/S2A_MSIL2A_20170613T101031_24_59/S2A_MSIL2A_20170613T101031_24_59_B02.tif: No such file or directory


  return (x-y)/(x+y)
  return (x-y)/(x+y)
  return (x-y)/(x+y)


data/S2A_MSIL2A_20170701T093031_86_9/S2A_MSIL2A_20170701T093031_86_9_B03.tif: No such file or directory


In [35]:
# remove rows from label and input where label is nan or invalid (>1 or <-1)
faulty_row = (np.isnan(labels) | (np.abs(labels)>1)).squeeze()
print(folders.shape)
labels_clean = labels[~faulty_row]
input_clean = input[~faulty_row]
folders_clean = folders[~faulty_row]
# labels = np.clip(labels, -1, 1)
print(len(labels))
print(len(labels_clean))
print(len(input_clean))
# print(input[:3])
# print(labels[:3])
# print(input.shape)
# np.save("arrays/clean_folders.npy", folders_clean,)
# np.save("arrays/clean_labels.npy", labels_clean)
# np.save("arrays/clean_input.npy", input_clean)

(7343,)
7343
2881
2881


In [36]:

# Convert labels to categorical
labels_categorical = tf.keras.utils.to_categorical(labels_clean, num_classes=10)

# Print the shape of the categorical labels
print(labels_categorical.shape)


(2881, 10)


In [37]:
# start constructing network for RGB dimension (120,120,3)
input_clean = input_clean.reshape(len(input_clean),120,120,3)
input_shape = input_clean.shape

# Example parameters, replace with your actual values
# img_placeholder = tf.placeholder(tf.float32, input_shape)
num_classes = 10  # Replace with your actual number of classes
is_training = True  # Set to False for inference
prediction_threshold = 0.5  # Set the threshold based on your needs

# pre-trained network: pt
input_t = tf.keras.Input(shape=(120,120,3))
model_res = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=input_t,
    input_shape=(120,120,3),
    pooling=None,
    classes=10,
)

# pretrained model
model = tf.keras.Sequential()
# model.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, (224, 224))))
model.add(model_res)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.BatchNormalization())
# I would not put a too complicated network on top to prevent overfitting
# model.add(tf.keras.layers.Dense(256, activation="relu"))
# model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(10, activation="softmax"))


In [39]:
log_dir = "logs/pt_fit/"  # Choose a suitable directory
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# callback = tf.keras.callbacks.ModelCheckpoint(filepath='beNet50.h5', monitor='acc', mode="max", save_best_only=True)
checkpoint_path = "weights/pt_weights.best.h5"
# default metric to compare whether it's "best" is loss
model_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, save_weights_only=True)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(input_clean, labels_categorical, epochs=1, batch_size=32, verbose=1, validation_split=0.2, callbacks=[tensorboard_callback,model_callback])



In [45]:
# Example parameters, replace with your actual values
# img_placeholder = tf.placeholder(tf.float32, input_shape)
num_classes = 10  # Replace with your actual number of classes
is_training = True  # Set to False for inference
prediction_threshold = 0.5  # Set the threshold based on your needs

# same structure network but not pre-trained (initialised): init
input_t = tf.keras.Input(shape=(120,120,3))
model_res = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights=None,
    input_tensor=input_t,
    input_shape=(120,120,3),
    pooling=None,
    classes=10,
)

# pretrained model
model = tf.keras.Sequential()
# model.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, (224, 224))))
model.add(model_res)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(256, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(10, activation="softmax"))
# model.summary()

In [46]:
log_dir = "logs/init_fit/"  # Choose a suitable directory
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# callback = tf.keras.callbacks.ModelCheckpoint(filepath='beNet50.h5', monitor='acc', mode="max", save_best_only=True)
checkpoint_path = "weights/init_weights.best.h5"
# default metric to compare whether it's "best" is loss
model_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, save_weights_only=True)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(input_clean, labels_categorical, epochs=10, batch_size=32, verbose=1, validation_split=0.2, callbacks=[tensorboard_callback,model_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10

In [None]:
# Example parameters, replace with your actual values
# img_placeholder = tf.placeholder(tf.float32, input_shape)
num_classes = 10  # Replace with your actual number of classes
is_training = False  # Set to False for inference
prediction_threshold = 0.5  # Set the threshold based on your needs

# pre-trained network with frozen ResNet weights (freeze): fr
input_t = tf.keras.Input(shape=(120,120,3))
model_res = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=input_t,
    input_shape=(120,120,3),
    pooling=None,
    classes=10,
)
optional for not training the whole model
for layer in model_res.layers:
    layer.trainable = False

# pretrained model
model = tf.keras.Sequential()
# model.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, (224, 224))))
model.add(model_res)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(256, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(10, activation="softmax"))
# model.summary()

In [None]:
log_dir = "logs/fr_fit/"  # Choose a suitable directory
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
# callback = tf.keras.callbacks.ModelCheckpoint(filepath='beNet50.h5', monitor='acc', mode="max", save_best_only=True)
checkpoint_path = "weights/fr_weights.best.h5"
# default metric to compare whether it's "best" is loss
model_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, save_weights_only=True)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(input_clean, labels_categorical, epochs=10, batch_size=32, verbose=1, validation_split=0.2, callbacks=[tensorboard_callback,model_callback])