# ___

# Demo: Thresholding (THR) vs. Machine Learning (ML) for image classification

**Department of Applied Geoinformatics and Cartography, Charles University**

*Lukas Brodsky lukas.brodsky@natur.cuni.cz*


___

### Task Statement

This notebook demonstrates the use of both a simple **Thresholding** approach and a **Machine Learning–based classification model** to segment supraglacial lakes from Sentinel-2 satellite imagery acquired over Greenland.

### Problem Type

Supervised classification (binary image segmentation)

In [1]:
# packages imports 
import os
import numpy as np
import rasterio 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
# show the current working directory; the relative data path used below is resolved against it
os.getcwd()

'/Users/lukas/Work/prfuk/ownCloud/Vyuka/Machine_learning_geosciences/src/machine_learning_geosciences/01_Computing_environment'

In [None]:
# data: folder with the Sentinel-2 subset and the two label rasters
path = '../data/lab1'
img_fn, labels_fn, labels2_fn = (
    os.path.join(path, file_name)
    for file_name in (
        'S2_20220827T152809_T22WEC_20m_subset.tif',
        'sgl_s2_20220827_labels.tif',
        'sgl_s2_20220827_labels2.tif',
    )
)

In [None]:
# list the files available in the data folder
os.listdir(path)

### Open the Sentinel-2 image and labels in QGIS and explore the data! 

### Open the same data in Python and explore! 

In [None]:

# read the image and both label rasters into memory
with rasterio.open(img_fn) as img_ds:
    # rasterio delivers band-first arrays (B, H, W); move the band axis last -> (H, W, B)
    image = np.moveaxis(img_ds.read(), 0, -1)

# label rasters are single-band: read band 1 and store as unsigned 8-bit
with rasterio.open(labels_fn) as labels_ds:
    labels = labels_ds.read(1)
    labels = labels.astype(np.uint8)

with rasterio.open(labels2_fn) as labels2_ds:
    labels2 = labels2_ds.read(1)
    labels2 = labels2.astype(np.uint8)


In [None]:
# input image: (rows, cols, bands) after the band-last transpose done at load time
image.shape

In [None]:
# reference labels (training data): single band read as uint8, so the shape is 2-D
labels.shape

In [None]:
# class codes in labels2: 1 ... supraglacial lake, 2 ... ice
# (any other value, e.g. 0, is presumably "unlabelled" -- confirm against the raster)
np.unique(labels2)

In [None]:
# preview band 0 on a small window (rows 50-200, cols 100-250)
# alternative colormap to try: cmap='Spectral'
plt.imshow(image[50:200, 100:250, 0], cmap='gray', interpolation='nearest')
plt.colorbar()

In [None]:
# reference labels on the same window as the band-0 preview (rows 50-200, cols 100-250)
plt.imshow(labels[50:200, 100:250], cmap='Blues', interpolation='nearest')
plt.colorbar()

### Model 1: THR 

In [None]:
# define model 1 

def model1(img, bnd, thr):
    """Threshold classifier on a single band.

    Parameters
    ----------
    img : array indexed as img[:, :, band]
    bnd : index of the band to threshold
    thr : threshold value

    Returns
    -------
    Integer array with the shape of one band: 1 where the band value is
    strictly below ``thr``, 0 elsewhere.
    """
    below_threshold = img[:, :, bnd] < thr
    return below_threshold.astype(int)

### Select your own threshold value and apply! 

In [None]:
# select band and boundary value from image
# (the trailing values look like alternative thresholds to experiment with)
treshold = 5000 # 7217 # 6634 # 7517 # 4426
# classify based on model: band 0, pixels strictly below the threshold get class 1
SGL1 = model1(image, 0, treshold)

In [None]:
# Plot the result on a small sample (same window as the band-0 preview: rows 50-200, cols 100-250)
plt.imshow(SGL1[50:200, 100:250], cmap='Blues')
plt.colorbar()

In [None]:
# Plot the full extent
plt.imshow(SGL1, cmap='Blues')
plt.colorbar()

### Accuracy assessment (based on reference labels)

In [None]:
# Pixel-wise overall accuracy: flatten both rasters so that every pixel is one sample.
# accuracy_score treats a 2-D array as a multilabel / multi-output target rather than
# as independent pixels, so passing the rasters directly does not give per-pixel accuracy.
acc = accuracy_score(labels.ravel(), SGL1.ravel())
print('Model1 accuracy is: {:.2f}%'.format(acc * 100))

## Model 2: Random Forest (Machine Learning) 

In [None]:
# class codes present among the pixels with a label value > 0
np.unique(labels2[labels2>0])

In [None]:
# distinct class codes among the labelled (> 0) pixels
labelled_values = labels2[labels2 > 0]
labels_map = np.unique(labelled_values)
summary = 'The training data include {n} classes: {classes}'
print(summary.format(n=labels_map.size, classes=labels_map))

In [None]:
# count the labelled pixels (True counts as 1 when summed)
labelled_pixels = labels2 > 0
n_samples = labelled_pixels.sum()
print("Numnber of samples: {}".format(n_samples))

In [None]:
# training set: one row per labelled pixel (all bands) and the matching class code
is_labelled = labels2 > 0
X = image[is_labelled]
y = labels2[is_labelled]

In [None]:
# Random Forest with limited tree depth; oob_score=True makes the out-of-bag
# accuracy estimate available as clf.oob_score_, random_state fixes the seed
clf = RandomForestClassifier(max_depth=20, oob_score=True, random_state=0)
clf.fit(X, y)

In [None]:
# out-of-bag accuracy estimate (available because the forest was built with oob_score=True)
print('Our OOB prediction of accuracy is: {oob:.2f}%'.format(oob=clf.oob_score_ * 100))

In [None]:
# predict the class of the training samples themselves (X is the labelled-pixel table)
cls = clf.predict(X)

In [None]:
# Flatten the image into a (pixels, bands) table: one row per pixel
n_rows, n_cols, n_bands = image.shape
new_shape = (n_rows * n_cols, n_bands)
img_as_array = image.reshape(new_shape)

# Classify every pixel
class_prediction = clf.predict(img_as_array)

# Fold the flat prediction back into the (rows, cols) raster layout
class_prediction = class_prediction.reshape((n_rows, n_cols))

In [None]:
# define model 2
# region-of-interest mask covering the whole scene: image is laid out as
# (rows, cols, bands), so the spatial dimensions are the first two axes
roi = np.ones(image.shape[:2])

def model2(img, lbl):
    """Train a Random Forest on the labelled pixels and classify the whole image.

    Parameters
    ----------
    img : array of shape (rows, cols, bands)
    lbl : array of shape (rows, cols); pixels with a value > 0 are used for
        training, with the value as the class code

    Returns
    -------
    Array of shape (rows, cols) with the predicted class code of every pixel.

    Prints the out-of-bag accuracy estimate of the fitted forest.
    """
    clf = RandomForestClassifier(max_depth=20, oob_score=True, random_state=0)

    # training table: one row per labelled pixel
    labelled = lbl > 0
    X = img[labelled, :]
    y = lbl[labelled]
    clf.fit(X, y)
    print('Model2 accuracy is: {oob:.2f}%'.format(oob=clf.oob_score_ * 100))

    # classify every pixel: flatten to (pixels, bands), predict, fold back to the raster
    n_rows, n_cols, n_bands = img.shape
    img_as_array = img.reshape((n_rows * n_cols, n_bands))
    cls = clf.predict(img_as_array)
    cls_pred = cls.reshape((n_rows, n_cols))

    # return the 2-D classification map (not the flat prediction vector)
    return cls_pred


In [None]:
# train on the labels2 samples and classify the image with the Random Forest model
SGL2 = model2(image, labels2)

### What makes the difference?

In [None]:
# --- Feature space & decision boundary visualization ---
# labelled pixels only, restricted to the first two bands so the
# feature space can be drawn in 2-D
mask = labels2 > 0
X = image[mask, :2]
y = labels2[mask]

# a second forest, trained on the two-band samples, for the boundary plot
rf = RandomForestClassifier(max_depth=20, random_state=0).fit(X, y)

# simple threshold model (on band 0): cut at the mean of the labelled samples
band0_values = X[:, 0]
threshold = band0_values.mean()

In [None]:
# regular 200 x 200 grid spanning the range of the two plotted bands
x_min, y_min = X[:, :2].min(axis=0)
x_max, y_max = X[:, :2].max(axis=0)
x_axis = np.linspace(x_min, x_max, 200)
y_axis = np.linspace(y_min, y_max, 200)
xx, yy = np.meshgrid(x_axis, y_axis)

# one (band 0, band 1) row per grid node
grid = np.column_stack((xx.ravel(), yy.ravel()))

# RF decision at every grid node, folded back to the grid layout
rf_pred = rf.predict(grid)
rf_pred = rf_pred.reshape(xx.shape)

# threshold decision: 1 where the first-band coordinate exceeds the threshold
thr_pred = np.greater(xx, threshold).astype(int)

In [None]:
def _plot_decision_boundary(decision, title):
    """Draw `decision` over the (xx, yy) grid as filled contours with the labelled samples on top."""
    plt.figure()
    plt.contourf(xx, yy, decision, alpha=0.3)
    # samples are drawn almost transparent so the decision regions stay visible
    for class_id in (1, 2):
        in_class = y == class_id
        plt.scatter(X[in_class, 0], X[in_class, 1], s=5, alpha=0.01)
    plt.title(title)
    plt.xlabel("Band 1")
    plt.ylabel("Band 2")
    plt.show()


# feature space: labelled samples of both classes in the two-band plane
plt.figure()
for class_id, class_name in ((1, 'water'), (2, 'ice')):
    in_class = y == class_id
    plt.scatter(X[in_class, 0], X[in_class, 1], s=5, alpha=0.5, label=class_name)
plt.title("Feature space (ice vs lake)")
plt.xlabel("Band 1")
plt.ylabel("Band 2")
plt.legend()
plt.show()

# RF boundary
_plot_decision_boundary(rf_pred, "Random Forest decision boundary")

# Threshold boundary
_plot_decision_boundary(thr_pred, "Threshold model decision boundary")
