In [1]:
import numpy as np
import random
import pandas as pd
import os 
import geopandas as gpd 
import rasterio as rio
from msmla50 import MSMLA50
import utils
import cnn_utils
import gc
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier
import pickle

torch.manual_seed(0)
np.random.seed(0)
torch.cuda.manual_seed(0)
random.seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True, warn_only=True)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
np.random.default_rng(seed=0)

Generator(PCG64) at 0x272CA67ECE0

In [2]:
# settings
patch_size = 32
gt_stride = 32
stride = 10
background_label = 0
batch_size_emb = 1
offset_left = 'best'
offset_top = 'best'

# Berlin

In [2]:
# load splited reference data
splited_ref_data = gpd.read_file(r'ref_data\berlin_ref_splitS2S3S4.gpkg')

In [4]:
# load satellite image (10 m resolution)
image = r'imagery\berlin_20170519.tif'

In [5]:
# load rasterized subset of morphometrics from non-weighted RF S1 models and weighted RF S1 models (10 m resolution)
non_weighted_set = r'rasterized_morphometrics\berlin_rasterized_morphometrics_fold3.tif'
weighted_set = r'rasterized_morphometrics\berlin_rasterized_morphometrics_fold3_weighted.tif'

In [6]:
# load trained S2 CNN models for each fold
cnn_fold0 = r's2_cnn_models\berlin_S2_fold0_epoch12.pth'
cnn_fold1 = r's2_cnn_models\berlin_S2_fold1_epoch16.pth'
cnn_fold2 = r's2_cnn_models\berlin_S2_fold2_epoch22.pth'
cnn_fold3 = r's2_cnn_models\berlin_S2_fold3_epoch39.pth'
cnn_fold4 = r's2_cnn_models\berlin_S2_fold4_epoch32.pth'

In [7]:
# recording results
setups = ["non_weighted_set", "weighted_set"]
strategies = ["non_weighted_model", "weighted_model"]
folds = [0, 1, 2, 3, 4]

# results[setup][strategy][fold]
results = {
    setup: {
        strat: {
            fold: {} for fold in folds
        } for strat in strategies
    } for setup in setups
}

## Non-weighted RF S1 morphometric subset

### Fold 0

In [8]:
fold = 0
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [9]:
train_polygons_raster = r"berlin_train_f0.tif"
test_polygons_raster = r"berlin_test_f0.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [10]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [11]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 12
  Train Accuracy: 0.8106%
  Test Accuracy: 0.7647%
  Gap (Train - Test): 0.0459%


In [12]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1956
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [116  34 202 317 119  69 395  81  85 361  29 148]


In [13]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 425
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [27 19 40 72 34  6 93 17 13 71  7 26]


In [14]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [15]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1956, 640) (1956,)
(425, 640) (425,)


In [16]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1956
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [116  34 202 317 119  69 395  81  85 361  29 148]


In [17]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 425
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [27 19 40 72 34  6 93 17 13 71  7 26]


In [18]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1956, 740) (1956,)
(425, 740) (425,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=X_train.shape[1], class_weight=False)

In [29]:
param, train, test, diff

(3, 75.66, 70.82, 4.84)

In [19]:
model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=3, n_estimators=100)
model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,3
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [20]:
# training accuracy
pred_train = model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  75.66
wF1 train:  70.58


In [21]:
# test accuracy
pred_test = model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  70.82
wF1 test:  64.74
Urban wF1 test:  53.57
Natural wF1 test:  75.87


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=X_train.shape[1], class_weight=True)

In [37]:
param, train, test, diff

(98, 57.98, 54.59, 3.39)

In [22]:
model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=98, class_weight='balanced', n_estimators=100)
model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,98
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [23]:
# training accuracy
pred_train = model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  57.98
wF1 train:  53.68


In [24]:
# test accuracy
pred_test = model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0_weighted
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  54.59
wF1 test:  51.11
Urban wF1 test:  48.64
Natural wF1 test:  54.12


### Fold 1

In [25]:
fold = 1
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [26]:
train_polygons_raster = r"berlin_train_f1.tif"
test_polygons_raster = r"berlin_test_f1.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [27]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [28]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 16
  Train Accuracy: 0.8276%
  Test Accuracy: 0.8040%
  Gap (Train - Test): 0.0236%


In [29]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1830
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [102  41 195 320 124  53 383  76  72 320  26 118]


In [30]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 551
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 41  12  47  69  29  22 105  22  26 112  10  56]


In [31]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [32]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1830, 640) (1830,)
(551, 640) (551,)


In [33]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1830
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [102  41 195 320 124  53 383  76  72 320  26 118]


In [34]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 551
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 41  12  47  69  29  22 105  22  26 112  10  56]


In [35]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1830, 740) (1830,)
(551, 740) (551,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=False)

In [None]:
param, train, test, diff

In [36]:
model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=12, n_estimators=100)
model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,12
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [37]:
# training accuracy
pred_train = model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  72.79
wF1 train:  65.26


In [38]:
# test accuracy
pred_test = model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  67.51
wF1 test:  59.19
Urban wF1 test:  40.26
Natural wF1 test:  72.12


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=True)

In [64]:
param, train, test, diff

(33, 68.96, 64.07, 4.9)

In [39]:
model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=33, class_weight='balanced', n_estimators=100)
model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,33
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [40]:
# training accuracy
pred_train = model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  68.96
wF1 train:  68.28


In [41]:
# test accuracy
pred_test = model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  64.07
wF1 test:  64.67
Urban wF1 test:  65.94
Natural wF1 test:  64.1


### Fold 2

In [42]:
fold = 2
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [43]:
train_polygons_raster = r"berlin_train_f2.tif"
test_polygons_raster = r"berlin_test_f2.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [44]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [45]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 22
  Train Accuracy: 0.8202%
  Test Accuracy: 0.7962%
  Gap (Train - Test): 0.0240%


In [46]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1910
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  41 193 308 121  62 390  83  73 347  33 142]


In [47]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 471
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [26 12 49 81 32 13 98 15 25 85  3 32]


In [48]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [49]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1910, 640) (1910,)
(471, 640) (471,)


In [50]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1910
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  41 193 308 121  62 390  83  73 347  33 142]


In [51]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 471
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [26 12 49 81 32 13 98 15 25 85  3 32]


In [52]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1910, 740) (1910,)
(471, 740) (471,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=120, class_weight=False)

In [83]:
param, train, test, diff

(13, 83.77, 79.83, 3.94)

In [53]:
model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=13, n_estimators=100)
model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,13
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [54]:
# training accuracy
pred_train = model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  83.77
wF1 train:  79.64


In [55]:
# test accuracy
pred_test = model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  79.83
wF1 test:  75.61
Urban wF1 test:  67.6
Natural wF1 test:  83.41


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=X_train.shape[1], class_weight=True)

In [90]:
param, train, test, diff

(29, 88.27, 84.08, 4.2)

In [56]:
model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=29, class_weight='balanced', n_estimators=100)
model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,29
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [57]:
# training accuracy
pred_train = model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  88.27
wF1 train:  89.2


In [58]:
# test accuracy
pred_test = model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  84.08
wF1 test:  83.91
Urban wF1 test:  74.54
Natural wF1 test:  92.14


### Fold 3

In [59]:
fold = 3
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [60]:
train_polygons_raster = r"berlin_train_f3.tif"
test_polygons_raster = r"berlin_test_f3.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [61]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [62]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 39
  Train Accuracy: 0.8942%
  Test Accuracy: 0.8623%
  Gap (Train - Test): 0.0319%


In [63]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1909
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [120  50 187 283 129  63 403  74  76 354  21 149]


In [64]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 472
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 23   3  55 106  24  12  85  24  22  78  15  25]


In [65]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [66]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1909, 640) (1909,)
(472, 640) (472,)


In [67]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1909
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [120  50 187 283 129  63 403  74  76 354  21 149]


In [68]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 472
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 23   3  55 106  24  12  85  24  22  78  15  25]


In [69]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1909, 740) (1909,)
(472, 740) (472,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=X_train.shape[1], class_weight=False)

In [None]:
param, train, test, diff

In [70]:
model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=200, n_estimators=100)
model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,200
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [71]:
# training accuracy
pred_train = model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.26
wF1 train:  88.93


In [72]:
# test accuracy
pred_test = model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  85.38
wF1 test:  82.12
Urban wF1 test:  90.59
Natural wF1 test:  78.39


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=160, class_weight=True)

In [113]:
param, train, test, diff

(85, 90.1, 88.56, 1.54)

In [73]:
model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=85, class_weight='balanced', n_estimators=100)
model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,85
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [74]:
# training accuracy
pred_train = model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.1
wF1 train:  90.65


In [75]:
# test accuracy
pred_test = model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  88.56
wF1 test:  87.6
Urban wF1 test:  87.77
Natural wF1 test:  87.47


### Fold 4

In [76]:
fold = 4
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [77]:
train_polygons_raster = r"berlin_train_f4.tif"
test_polygons_raster = r"berlin_test_f4.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [78]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [79]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 32
  Train Accuracy: 0.8820%
  Test Accuracy: 0.8680%
  Gap (Train - Test): 0.0140%


In [80]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1919
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  46 191 328 119  53 381  78  86 346  35 139]


In [81]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 462
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 26   7  51  61  34  22 107  20  12  86   1  35]


In [82]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [83]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1919, 640) (1919,)
(462, 640) (462,)


In [84]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1919
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  46 191 328 119  53 381  78  86 346  35 139]


In [85]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 462
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 26   7  51  61  34  22 107  20  12  86   1  35]


In [86]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1919, 740) (1919,)
(462, 740) (462,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=X_train.shape[1], class_weight=False)

In [132]:
param, train, test, diff

(14, 90.05, 87.01, 3.03)

In [87]:
model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=14, n_estimators=100)
model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,14
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [88]:
# training accuracy
pred_train = model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.05
wF1 train:  88.6


In [89]:
# test accuracy
pred_test = model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4 
}


results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  87.01
wF1 test:  85.48
Urban wF1 test:  77.74
Natural wF1 test:  92.15


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=X_train.shape[1], class_weight=True)

In [140]:
param, train, test, diff

(6, 82.39, 79.22, 3.17)

In [90]:
model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=6, class_weight='balanced', n_estimators=100)
model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,6
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [91]:
# training accuracy
pred_train = model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  82.39
wF1 train:  81.08


In [92]:
# test accuracy
pred_test = model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  79.22
wF1 test:  80.23
Urban wF1 test:  73.43
Natural wF1 test:  86.19


## Weighted RF S1 morphometric subset

### Fold 0

In [93]:
if os.path.isfile(r"berlin_train_f0.tif"):
    fold = 0
    train_polygons_raster = r'berlin_train_f0.tif'
    test_polygons_raster = r'berlin_test_f0.tif'
else:
    fold = 0
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"berlin_train_f0.tif"
    test_polygons_raster = r"berlin_test_f0.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [94]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 12
  Train Accuracy: 0.8106%
  Test Accuracy: 0.7647%
  Gap (Train - Test): 0.0459%


In [95]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1956
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [116  34 202 317 119  69 395  81  85 361  29 148]


In [96]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 425
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [27 19 40 72 34  6 93 17 13 71  7 26]


In [97]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [98]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1956, 640) (1956,)
(425, 640) (425,)


In [99]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1956
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [116  34 202 317 119  69 395  81  85 361  29 148]


In [100]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 425
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [27 19 40 72 34  6 93 17 13 71  7 26]


In [101]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1956, 740) (1956,)
(425, 740) (425,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=False)

In [160]:
param, train, test, diff

(5, 72.75, 68.94, 3.81)

In [102]:
ws_model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=5, n_estimators=100)
ws_model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,5
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [103]:
# training accuracy
pred_train = ws_model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  72.75
wF1 train:  66.09


In [104]:
# test accuracy
pred_test = ws_model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  68.94
wF1 test:  61.6
Urban wF1 test:  46.32
Natural wF1 test:  76.09


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=X_train.shape[1], class_weight=True)

In [168]:
param, train, test, diff

(87, 67.59, 62.82, 4.76)

In [105]:
ws_model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=87, class_weight='balanced', n_estimators=100)
ws_model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,87
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [106]:
# training accuracy
pred_train = ws_model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  67.59
wF1 train:  65.74


In [107]:
# test accuracy
pred_test = ws_model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  62.82
wF1 test:  61.32
Urban wF1 test:  70.56
Natural wF1 test:  54.12


### Fold 1

In [108]:
if os.path.isfile(r"berlin_train_f1.tif"):
    fold = 1
    train_polygons_raster = r'berlin_train_f1.tif'
    test_polygons_raster = r'berlin_test_f1.tif'
else:
    fold = 1
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"berlin_train_f1.tif"
    test_polygons_raster = r"berlin_test_f1.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [109]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 16
  Train Accuracy: 0.8276%
  Test Accuracy: 0.8040%
  Gap (Train - Test): 0.0236%


In [110]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1830
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [102  41 195 320 124  53 383  76  72 320  26 118]


In [111]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 551
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 41  12  47  69  29  22 105  22  26 112  10  56]


In [112]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [113]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1830, 640) (1830,)
(551, 640) (551,)


In [114]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1830
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [102  41 195 320 124  53 383  76  72 320  26 118]


In [115]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 551
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 41  12  47  69  29  22 105  22  26 112  10  56]


In [116]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1830, 740) (1830,)
(551, 740) (551,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=X_train.shape[1], class_weight=False)

In [None]:
param, train, test, diff

In [117]:
ws_model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=112, n_estimators=100)
ws_model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,112
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [118]:
# training accuracy
pred_train = ws_model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  71.97
wF1 train:  63.92


In [119]:
# test accuracy
pred_test = ws_model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  66.61
wF1 test:  58.11
Urban wF1 test:  37.65
Natural wF1 test:  72.13


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=X_train.shape[1], class_weight=True)

In [190]:
param, train, test, diff

(23, 70.49, 65.52, 4.97)

In [120]:
ws_model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=23, class_weight='balanced', n_estimators=100)
ws_model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,23
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [121]:
# training accuracy
pred_train = ws_model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  70.49
wF1 train:  70.38


In [122]:
# test accuracy
pred_test = ws_model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  65.52
wF1 test:  66.52
Urban wF1 test:  62.92
Natural wF1 test:  69.38


### Fold 2

In [123]:
if os.path.isfile(r"berlin_train_f2.tif"):
    fold = 2
    train_polygons_raster = r'berlin_train_f2.tif'
    test_polygons_raster = r'berlin_test_f2.tif'
else:
    fold = 2
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"berlin_train_f2.tif"
    test_polygons_raster = r"berlin_test_f2.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [124]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 22
  Train Accuracy: 0.8202%
  Test Accuracy: 0.7962%
  Gap (Train - Test): 0.0240%


In [125]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1910
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  41 193 308 121  62 390  83  73 347  33 142]


In [126]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 471
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [26 12 49 81 32 13 98 15 25 85  3 32]


In [127]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [128]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1910, 640) (1910,)
(471, 640) (471,)


In [129]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1910
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  41 193 308 121  62 390  83  73 347  33 142]


In [130]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 471
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [26 12 49 81 32 13 98 15 25 85  3 32]


In [131]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1910, 740) (1910,)
(471, 740) (471,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=X_train.shape[1], class_weight=False)

In [205]:
param, train, test, diff

(6, 82.88, 78.77, 4.11)

In [132]:
ws_model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=6, n_estimators=100)
ws_model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,6
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [133]:
# training accuracy
pred_train = ws_model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  82.88
wF1 train:  78.67


In [134]:
# test accuracy
pred_test = ws_model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  78.77
wF1 test:  74.34
Urban wF1 test:  66.74
Natural wF1 test:  81.69


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=X_train.shape[1], class_weight=True)

In [211]:
param, train, test, diff

(53, 87.43, 83.44, 4.0)

In [135]:
ws_model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=53, class_weight='balanced', n_estimators=100)
ws_model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,53
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [136]:
# training accuracy
pred_train = ws_model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  87.43
wF1 train:  87.93


In [137]:
# test accuracy
pred_test = ws_model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  83.44
wF1 test:  82.34
Urban wF1 test:  75.03
Natural wF1 test:  89.04


### Fold 3

In [138]:
if os.path.isfile(r"berlin_train_f3.tif"):
    fold = 3
    train_polygons_raster = r'berlin_train_f3.tif'
    test_polygons_raster = r'berlin_test_f3.tif'
else:
    fold = 3
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"berlin_train_f3.tif"
    test_polygons_raster = r"berlin_test_f3.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [139]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 39
  Train Accuracy: 0.8942%
  Test Accuracy: 0.8623%
  Gap (Train - Test): 0.0319%


In [140]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1909
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [120  50 187 283 129  63 403  74  76 354  21 149]


In [141]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 472
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 23   3  55 106  24  12  85  24  22  78  15  25]


In [142]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [143]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1909, 640) (1909,)
(472, 640) (472,)


In [144]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1909
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [120  50 187 283 129  63 403  74  76 354  21 149]


In [145]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 472
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 23   3  55 106  24  12  85  24  22  78  15  25]


In [146]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1909, 740) (1909,)
(472, 740) (472,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=X_train.shape[1], class_weight=False)

In [226]:
param, train, test, diff

(88, 88.69, 83.69, 5.0)

In [147]:
ws_model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=88, n_estimators=100)
ws_model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,88
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [148]:
# training accuracy
pred_train = ws_model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  88.69
wF1 train:  86.78


In [149]:
# test accuracy
pred_test = ws_model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  83.69
wF1 test:  79.68
Urban wF1 test:  85.35
Natural wF1 test:  75.86


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=X_train.shape[1], class_weight=True)

In [232]:
param, train, test, diff

(49, 90.1, 87.08, 3.02)

In [150]:
ws_model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=49, class_weight='balanced', n_estimators=100)
ws_model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,49
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [151]:
# training accuracy
pred_train = ws_model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.1
wF1 train:  90.59


In [152]:
# test accuracy
pred_test = ws_model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  87.08
wF1 test:  86.08
Urban wF1 test:  86.81
Natural wF1 test:  85.72


### Fold 4

In [153]:
if os.path.isfile(r"berlin_train_f4.tif"):
    fold = 4
    train_polygons_raster = r'berlin_train_f4.tif'
    test_polygons_raster = r'berlin_test_f4.tif'
else:
    fold = 4
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"berlin_train_f4.tif"
    test_polygons_raster = r"berlin_test_f4.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [154]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 32
  Train Accuracy: 0.8820%
  Test Accuracy: 0.8680%
  Gap (Train - Test): 0.0140%


In [155]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1919
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  46 191 328 119  53 381  78  86 346  35 139]


In [156]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 462
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 26   7  51  61  34  22 107  20  12  86   1  35]


In [157]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [158]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1919, 640) (1919,)
(462, 640) (462,)


In [159]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1919
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [117  46 191 328 119  53 381  78  86 346  35 139]


In [160]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 462
Unique Labels: [ 2  4  5  6  8  9 11 12 13 14 16 17]
Counts: [ 26   7  51  61  34  22 107  20  12  86   1  35]


In [161]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1919, 740) (1919,)
(462, 740) (462,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=False)

In [247]:
param, train, test, diff

(13, 89.53, 85.93, 3.6)

In [162]:
ws_model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=13, n_estimators=100)
ws_model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,13
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [163]:
# training accuracy
pred_train = ws_model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  89.53
wF1 train:  87.8


In [164]:
# test accuracy
pred_test = ws_model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  85.93
wF1 test:  83.96
Urban wF1 test:  74.33
Natural wF1 test:  92.25


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=X_train.shape[1], class_weight=True)

In [253]:
param, train, test, diff

(24, 83.85, 79.22, 4.62)

In [165]:
ws_model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=24, class_weight='balanced', n_estimators=100)
ws_model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,24
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [166]:
# training accuracy
pred_train = ws_model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  83.85
wF1 train:  82.72


In [167]:
# test accuracy
pred_test = ws_model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  79.22
wF1 test:  79.93
Urban wF1 test:  73.44
Natural wF1 test:  85.5


## Evaluation

In [168]:
df = pd.DataFrame.from_dict({
    (i, j, k): results[i][j][k] 
    for i in results.keys() 
    for j in results[i].keys() 
    for k in results[i][j].keys()
}, orient='index')

df.index.names = ["Setup", "Strategy", "Fold"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OA,wF1,F1U,F1N,Model
Setup,Strategy,Fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
non_weighted_set,non_weighted_model,0,70.82,64.74,53.57,75.87,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,1,67.51,59.19,40.26,72.12,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,2,79.83,75.61,67.6,83.41,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,non_weighted_model,3,85.38,82.12,90.59,78.39,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,non_weighted_model,4,87.01,85.48,77.74,92.15,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,weighted_model,0,54.59,51.11,48.64,54.12,"(DecisionTreeClassifier(max_depth=2, max_featu..."
non_weighted_set,weighted_model,1,64.07,64.67,65.94,64.1,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,2,84.08,83.91,74.54,92.14,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,weighted_model,3,88.56,87.6,87.77,87.47,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,weighted_model,4,79.22,80.23,73.43,86.19,"(DecisionTreeClassifier(max_depth=4, max_featu..."


In [169]:
# determine best set and best weighting strategy
df_metrics = df.drop(columns=["Model"])
averages = df_metrics.groupby(["Setup", "Strategy"]).mean().round(2)
averages["wF1+F1U"] = averages["wF1"] + averages["F1U"]
averages = averages.sort_values("wF1+F1U", ascending=False)
averages

Unnamed: 0_level_0,Unnamed: 1_level_0,OA,wF1,F1U,F1N,wF1+F1U
Setup,Strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
weighted_set,weighted_model,75.62,75.24,73.75,76.75,148.99
non_weighted_set,weighted_model,74.1,73.5,70.06,76.8,143.56
non_weighted_set,non_weighted_model,78.11,73.43,65.95,80.39,139.38
weighted_set,non_weighted_model,76.79,71.54,62.08,79.6,133.62


In [170]:
best_setup, best_strategy = averages["wF1+F1U"].idxmax()
best_setup, best_strategy

('weighted_set', 'weighted_model')

In [171]:
# show individual model performance of the best  set and best weighting strategy
df_metrics.loc[best_setup, best_strategy]

Unnamed: 0_level_0,OA,wF1,F1U,F1N
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,62.82,61.32,70.56,54.12
1,65.52,66.52,62.92,69.38
2,83.44,82.34,75.03,89.04
3,87.08,86.08,86.81,85.72
4,79.22,79.93,73.44,85.5


In [172]:
# show the average metrics of the best set and best weighting strategy
averages.loc[best_setup, best_strategy]

OA          75.62
wF1         75.24
F1U         73.75
F1N         76.75
wF1+F1U    148.99
Name: (weighted_set, weighted_model), dtype: float64

In [173]:
# get the models
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
rf_models

[RandomForestClassifier(class_weight='balanced', max_depth=2, max_features=87,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=23,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=5, max_features=53,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=6, max_features=49,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=4, max_features=24,
                        n_jobs=-1, random_state=0)]

## Prediction

In [174]:
better_set = weighted_set
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
cnn_models = [cnn_fold0, cnn_fold1, cnn_fold2, cnn_fold3, cnn_fold4]
output = [r'outputs\s4\berlin_S4_fold0.tif', r'outputs\s4\berlin_S4_fold1.tif', r'outputs\s4\berlin_S4_fold2.tif', r'outputs\s4\berlin_S4_fold3.tif', r'outputs\s4\berlin_S4_fold4.tif']

In [175]:
# whole satellite image to patches
feature_patches = cnn_utils.generate_feature_patches_loader(image_path = image,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 418465


In [176]:
# for all image patches extract morphometrics
feature_patches_urbanform = cnn_utils.generate_feature_patches_loader(image_path =better_set,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 418465


In [None]:
for i in range(5):
    train_polygons_raster = fr'berlin_train_f{i}.tif'

    # load model
    cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
    cnn_model = cnn_model.cuda()
    trained_model = torch.load(cnn_models[i])
    cnn_model.load_state_dict(trained_model['model_state'])
    print('cnn model loaded')
    
    train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

    mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
    feature_patches_norm = cnn_utils.normalize_loader(feature_patches, mean, std)
    print('image patches normalized')
    
    # extract embeddings
    cnn_model.eval()
    embeddings = list()
    with torch.no_grad():
        for feature in feature_patches_norm:
            feature = feature.cuda()
            embedding = cnn_model.get_embedding_raw_fc(feature)
            embeddings.append(embedding.cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    print('embeddings extracted')
    
    # extract morphometrics
    urbanform = list()
    for feature in feature_patches_urbanform:
        urbanform.append(feature.cpu().numpy())
    urbanform = np.concatenate(urbanform, axis=0)
    print('morphometrics extracted')
    
    # aggregate morphometrics
    mean_urbanform = urbanform.mean(axis=(2,3))
    min_urbanform = urbanform.min(axis=(2,3))
    max_urbanform = urbanform.max(axis=(2,3))
    std_urbanform = urbanform.std(axis=(2,3))
    med_urbanform = np.median(urbanform, axis=(2, 3))
    print('morphometrics aggregated')
    
    # merge
    all_features = np.hstack((embeddings,mean_urbanform,min_urbanform,max_urbanform,std_urbanform,med_urbanform))

    # prediction
    rf_model = rf_models[i]
    prediction = rf_model.predict(all_features)
    print('prediction done')

    offset_left_calc, offset_top_calc = cnn_utils.calculate_optimal_offsets(image, patch_size, stride)

    output_path = output[i]
    output_path = output_path.replace(".tif", "_temp.tif")
    cnn_utils.lcz_map(offset_left_calc, offset_top_calc, image, prediction, output_path)

## Per pixel validation

In [4]:
# provide test polygons raster path
test_polygons_path = ['berlin_test_f0.tif','berlin_test_f1.tif','berlin_test_f2.tif','berlin_test_f3.tif','berlin_test_f4.tif']

In [185]:
# resample lcz map to 100m
for f in output:
    out = f
    temp_f = f.replace(".tif", "_temp.tif")
    utils.resample_lcz_map(temp_f, out)
    # if os.path.exists(temp_f):
    #     try:
    #         os.remove(temp_f)
    #     except:
    #         pass

 saved to s4_outputs\berlin_S4_fold0.tif
 saved to s4_outputs\berlin_S4_fold1.tif
 saved to s4_outputs\berlin_S4_fold2.tif
 saved to s4_outputs\berlin_S4_fold3.tif
 saved to s4_outputs\berlin_S4_fold4.tif


In [5]:
metrics, confusion_matrices = utils.perpixel_validation(output, test_polygons_path, splited_ref_data)

In [6]:
df_perpixel = pd.DataFrame(metrics)
df_perpixel = df_perpixel.set_index("Fold")
df_perpixel

Unnamed: 0_level_0,OA,wF1,wF1_Urban,wF1_Natural,F1_Class_1,F1_Class_2,F1_Class_3,F1_Class_4,F1_Class_5,F1_Class_6,...,F1_Class_8,F1_Class_9,F1_Class_10,F1_Class_11,F1_Class_12,F1_Class_13,F1_Class_14,F1_Class_15,F1_Class_16,F1_Class_17
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,61.21,59.49,67.14,53.64,,91.97,,59.17,30.04,84.22,...,69.01,4.65,,95.31,0.0,20.25,0.0,,39.02,100.0
1,63.63,64.84,63.88,66.06,,79.67,,52.61,41.33,73.55,...,59.0,54.89,,89.47,0.0,30.48,52.53,,32.37,99.91
2,82.26,81.15,73.29,88.55,,88.06,,48.93,54.79,79.86,...,84.87,64.24,,99.01,72.86,34.3,93.93,,21.43,98.19
3,85.6,85.11,84.64,86.09,,68.92,,31.87,68.07,97.93,...,81.19,84.32,,98.01,59.42,15.82,98.14,,81.34,100.0
4,78.85,79.01,74.31,83.51,,79.51,,26.2,37.07,92.35,...,91.72,80.61,,97.02,42.19,9.64,84.76,,22.43,100.0


In [7]:
df_perpixel_mean = df_perpixel.mean().round(2)
df_perpixel_mean

OA             74.31
wF1            73.92
wF1_Urban      72.65
wF1_Natural    75.57
F1_Class_1       NaN
F1_Class_2     81.63
F1_Class_3       NaN
F1_Class_4     43.76
F1_Class_5     46.26
F1_Class_6     85.58
F1_Class_7       NaN
F1_Class_8     77.16
F1_Class_9     57.74
F1_Class_10      NaN
F1_Class_11    95.76
F1_Class_12    34.89
F1_Class_13    22.10
F1_Class_14    65.87
F1_Class_15      NaN
F1_Class_16    39.32
F1_Class_17    99.62
dtype: float64

In [8]:
# export all results to csv
df_perpixel.to_csv(r"results\s4\berlin_S4_results.csv")

In [9]:
# export confusion matrices
with open(r"results\s4\berlin_S4_confusion_matrices.pkl", "wb") as f:
    pickle.dump(confusion_matrices, f)

# Hong Kong

In [2]:
# load splited reference data
splited_ref_data = gpd.read_file(r'ref_data\hongkong_ref_splitS2S3S4.gpkg')

In [3]:
# load satellite image (10 m resolution)
image = r'imagery\hongkong_20180321.tif'

In [4]:
# load rasterized subset of morphometrics from non-weighted RF S1 models and weighted RF S1 models (10 m resolution)
non_weighted_set = r'rasterized_morphometrics\hongkong_rasterized_morphometrics_fold3.tif'
weighted_set = r'rasterized_morphometrics\hongkong_rasterized_morphometrics_fold0_weighted.tif'

In [5]:
# load trained S2 CNN models for each fold
cnn_fold0 = r's2_cnn_models\hongkong_S2_fold0_epoch43.pth'
cnn_fold1 = r's2_cnn_models\hongkong_S2_fold1_epoch38.pth'
cnn_fold2 = r's2_cnn_models\hongkong_S2_fold2_epoch33.pth'
cnn_fold3 = r's2_cnn_models\hongkong_S2_fold3_epoch26.pth'
cnn_fold4 = r's2_cnn_models\hongkong_S2_fold4_epoch29.pth'

In [6]:
# recording results
setups = ["non_weighted_set", "weighted_set"]
strategies = ["non_weighted_model", "weighted_model"]
folds = [0, 1, 2, 3, 4]

# results[setup][strategy][fold]
results = {
    setup: {
        strat: {
            fold: {} for fold in folds
        } for strat in strategies
    } for setup in setups
}

## Non-weighted RF S1 morphometric subset

### Fold 0

In [7]:
fold = 0
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [8]:
train_polygons_raster = r"hongkong_train_f0.tif"
test_polygons_raster = r"hongkong_test_f0.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [9]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [10]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 43
  Train Accuracy: 0.7514%
  Test Accuracy: 0.7315%
  Gap (Train - Test): 0.0199%


In [13]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 706
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 45  12  27  55   8   8  10  22 122  46  56  75 220]


In [14]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 149
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [12  6  3 11  1  2  1  3 42  9 10 15 34]


In [15]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [16]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(706, 640) (706,)
(149, 640) (149,)


In [17]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 706
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 45  12  27  55   8   8  10  22 122  46  56  75 220]


In [18]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 149
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [12  6  3 11  1  2  1  3 42  9 10 15 34]


In [19]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(706, 740) (706,)
(149, 740) (149,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=False)

In [30]:
param, train, test, diff

(9, 73.51, 71.81, 1.7)

In [20]:
model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=9, n_estimators=100)
model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,9
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [21]:
# training accuracy
pred_train = model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  73.51
wF1 train:  67.06


In [22]:
# test accuracy
pred_test = model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  71.81
wF1 test:  66.19
Urban wF1 test:  47.7
Natural wF1 test:  72.95


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [36]:
param, train, test, diff

(12, 79.75, 75.17, 4.58)

In [23]:
model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=12, class_weight='balanced', n_estimators=100)
model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,12
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [24]:
# training accuracy
pred_train = model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  79.75
wF1 train:  76.94


In [25]:
# test accuracy
pred_test = model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0_weighted
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  75.17
wF1 test:  73.61
Urban wF1 test:  59.0
Natural wF1 test:  79.23


### Fold 1

In [26]:
fold = 1
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [27]:
train_polygons_raster = r"hongkong_train_f1.tif"
test_polygons_raster = r"hongkong_test_f1.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [28]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [29]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 38
  Train Accuracy: 0.7880%
  Test Accuracy: 0.7513%
  Gap (Train - Test): 0.0367%


In [30]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 658
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  24  45   7   8  11  18 129  40  53  65 199]


In [31]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 197
Unique Labels: [ 1  2  3  4  5  6 10 11 12 13 14 17]
Counts: [13  3  6 21  2  2  7 35 15 13 25 55]


In [32]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [33]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(658, 640) (658,)
(197, 640) (197,)


In [34]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 658
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  24  45   7   8  11  18 129  40  53  65 199]


In [35]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 197
Unique Labels: [ 1  2  3  4  5  6 10 11 12 13 14 17]
Counts: [13  3  6 21  2  2  7 35 15 13 25 55]


In [36]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(658, 740) (658,)
(197, 740) (197,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=False)

In [53]:
param, train, test, diff

(13, 71.43, 70.56, 0.87)

In [37]:
model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=13, n_estimators=100)
model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,13
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [38]:
# training accuracy
pred_train = model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  71.43
wF1 train:  65.28


In [39]:
# test accuracy
pred_test = model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  70.56
wF1 test:  64.13
Urban wF1 test:  48.17
Natural wF1 test:  71.39


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=120, class_weight=True)

In [59]:
param, train, test, diff

(15, 67.78, 63.45, 4.33)

In [40]:
model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=15, class_weight='balanced', n_estimators=100)
model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,15
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [41]:
# training accuracy
pred_train = model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  67.78
wF1 train:  63.02


In [42]:
# test accuracy
pred_test = model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  63.45
wF1 test:  57.82
Urban wF1 test:  43.23
Natural wF1 test:  64.18


### Fold 2

In [43]:
fold = 2
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [44]:
train_polygons_raster = r"hongkong_train_f2.tif"
test_polygons_raster = r"hongkong_test_f2.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [45]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [46]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 33
  Train Accuracy: 0.7964%
  Test Accuracy: 0.7515%
  Gap (Train - Test): 0.0449%


In [47]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 690
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 48  15  25  57   7   8   5  21 142  48  49  73 192]


In [48]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 165
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 9  3  5  9  2  2  6  4 22  7 17 17 62]


In [49]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [50]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(690, 640) (690,)
(165, 640) (165,)


In [51]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 690
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 48  15  25  57   7   8   5  21 142  48  49  73 192]


In [52]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 165
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 9  3  5  9  2  2  6  4 22  7 17 17 62]


In [53]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(690, 740) (690,)
(165, 740) (165,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [77]:
param, train, test, diff

(53, 81.45, 78.18, 3.27)

In [54]:
model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=53, n_estimators=100)
model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,53
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [55]:
# training accuracy
pred_train = model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  81.45
wF1 train:  78.64


In [56]:
# test accuracy
pred_test = model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  78.18
wF1 test:  75.28
Urban wF1 test:  36.59
Natural wF1 test:  87.76


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [83]:
param, train, test, diff

(9, 79.71, 79.39, 0.32)

In [57]:
model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=9, class_weight='balanced', n_estimators=100)
model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,9
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [58]:
# training accuracy
pred_train = model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  79.71
wF1 train:  78.82


In [59]:
# test accuracy
pred_test = model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  79.39
wF1 test:  75.75
Urban wF1 test:  61.93
Natural wF1 test:  80.27


### Fold 3

In [60]:
fold = 3
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [61]:
train_polygons_raster = r"hongkong_train_f3.tif"
test_polygons_raster = r"hongkong_test_f3.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [62]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [63]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 26
  Train Accuracy: 0.7571%
  Test Accuracy: 0.7120%
  Gap (Train - Test): 0.0451%


In [64]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 671
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  21  54   8   9   8  19 133  46  52  73 189]


In [65]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 184
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [13  3  9 12  1  1  3  6 31  9 14 17 65]


In [66]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [67]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(671, 640) (671,)
(184, 640) (184,)


In [68]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 671
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  21  54   8   9   8  19 133  46  52  73 189]


In [69]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 184
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [13  3  9 12  1  1  3  6 31  9 14 17 65]


In [70]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(671, 740) (671,)
(184, 740) (184,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [101]:
param, train, test, diff

(1, 81.52, 78.26, 3.26)

In [71]:
model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=1, n_estimators=100)
model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [72]:
# training accuracy
pred_train = model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  81.52
wF1 train:  77.86


In [73]:
# test accuracy
pred_test = model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  78.26
wF1 test:  72.7
Urban wF1 test:  62.79
Natural wF1 test:  77.37


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=160, class_weight=True)

In [109]:
param, train, test, diff

(9, 80.48, 79.35, 1.13)

In [74]:
model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=9, class_weight='balanced', n_estimators=100)
model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,9
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [75]:
# training accuracy
pred_train = model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  80.48
wF1 train:  78.92


In [76]:
# test accuracy
pred_test = model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  79.35
wF1 test:  78.79
Urban wF1 test:  64.68
Natural wF1 test:  84.6


### Fold 4

In [77]:
fold = 4
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [78]:
train_polygons_raster = r"hongkong_train_f4.tif"
test_polygons_raster = r"hongkong_test_f4.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [79]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [80]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 29
  Train Accuracy: 0.7835%
  Test Accuracy: 0.7688%
  Gap (Train - Test): 0.0147%


In [81]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 695
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 47  15  23  53   6   7  10  20 130  40  54  74 216]


In [82]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 160
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [10  3  7 13  3  3  1  5 34 15 12 16 38]


In [83]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [84]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(695, 640) (695,)
(160, 640) (160,)


In [85]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 695
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 47  15  23  53   6   7  10  20 130  40  54  74 216]


In [86]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 160
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [10  3  7 13  3  3  1  5 34 15 12 16 38]


In [87]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(695, 740) (695,)
(160, 740) (160,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [88]:
model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=80, n_estimators=100)
model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,80
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [89]:
# training accuracy
pred_train = model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  49.78
wF1 train:  38.95


In [90]:
# test accuracy
pred_test = model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4 
}


results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  45.0
wF1 test:  33.01
Urban wF1 test:  0.0
Natural wF1 test:  51.16


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=1, max_features=180, class_weight=True)

In [133]:
param, train, test, diff

(113, 71.94, 67.5, 4.44)

In [91]:
model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=113, class_weight='balanced', n_estimators=100)
model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,113
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [92]:
# training accuracy
pred_train = model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  71.94
wF1 train:  64.66


In [93]:
# test accuracy
pred_test = model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  67.5
wF1 test:  58.68
Urban wF1 test:  46.84
Natural wF1 test:  64.72


## Weighted RF S1 morphometric subset

### Fold 0

In [94]:
if os.path.isfile(r"hongkong_train_f0.tif"):
    fold = 0
    train_polygons_raster = r'hongkong_train_f0.tif'
    test_polygons_raster = r'hongkong_test_f0.tif'
else:
    fold = 0
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"hongkong_train_f0.tif"
    test_polygons_raster = r"hongkong_test_f0.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [95]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 43
  Train Accuracy: 0.7514%
  Test Accuracy: 0.7315%
  Gap (Train - Test): 0.0199%


In [96]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 706
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 45  12  27  55   8   8  10  22 122  46  56  75 220]


In [97]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 149
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [12  6  3 11  1  2  1  3 42  9 10 15 34]


In [98]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [99]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(706, 640) (706,)
(149, 640) (149,)


In [100]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 706
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 45  12  27  55   8   8  10  22 122  46  56  75 220]


In [101]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 149
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [12  6  3 11  1  2  1  3 42  9 10 15 34]


In [102]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(706, 740) (706,)
(149, 740) (149,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=160, class_weight=False)

In [148]:
param, train, test, diff

(5, 74.5, 70.47, 4.03)

In [103]:
ws_model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=5, n_estimators=100)
ws_model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,5
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [104]:
# training accuracy
pred_train = ws_model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  74.5
wF1 train:  68.38


In [105]:
# test accuracy
pred_test = ws_model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  70.47
wF1 test:  65.03
Urban wF1 test:  47.34
Natural wF1 test:  71.96


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [154]:
param, train, test, diff

(38, 76.91, 72.48, 4.43)

In [106]:
ws_model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=38, class_weight='balanced', n_estimators=100)
ws_model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,38
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [107]:
# training accuracy
pred_train = ws_model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  76.91
wF1 train:  73.54


In [108]:
# test accuracy
pred_test = ws_model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  72.48
wF1 test:  69.72
Urban wF1 test:  52.38
Natural wF1 test:  76.52


### Fold 1

In [109]:
if os.path.isfile(r"hongkong_train_f1.tif"):
    fold = 1
    train_polygons_raster = r'hongkong_train_f1.tif'
    test_polygons_raster = r'hongkong_test_f1.tif'
else:
    fold = 1
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"hongkong_train_f1.tif"
    test_polygons_raster = r"hongkong_test_f1.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [110]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 38
  Train Accuracy: 0.7880%
  Test Accuracy: 0.7513%
  Gap (Train - Test): 0.0367%


In [111]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 658
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  24  45   7   8  11  18 129  40  53  65 199]


In [112]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 197
Unique Labels: [ 1  2  3  4  5  6 10 11 12 13 14 17]
Counts: [13  3  6 21  2  2  7 35 15 13 25 55]


In [113]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [114]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(658, 640) (658,)
(197, 640) (197,)


In [115]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 658
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  24  45   7   8  11  18 129  40  53  65 199]


In [116]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 197
Unique Labels: [ 1  2  3  4  5  6 10 11 12 13 14 17]
Counts: [13  3  6 21  2  2  7 35 15 13 25 55]


In [117]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(658, 740) (658,)
(197, 740) (197,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=False)

In [169]:
param, train, test, diff

(14, 74.77, 71.57, 3.2)

In [118]:
ws_model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=14, n_estimators=100)
ws_model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,14
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [119]:
# training accuracy
pred_train = ws_model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  74.77
wF1 train:  69.58


In [120]:
# test accuracy
pred_test = ws_model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  71.57
wF1 test:  66.62
Urban wF1 test:  46.67
Natural wF1 test:  75.08


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=1, max_features=180, class_weight=True)

In [175]:
param, train, test, diff

(21, 69.3, 65.99, 3.31)

In [121]:
ws_model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=21, class_weight='balanced', n_estimators=100)
ws_model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,21
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [122]:
# training accuracy
pred_train = ws_model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  69.3
wF1 train:  63.7


In [123]:
# test accuracy
pred_test = ws_model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  65.99
wF1 test:  59.46
Urban wF1 test:  27.26
Natural wF1 test:  72.6


### Fold 2

In [124]:
if os.path.isfile(r"hongkong_train_f2.tif"):
    fold = 2
    train_polygons_raster = r'hongkong_train_f2.tif'
    test_polygons_raster = r'hongkong_test_f2.tif'
else:
    fold = 2
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"hongkong_train_f2.tif"
    test_polygons_raster = r"hongkong_test_f2.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [125]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 33
  Train Accuracy: 0.7964%
  Test Accuracy: 0.7515%
  Gap (Train - Test): 0.0449%


In [126]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 690
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 48  15  25  57   7   8   5  21 142  48  49  73 192]


In [127]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 165
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 9  3  5  9  2  2  6  4 22  7 17 17 62]


In [128]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [129]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(690, 640) (690,)
(165, 640) (165,)


In [130]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 690
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 48  15  25  57   7   8   5  21 142  48  49  73 192]


In [131]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 165
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 9  3  5  9  2  2  6  4 22  7 17 17 62]


In [132]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(690, 740) (690,)
(165, 740) (165,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [190]:
param, train, test, diff

(37, 81.3, 78.18, 3.12)

In [133]:
ws_model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=37, n_estimators=100)
ws_model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,37
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [134]:
# training accuracy
pred_train = ws_model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  81.3
wF1 train:  78.87


In [135]:
# test accuracy
pred_test = ws_model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  78.18
wF1 test:  75.81
Urban wF1 test:  36.37
Natural wF1 test:  88.53


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [196]:
param, train, test, diff

(8, 79.57, 78.18, 1.38)

In [136]:
ws_model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=8, class_weight='balanced', n_estimators=100)
ws_model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,8
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [137]:
# training accuracy
pred_train = ws_model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  79.57
wF1 train:  78.51


In [138]:
# test accuracy
pred_test = ws_model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  78.18
wF1 test:  74.12
Urban wF1 test:  58.34
Natural wF1 test:  79.25


### Fold 3

In [139]:
if os.path.isfile(r"hongkong_train_f3.tif"):
    fold = 3
    train_polygons_raster = r'hongkong_train_f3.tif'
    test_polygons_raster = r'hongkong_test_f3.tif'
else:
    fold = 3
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"hongkong_train_f3.tif"
    test_polygons_raster = r"hongkong_test_f3.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [140]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 26
  Train Accuracy: 0.7571%
  Test Accuracy: 0.7120%
  Gap (Train - Test): 0.0451%


In [141]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 671
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  21  54   8   9   8  19 133  46  52  73 189]


In [142]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 184
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [13  3  9 12  1  1  3  6 31  9 14 17 65]


In [143]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [144]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(671, 640) (671,)
(184, 640) (184,)


In [145]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 671
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 44  15  21  54   8   9   8  19 133  46  52  73 189]


In [146]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 184
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [13  3  9 12  1  1  3  6 31  9 14 17 65]


In [147]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(671, 740) (671,)
(184, 740) (184,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=False)

In [211]:
param, train, test, diff

(0, 0, 0, 0)

In [148]:
ws_model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=150, n_estimators=100)
ws_model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,150
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [149]:
# training accuracy
pred_train = ws_model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  85.54
wF1 train:  83.24


In [150]:
# test accuracy
pred_test = ws_model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  80.43
wF1 test:  77.15
Urban wF1 test:  65.69
Natural wF1 test:  81.97


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=True)

In [217]:
param, train, test, diff

(11, 85.99, 81.52, 4.47)

In [151]:
ws_model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=11, class_weight='balanced', n_estimators=100)
ws_model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,11
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [152]:
# training accuracy
pred_train = ws_model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  85.99
wF1 train:  85.79


In [153]:
# test accuracy
pred_test = ws_model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  81.52
wF1 test:  80.04
Urban wF1 test:  69.77
Natural wF1 test:  84.54


### Fold 4

In [154]:
if os.path.isfile(r"hongkong_train_f4.tif"):
    fold = 4
    train_polygons_raster = r'hongkong_train_f4.tif'
    test_polygons_raster = r'hongkong_test_f4.tif'
else:
    fold = 4
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"hongkong_train_f4.tif"
    test_polygons_raster = r"hongkong_test_f4.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [155]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 29
  Train Accuracy: 0.7835%
  Test Accuracy: 0.7688%
  Gap (Train - Test): 0.0147%


In [156]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 695
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 47  15  23  53   6   7  10  20 130  40  54  74 216]


In [157]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 160
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [10  3  7 13  3  3  1  5 34 15 12 16 38]


In [158]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [159]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(695, 640) (695,)
(160, 640) (160,)


In [160]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 695
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [ 47  15  23  53   6   7  10  20 130  40  54  74 216]


In [161]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 160
Unique Labels: [ 1  2  3  4  5  6  8 10 11 12 13 14 17]
Counts: [10  3  7 13  3  3  1  5 34 15 12 16 38]


In [162]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(695, 740) (695,)
(160, 740) (160,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [163]:
ws_model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=22, n_estimators=100)
ws_model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,22
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [164]:
# training accuracy
pred_train = ws_model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  49.78
wF1 train:  38.61


In [165]:
# test accuracy
pred_test = ws_model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  45.0
wF1 test:  33.01
Urban wF1 test:  0.0
Natural wF1 test:  51.16


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [240]:
param, train, test, diff

(0, 0, 0, 0)

In [166]:
ws_model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=13, class_weight='balanced', n_estimators=100)
ws_model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,13
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [167]:
# training accuracy
pred_train = ws_model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  78.13
wF1 train:  77.13


In [168]:
# test accuracy
pred_test = ws_model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  72.5
wF1 test:  69.65
Urban wF1 test:  62.99
Natural wF1 test:  73.36


## Evaluation

In [169]:
df = pd.DataFrame.from_dict({
    (i, j, k): results[i][j][k] 
    for i in results.keys() 
    for j in results[i].keys() 
    for k in results[i][j].keys()
}, orient='index')

df.index.names = ["Setup", "Strategy", "Fold"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OA,wF1,F1U,F1N,Model
Setup,Strategy,Fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
non_weighted_set,non_weighted_model,0,71.81,66.19,47.7,72.95,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,1,70.56,64.13,48.17,71.39,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,2,78.18,75.28,36.59,87.76,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,non_weighted_model,3,78.26,72.7,62.79,77.37,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,non_weighted_model,4,45.0,33.01,0.0,51.16,"(DecisionTreeClassifier(max_depth=1, max_featu..."
non_weighted_set,weighted_model,0,75.17,73.61,59.0,79.23,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,1,63.45,57.82,43.23,64.18,"(DecisionTreeClassifier(max_depth=1, max_featu..."
non_weighted_set,weighted_model,2,79.39,75.75,61.93,80.27,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,3,79.35,78.79,64.68,84.6,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,4,67.5,58.68,46.84,64.72,"(DecisionTreeClassifier(max_depth=1, max_featu..."


In [170]:
# determine best set and best weighting strategy
df_metrics = df.drop(columns=["Model"])
averages = df_metrics.groupby(["Setup", "Strategy"]).mean().round(2)
averages["wF1+F1U"] = averages["wF1"] + averages["F1U"]
averages = averages.sort_values("wF1+F1U", ascending=False)
averages

Unnamed: 0_level_0,Unnamed: 1_level_0,OA,wF1,F1U,F1N,wF1+F1U
Setup,Strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
weighted_set,weighted_model,74.13,70.6,54.15,77.25,124.75
non_weighted_set,weighted_model,72.97,68.93,55.14,74.6,124.07
weighted_set,non_weighted_model,69.13,63.52,39.21,73.74,102.73
non_weighted_set,non_weighted_model,68.76,62.26,39.05,72.13,101.31


In [171]:
best_setup, best_strategy = averages["wF1+F1U"].idxmax()
best_setup, best_strategy

('weighted_set', 'weighted_model')

In [172]:
# show individual model performance of the best  set and best weighting strategy
df_metrics.loc[best_setup, best_strategy]

Unnamed: 0_level_0,OA,wF1,F1U,F1N
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,72.48,69.72,52.38,76.52
1,65.99,59.46,27.26,72.6
2,78.18,74.12,58.34,79.25
3,81.52,80.04,69.77,84.54
4,72.5,69.65,62.99,73.36


In [173]:
# show the average metrics of the best set and best weighting strategy
averages.loc[best_setup, best_strategy]

OA          74.13
wF1         70.60
F1U         54.15
F1N         77.25
wF1+F1U    124.75
Name: (weighted_set, weighted_model), dtype: float64

In [174]:
# get the models
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
rf_models

[RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=38,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=1, max_features=21,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=8,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=4, max_features=11,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=2, max_features=13,
                        n_jobs=-1, random_state=0)]

## Prediction

In [175]:
better_set = weighted_set
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
cnn_models = [cnn_fold0, cnn_fold1, cnn_fold2, cnn_fold3, cnn_fold4]
output = [r'outputs\s4\hongkong_S4_fold0.tif', r'outputs\s4\hongkong_S4_fold1.tif', r'outputs\s4\hongkong_S4_fold2.tif', r'outputs\s4\hongkong_S4_fold3.tif', r'outputs\s4\hongkong_S4_fold4.tif']

In [176]:
# whole satellite image to patches
feature_patches = cnn_utils.generate_feature_patches_loader(image_path = image,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 214720


In [177]:
# for all image patches extract morphometrics
feature_patches_urbanform = cnn_utils.generate_feature_patches_loader(image_path =better_set,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 214720


In [None]:
for i in range(5):
    train_polygons_raster = fr'hongkong_train_f{i}.tif'

    # load model
    cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
    cnn_model = cnn_model.cuda()
    trained_model = torch.load(cnn_models[i])
    cnn_model.load_state_dict(trained_model['model_state'])
    print('cnn model loaded')
    
    train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

    mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
    feature_patches_norm = cnn_utils.normalize_loader(feature_patches, mean, std)
    print('image patches normalized')
    
    # extract embeddings
    cnn_model.eval()
    embeddings = list()
    with torch.no_grad():
        for feature in feature_patches_norm:
            feature = feature.cuda()
            embedding = cnn_model.get_embedding_raw_fc(feature)
            embeddings.append(embedding.cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    print('embeddings extracted')
    
    # extract morphometrics
    urbanform = list()
    for feature in feature_patches_urbanform:
        urbanform.append(feature.cpu().numpy())
    urbanform = np.concatenate(urbanform, axis=0)
    print('morphometrics extracted')
    
    # aggregate morphometrics
    mean_urbanform = urbanform.mean(axis=(2,3))
    min_urbanform = urbanform.min(axis=(2,3))
    max_urbanform = urbanform.max(axis=(2,3))
    std_urbanform = urbanform.std(axis=(2,3))
    med_urbanform = np.median(urbanform, axis=(2, 3))
    print('morphometrics aggregated')
    
    # merge
    all_features = np.hstack((embeddings,mean_urbanform,min_urbanform,max_urbanform,std_urbanform,med_urbanform))

    # prediction
    rf_model = rf_models[i]
    prediction = rf_model.predict(all_features)
    print('prediction done')

    offset_left_calc, offset_top_calc = cnn_utils.calculate_optimal_offsets(image, patch_size, stride)

    output_path = output[i]
    output_path = output_path.replace(".tif", "_temp.tif")
    cnn_utils.lcz_map(offset_left_calc, offset_top_calc, image, prediction, output_path)

## Per pixel validation

In [178]:
# provide test polygons raster path
test_polygons_path = ['hongkong_test_f0.tif','hongkong_test_f1.tif','hongkong_test_f2.tif','hongkong_test_f3.tif','hongkong_test_f4.tif']

In [188]:
# resample lcz map to 100m
for f in output:
    out = f
    temp_f = f.replace(".tif", "_temp.tif")
    utils.resample_lcz_map(temp_f, out)
    # if os.path.exists(temp_f):
    #     try:
    #         os.remove(temp_f)
    #     except:
    #         pass

 saved to s4_outputs\hongkong_S4_fold0.tif
 saved to s4_outputs\hongkong_S4_fold1.tif
 saved to s4_outputs\hongkong_S4_fold2.tif
 saved to s4_outputs\hongkong_S4_fold3.tif
 saved to s4_outputs\hongkong_S4_fold4.tif


In [179]:
metrics, confusion_matrices = utils.perpixel_validation(output, test_polygons_path, splited_ref_data)

In [180]:
df_perpixel = pd.DataFrame(metrics)
df_perpixel = df_perpixel.set_index("Fold")
df_perpixel

Unnamed: 0_level_0,OA,wF1,wF1_Urban,wF1_Natural,F1_Class_1,F1_Class_2,F1_Class_3,F1_Class_4,F1_Class_5,F1_Class_6,...,F1_Class_8,F1_Class_9,F1_Class_10,F1_Class_11,F1_Class_12,F1_Class_13,F1_Class_14,F1_Class_15,F1_Class_16,F1_Class_17
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,70.19,66.91,51.63,73.5,65.45,0.0,72.29,49.06,59.26,41.58,...,11.11,,61.68,95.27,1.68,42.54,19.91,,,99.44
1,64.75,57.19,23.69,71.19,44.86,0.0,75.34,0.93,22.62,30.0,...,20.29,,3.77,90.73,0.0,3.55,63.21,,,96.2
2,74.37,70.67,53.49,77.51,76.8,31.58,43.24,71.13,31.37,31.91,...,2.82,,66.67,90.27,10.64,67.37,29.38,,,99.57
3,81.26,79.41,66.21,84.84,72.54,33.33,87.7,73.28,7.14,50.0,...,0.0,,54.24,96.54,60.69,9.52,72.04,,,100.0
4,69.7,66.43,63.92,68.66,66.38,54.84,85.9,61.26,28.0,28.24,...,0.0,,83.95,83.5,30.3,65.36,6.73,,,99.87


In [181]:
df_perpixel_mean = df_perpixel.mean().round(2)
df_perpixel_mean

OA             72.05
wF1            68.12
wF1_Urban      51.79
wF1_Natural    75.14
F1_Class_1     65.21
F1_Class_2     23.95
F1_Class_3     72.89
F1_Class_4     51.13
F1_Class_5     29.68
F1_Class_6     36.35
F1_Class_7       NaN
F1_Class_8      6.84
F1_Class_9       NaN
F1_Class_10    54.06
F1_Class_11    91.26
F1_Class_12    20.66
F1_Class_13    37.67
F1_Class_14    38.25
F1_Class_15      NaN
F1_Class_16      NaN
F1_Class_17    99.02
dtype: float64

In [182]:
# export all results to csv
df_perpixel.to_csv(r"results\s4\hongkong_S4_results.csv")

In [183]:
# export confusion matrices
with open(r"results\s4\hongkong_S4_confusion_matrices.pkl", "wb") as f:
    pickle.dump(confusion_matrices, f)

# Paris

In [3]:
# load splited reference data
splited_ref_data = gpd.read_file(r'ref_data\paris_ref_splitS2S3S4.gpkg')

In [4]:
# load satellite image (10 m resolution)
image = r'imagery\paris_20170526.tif'

In [5]:
# load rasterized subset of morphometrics from non-weighted RF S1 models and weighted RF S1 models (10 m resolution)
non_weighted_set = r'rasterized_morphometrics\paris_rasterized_morphometrics_fold1.tif'
weighted_set = r'rasterized_morphometrics\paris_rasterized_morphometrics_fold1_weighted.tif'

In [6]:
# load trained S2 CNN models for each fold
cnn_fold0 = r's2_cnn_models\paris_S2_fold0_epoch64.pth'
cnn_fold1 = r's2_cnn_models\paris_S2_fold1_epoch88.pth'
cnn_fold2 = r's2_cnn_models\paris_S2_fold2_epoch75.pth'
cnn_fold3 = r's2_cnn_models\paris_S2_fold3_epoch76.pth'
cnn_fold4 = r's2_cnn_models\paris_S2_fold4_epoch88.pth'

In [7]:
# recording results
setups = ["non_weighted_set", "weighted_set"]
strategies = ["non_weighted_model", "weighted_model"]
folds = [0, 1, 2, 3, 4]

# results[setup][strategy][fold]
results = {
    setup: {
        strat: {
            fold: {} for fold in folds
        } for strat in strategies
    } for setup in setups
}

## Non-weighted RF S1 morphometric subset

### Fold 0

In [8]:
fold = 0
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [9]:
train_polygons_raster = r"paris_train_f0.tif"
test_polygons_raster = r"paris_test_f0.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [10]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [11]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 64
  Train Accuracy: 0.9425%
  Test Accuracy: 0.9065%
  Gap (Train - Test): 0.0360%


In [12]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1565
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 240  27  31 187  66   2 357  31 583  17  20]


In [13]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 353
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  18   8  11  43   6   3  76  11 172   2   1]


In [14]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [15]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1565, 640) (1565,)
(353, 640) (353,)


In [16]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1565
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 240  27  31 187  66   2 357  31 583  17  20]


In [17]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 353
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  18   8  11  43   6   3  76  11 172   2   1]


In [18]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1565, 740) (1565,)
(353, 740) (353,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=X_train.shape[1], class_weight=False)

In [None]:
param, train, test, diff

In [19]:
model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=178, n_estimators=100)
model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,178
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [20]:
# training accuracy
pred_train = model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  95.78
wF1 train:  95.22


In [21]:
# test accuracy
pred_test = model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  90.93
wF1 test:  88.76
Urban wF1 test:  78.56
Natural wF1 test:  93.11


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=X_train.shape[1], class_weight=True)

In [None]:
param, train, test, diff

In [22]:
model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=49, class_weight='balanced', n_estimators=100)
model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,49
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [24]:
# training accuracy
pred_train = model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  97.12
wF1 train:  97.12


In [25]:
# test accuracy
pred_test = model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0_weighted
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  92.07
wF1 test:  90.95
Urban wF1 test:  83.87
Natural wF1 test:  94.05


### Fold 1

In [26]:
fold = 1
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [27]:
train_polygons_raster = r"paris_train_f1.tif"
test_polygons_raster = r"paris_test_f1.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [28]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [29]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 88
  Train Accuracy: 0.9867%
  Test Accuracy: 0.9545%
  Gap (Train - Test): 0.0322%


In [30]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1391
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  73  27  35 181  66   3 345  32 593  17  17]


In [31]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 527
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 185   8   7  49   6   2  88  10 162   2   4]


In [32]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [33]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1391, 640) (1391,)
(527, 640) (527,)


In [34]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1391
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  73  27  35 181  66   3 345  32 593  17  17]


In [35]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 527
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 185   8   7  49   6   2  88  10 162   2   4]


In [36]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1391, 740) (1391,)
(527, 740) (527,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=11, max_features=120, class_weight=False)

In [58]:
param, train, test, diff

(91, 100.0, 95.64, 4.36)

In [37]:
model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=11, max_features=91, n_estimators=100)
model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,11
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,91
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [38]:
# training accuracy
pred_train = model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  100.0
wF1 train:  100.0


In [39]:
# test accuracy
pred_test = model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  95.64
wF1 test:  95.0
Urban wF1 test:  93.4
Natural wF1 test:  96.9


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=120, class_weight=True)

In [64]:
param, train, test, diff

(103, 98.63, 96.77, 1.86)

In [40]:
model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=103, class_weight='balanced', n_estimators=100)
model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,103
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [41]:
# training accuracy
pred_train = model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  98.63
wF1 train:  98.68


In [42]:
# test accuracy
pred_test = model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  96.77
wF1 test:  96.41
Urban wF1 test:  94.9
Natural wF1 test:  98.04


### Fold 2

In [43]:
fold = 2
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [44]:
train_polygons_raster = r"paris_train_f2.tif"
test_polygons_raster = r"paris_test_f2.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [45]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [46]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 75
  Train Accuracy: 0.9527%
  Test Accuracy: 0.9233%
  Gap (Train - Test): 0.0294%


In [47]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1618
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 229  30  34 186  62   5 347  35 652  13  19]


In [48]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 300
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 29   5   8  44  10  86   7 103   6   2]


In [49]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [50]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1618, 640) (1618,)
(300, 640) (300,)


In [51]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1618
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 229  30  34 186  62   5 347  35 652  13  19]


In [52]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 300
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 29   5   8  44  10  86   7 103   6   2]


In [53]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1618, 740) (1618,)
(300, 740) (300,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=120, class_weight=False)

In [83]:
param, train, test, diff

(33, 96.54, 92.33, 4.21)

In [54]:
model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=33, n_estimators=100)
model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,33
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [55]:
# training accuracy
pred_train = model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  96.54
wF1 train:  96.05


In [56]:
# test accuracy
pred_test = model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  92.33
wF1 test:  91.6
Urban wF1 test:  90.94
Natural wF1 test:  94.02


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=X_train.shape[1], class_weight=True)

In [None]:
param, train, test, diff

In [57]:
model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=101, class_weight='balanced', n_estimators=100)
model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,101
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [58]:
# training accuracy
pred_train = model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  96.11
wF1 train:  95.71


In [59]:
# test accuracy
pred_test = model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  91.67
wF1 test:  92.44
Urban wF1 test:  88.88
Natural wF1 test:  95.55


### Fold 3

In [60]:
fold = 3
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [61]:
train_polygons_raster = r"paris_train_f3.tif"
test_polygons_raster = r"paris_test_f3.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [62]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [63]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 76
  Train Accuracy: 0.9575%
  Test Accuracy: 0.9316%
  Gap (Train - Test): 0.0259%


In [64]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1611
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 246  29  35 184  61   5 330  36 648  14  17]


In [65]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 307
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 12   6   7  46  11 103   6 107   5   4]


In [66]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [67]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1611, 640) (1611,)
(307, 640) (307,)


In [68]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1611
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 246  29  35 184  61   5 330  36 648  14  17]


In [69]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 307
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 12   6   7  46  11 103   6 107   5   4]


In [70]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1611, 740) (1611,)
(307, 740) (307,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=120, class_weight=False)

In [110]:
param, train, test, diff

(65, 96.34, 92.18, 4.16)

In [71]:
model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=65, n_estimators=100)
model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,65
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [72]:
# training accuracy
pred_train = model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  96.34
wF1 train:  95.65


In [73]:
# test accuracy
pred_test = model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  92.18
wF1 test:  91.15
Urban wF1 test:  83.76
Natural wF1 test:  95.09


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=160, class_weight=True)

In [None]:
param, train, test, diff

In [74]:
model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=129, class_weight='balanced', n_estimators=100)
model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,129
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [75]:
# training accuracy
pred_train = model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  98.63
wF1 train:  98.64


In [76]:
# test accuracy
pred_test = model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  92.83
wF1 test:  92.61
Urban wF1 test:  89.51
Natural wF1 test:  94.53


### Fold 4

In [77]:
fold = 4
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [78]:
train_polygons_raster = r"paris_train_f4.tif"
test_polygons_raster = r"paris_test_f4.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [79]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [80]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 88
  Train Accuracy: 0.9734%
  Test Accuracy: 0.9582%
  Gap (Train - Test): 0.0152%


In [81]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1487
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 244  27  33 182  33   5 353  34 544  15  11]


In [82]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 431
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 14   8   9  48  39  80   8 211   4  10]


In [83]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [84]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1487, 640) (1487,)
(431, 640) (431,)


In [85]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1487
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 244  27  33 182  33   5 353  34 544  15  11]


In [86]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 431
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 14   8   9  48  39  80   8 211   4  10]


In [87]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1487, 740) (1487,)
(431, 740) (431,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=9, max_features=180, class_weight=False)

In [134]:
param, train, test, diff

(96, 99.93, 96.98, 2.95)

In [88]:
model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=9, max_features=96, n_estimators=100)
model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,9
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,96
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [89]:
# training accuracy
pred_train = model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  99.93
wF1 train:  99.93


In [90]:
# test accuracy
pred_test = model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4 
}


results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  96.98
wF1 test:  96.32
Urban wF1 test:  94.14
Natural wF1 test:  97.73


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=8, max_features=180, class_weight=True)

In [140]:
param, train, test, diff

(62, 98.86, 96.52, 2.34)

In [91]:
model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=8, max_features=62, class_weight='balanced', n_estimators=100)
model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,62
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [92]:
# training accuracy
pred_train = model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  98.86
wF1 train:  98.93


In [93]:
# test accuracy
pred_test = model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  96.52
wF1 test:  96.2
Urban wF1 test:  92.84
Natural wF1 test:  97.92


## Weighted RF S1 morphometric subset

### Fold 0

In [94]:
if os.path.isfile(r"paris_train_f0.tif"):
    fold = 0
    train_polygons_raster = r'paris_train_f0.tif'
    test_polygons_raster = r'paris_test_f0.tif'
else:
    fold = 0
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"paris_train_f0.tif"
    test_polygons_raster = r"paris_test_f0.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [95]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 64
  Train Accuracy: 0.9425%
  Test Accuracy: 0.9065%
  Gap (Train - Test): 0.0360%


In [96]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1565
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 240  27  31 187  66   2 357  31 583  17  20]


In [97]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 353
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  18   8  11  43   6   3  76  11 172   2   1]


In [98]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [99]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1565, 640) (1565,)
(353, 640) (353,)


In [100]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1565
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 240  27  31 187  66   2 357  31 583  17  20]


In [101]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 353
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  18   8  11  43   6   3  76  11 172   2   1]


In [102]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1565, 740) (1565,)
(353, 740) (353,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=120, class_weight=False)

In [155]:
param, train, test, diff

(3, 91.5, 86.69, 4.82)

In [103]:
ws_model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=3, n_estimators=100)
ws_model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,3
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [104]:
# training accuracy
pred_train = ws_model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  91.5
wF1 train:  89.02


In [105]:
# test accuracy
pred_test = ws_model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  86.69
wF1 test:  81.92
Urban wF1 test:  56.29
Natural wF1 test:  91.63


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=True)

In [163]:
param, train, test, diff

(0, 0, 0, 0)

In [106]:
ws_model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=29, class_weight='balanced', n_estimators=100)
ws_model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,29
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [107]:
# training accuracy
pred_train = ws_model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.25
wF1 train:  94.43


In [108]:
# test accuracy
pred_test = ws_model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  88.67
wF1 test:  87.65
Urban wF1 test:  73.25
Natural wF1 test:  93.49


### Fold 1

In [109]:
if os.path.isfile(r"paris_train_f1.tif"):
    fold = 1
    train_polygons_raster = r'paris_train_f1.tif'
    test_polygons_raster = r'paris_test_f1.tif'
else:
    fold = 1
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"paris_train_f1.tif"
    test_polygons_raster = r"paris_test_f1.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [110]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 88
  Train Accuracy: 0.9867%
  Test Accuracy: 0.9545%
  Gap (Train - Test): 0.0322%


In [111]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1391
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  73  27  35 181  66   3 345  32 593  17  17]


In [112]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 527
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 185   8   7  49   6   2  88  10 162   2   4]


In [113]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [114]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1391, 640) (1391,)
(527, 640) (527,)


In [115]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1391
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  2  73  27  35 181  66   3 345  32 593  17  17]


In [116]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 527
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  4 185   8   7  49   6   2  88  10 162   2   4]


In [117]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1391, 740) (1391,)
(527, 740) (527,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=12, max_features=180, class_weight=False)

In [179]:
param, train, test, diff

(45, 100.0, 95.07, 4.93)

In [118]:
ws_model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=12, max_features=45, n_estimators=100)
ws_model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,12
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,45
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [119]:
# training accuracy
pred_train = ws_model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  100.0
wF1 train:  100.0


In [120]:
# test accuracy
pred_test = ws_model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  95.07
wF1 test:  94.54
Urban wF1 test:  92.99
Natural wF1 test:  96.32


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=180, class_weight=True)

In [187]:
param, train, test, diff

(130, 98.71, 95.45, 3.26)

In [121]:
ws_model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=7, max_features=130, class_weight='balanced', n_estimators=100)
ws_model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,130
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [122]:
# training accuracy
pred_train = ws_model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  98.71
wF1 train:  98.74


In [123]:
# test accuracy
pred_test = ws_model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  95.45
wF1 test:  95.15
Urban wF1 test:  92.38
Natural wF1 test:  98.04


### Fold 2

In [124]:
if os.path.isfile(r"paris_train_f2.tif"):
    fold = 2
    train_polygons_raster = r'paris_train_f2.tif'
    test_polygons_raster = r'paris_test_f2.tif'
else:
    fold = 2
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"paris_train_f2.tif"
    test_polygons_raster = r"paris_test_f2.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [125]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 75
  Train Accuracy: 0.9527%
  Test Accuracy: 0.9233%
  Gap (Train - Test): 0.0294%


In [126]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1618
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 229  30  34 186  62   5 347  35 652  13  19]


In [127]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 300
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 29   5   8  44  10  86   7 103   6   2]


In [128]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [129]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1618, 640) (1618,)
(300, 640) (300,)


In [130]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1618
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 229  30  34 186  62   5 347  35 652  13  19]


In [131]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 300
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 29   5   8  44  10  86   7 103   6   2]


In [132]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1618, 740) (1618,)
(300, 740) (300,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=False)

In [202]:
param, train, test, diff

(2, 94.99, 90.0, 4.99)

In [133]:
ws_model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=2, n_estimators=100)
ws_model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,2
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [134]:
# training accuracy
pred_train = ws_model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.99
wF1 train:  93.74


In [135]:
# test accuracy
pred_test = ws_model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  90.0
wF1 test:  88.68
Urban wF1 test:  83.14
Natural wF1 test:  93.65


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [211]:
param, train, test, diff

(53, 87.43, 83.44, 4.0)

In [136]:
ws_model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=48, class_weight='balanced', n_estimators=100)
ws_model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,48
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [137]:
# training accuracy
pred_train = ws_model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  91.9
wF1 train:  92.06


In [138]:
# test accuracy
pred_test = ws_model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  86.67
wF1 test:  88.12
Urban wF1 test:  79.12
Natural wF1 test:  93.98


### Fold 3

In [139]:
if os.path.isfile(r"paris_train_f3.tif"):
    fold = 3
    train_polygons_raster = r'paris_train_f3.tif'
    test_polygons_raster = r'paris_test_f3.tif'
else:
    fold = 3
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"paris_train_f3.tif"
    test_polygons_raster = r"paris_test_f3.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [140]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 76
  Train Accuracy: 0.9575%
  Test Accuracy: 0.9316%
  Gap (Train - Test): 0.0259%


In [141]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1611
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 246  29  35 184  61   5 330  36 648  14  17]


In [142]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 307
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 12   6   7  46  11 103   6 107   5   4]


In [143]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [144]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1611, 640) (1611,)
(307, 640) (307,)


In [145]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1611
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 246  29  35 184  61   5 330  36 648  14  17]


In [146]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 307
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 12   6   7  46  11 103   6 107   5   4]


In [147]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1611, 740) (1611,)
(307, 740) (307,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=False)

In [226]:
param, train, test, diff

(46, 95.28, 91.86, 3.43)

In [148]:
ws_model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=46, n_estimators=100)
ws_model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,46
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [149]:
# training accuracy
pred_train = ws_model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  95.28
wF1 train:  94.46


In [150]:
# test accuracy
pred_test = ws_model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  91.86
wF1 test:  91.24
Urban wF1 test:  85.31
Natural wF1 test:  94.39


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [None]:
param, train, test, diff

In [151]:
ws_model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=129, class_weight='balanced', n_estimators=100)
ws_model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,129
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [152]:
# training accuracy
pred_train = ws_model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.44
wF1 train:  90.1


In [153]:
# test accuracy
pred_test = ws_model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  84.36
wF1 test:  85.69
Urban wF1 test:  64.85
Natural wF1 test:  94.14


### Fold 4

In [154]:
if os.path.isfile(r"paris_train_f4.tif"):
    fold = 4
    train_polygons_raster = r'paris_train_f4.tif'
    test_polygons_raster = r'paris_test_f4.tif'
else:
    fold = 4
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"paris_train_f4.tif"
    test_polygons_raster = r"paris_test_f4.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [155]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 88
  Train Accuracy: 0.9734%
  Test Accuracy: 0.9582%
  Gap (Train - Test): 0.0152%


In [156]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1487
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 244  27  33 182  33   5 353  34 544  15  11]


In [157]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 431
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 14   8   9  48  39  80   8 211   4  10]


In [158]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [159]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1487, 640) (1487,)
(431, 640) (431,)


In [160]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1487
Unique Labels: [ 1  2  4  5  6  8  9 11 12 14 15 17]
Counts: [  6 244  27  33 182  33   5 353  34 544  15  11]


In [161]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 431
Unique Labels: [ 2  4  5  6  8 11 12 14 15 17]
Counts: [ 14   8   9  48  39  80   8 211   4  10]


In [162]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1487, 740) (1487,)
(431, 740) (431,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [163]:
ws_model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=7, max_features=98, n_estimators=100)
ws_model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,98
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [164]:
# training accuracy
pred_train = ws_model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  98.52
wF1 train:  98.43


In [165]:
# test accuracy
pred_test = ws_model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  93.5
wF1 test:  93.49
Urban wF1 test:  80.4
Natural wF1 test:  98.5


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=180, class_weight=True)

In [256]:
param, train, test, diff

(26, 97.51, 93.04, 4.47)

In [166]:
ws_model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=7, max_features=26, class_weight='balanced', n_estimators=100)
ws_model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,26
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [167]:
# training accuracy
pred_train = ws_model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  97.51
wF1 train:  97.66


In [168]:
# test accuracy
pred_test = ws_model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  93.04
wF1 test:  92.85
Urban wF1 test:  80.74
Natural wF1 test:  97.73


## Evaluation

In [169]:
df = pd.DataFrame.from_dict({
    (i, j, k): results[i][j][k] 
    for i in results.keys() 
    for j in results[i].keys() 
    for k in results[i][j].keys()
}, orient='index')

df.index.names = ["Setup", "Strategy", "Fold"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OA,wF1,F1U,F1N,Model
Setup,Strategy,Fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
non_weighted_set,non_weighted_model,0,90.93,88.76,78.56,93.11,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,non_weighted_model,1,95.64,95.0,93.4,96.9,"(DecisionTreeClassifier(max_depth=11, max_feat..."
non_weighted_set,non_weighted_model,2,92.33,91.6,90.94,94.02,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,non_weighted_model,3,92.18,91.15,83.76,95.09,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,non_weighted_model,4,96.98,96.32,94.14,97.73,"(DecisionTreeClassifier(max_depth=9, max_featu..."
non_weighted_set,weighted_model,0,92.07,90.95,83.87,94.05,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,weighted_model,1,96.77,96.41,94.9,98.04,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,weighted_model,2,91.67,92.44,88.88,95.55,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,3,92.83,92.61,89.51,94.53,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,weighted_model,4,96.52,96.2,92.84,97.92,"(DecisionTreeClassifier(max_depth=8, max_featu..."


In [170]:
# determine best set and best weighting strategy
df_metrics = df.drop(columns=["Model"])
averages = df_metrics.groupby(["Setup", "Strategy"]).mean().round(2)
averages["wF1+F1U"] = averages["wF1"] + averages["F1U"]
averages = averages.sort_values("wF1+F1U", ascending=False)
averages

Unnamed: 0_level_0,Unnamed: 1_level_0,OA,wF1,F1U,F1N,wF1+F1U
Setup,Strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
non_weighted_set,weighted_model,93.97,93.72,90.0,96.02,183.72
non_weighted_set,non_weighted_model,93.61,92.57,88.16,95.37,180.73
weighted_set,non_weighted_model,91.42,89.97,79.63,94.9,169.6
weighted_set,weighted_model,89.64,89.89,78.07,95.48,167.96


In [171]:
best_setup, best_strategy = averages["wF1+F1U"].idxmax()
best_setup, best_strategy

('non_weighted_set', 'weighted_model')

In [172]:
# show individual model performance of the best  set and best weighting strategy
df_metrics.loc[best_setup, best_strategy]

Unnamed: 0_level_0,OA,wF1,F1U,F1N
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,92.07,90.95,83.87,94.05
1,96.77,96.41,94.9,98.04
2,91.67,92.44,88.88,95.55
3,92.83,92.61,89.51,94.53
4,96.52,96.2,92.84,97.92


In [173]:
# show the average metrics of the best set and best weighting strategy
averages.loc[best_setup, best_strategy]

OA          93.97
wF1         93.72
F1U         90.00
F1N         96.02
wF1+F1U    183.72
Name: (non_weighted_set, weighted_model), dtype: float64

In [174]:
# get the models
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
rf_models

[RandomForestClassifier(class_weight='balanced', max_depth=4, max_features=49,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=6, max_features=103,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=101,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=5, max_features=129,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=8, max_features=62,
                        n_jobs=-1, random_state=0)]

## Prediction

In [175]:
better_set = non_weighted_set
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
cnn_models = [cnn_fold0, cnn_fold1, cnn_fold2, cnn_fold3, cnn_fold4]
output = [r'outputs\s4\paris_S4_fold0.tif', r'outputs\s4\paris_S4_fold1.tif', r'outputs\s4\paris_S4_fold2.tif', r'outputs\s4\paris_S4_fold3.tif', r'outputs\s4\paris_S4_fold4.tif']

In [176]:
# whole satellite image to patches
feature_patches = cnn_utils.generate_feature_patches_loader(image_path = image,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 976752


In [177]:
# for all image patches extract morphometrics
feature_patches_urbanform = cnn_utils.generate_feature_patches_loader(image_path =better_set,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 976752


In [None]:
for i in range(5):
    train_polygons_raster = fr'paris_train_f{i}.tif'

    # load model
    cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
    cnn_model = cnn_model.cuda()
    trained_model = torch.load(cnn_models[i])
    cnn_model.load_state_dict(trained_model['model_state'])
    print('cnn model loaded')
    
    train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

    mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
    feature_patches_norm = cnn_utils.normalize_loader(feature_patches, mean, std)
    print('image patches normalized')
    
    # extract embeddings
    cnn_model.eval()
    embeddings = list()
    with torch.no_grad():
        for feature in feature_patches_norm:
            feature = feature.cuda()
            embedding = cnn_model.get_embedding_raw_fc(feature)
            embeddings.append(embedding.cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    print('embeddings extracted')

    # extract morphometrics
    urbanform = list()
    for feature in feature_patches_urbanform:
        urbanform.append(feature.cpu().numpy())
    #urbanform = np.concatenate(urbanform, axis=0)
    print('morphometrics extracted')

    mean_list = [batch.mean(axis=(2,3)) for batch in urbanform]
    min_list  = [batch.min(axis=(2,3))  for batch in urbanform]
    max_list  = [batch.max(axis=(2,3))  for batch in urbanform]
    std_list  = [batch.std(axis=(2,3))  for batch in urbanform]
    med_list  = [np.median(batch, axis=(2,3)) for batch in urbanform]

    mean_urbanform = np.concatenate(mean_list, axis=0)
    min_urbanform  = np.concatenate(min_list, axis=0)
    max_urbanform  = np.concatenate(max_list, axis=0)
    std_urbanform  = np.concatenate(std_list, axis=0)
    med_urbanform  = np.concatenate(med_list, axis=0)
    
    # aggregate morphometrics
    # mean_urbanform = urbanform.mean(axis=(2,3))
    # min_urbanform = urbanform.min(axis=(2,3))
    # max_urbanform = urbanform.max(axis=(2,3))
    # std_urbanform = urbanform.std(axis=(2,3))
    # med_urbanform = np.median(urbanform, axis=(2, 3))
    print('morphometrics aggregated')
    
    # merge
    all_features = np.hstack((embeddings,mean_urbanform,min_urbanform,max_urbanform,std_urbanform,med_urbanform))

    # prediction
    rf_model = rf_models[i]
    prediction = rf_model.predict(all_features)
    print('prediction done')

    offset_left_calc, offset_top_calc = cnn_utils.calculate_optimal_offsets(image, patch_size, stride)

    output_path = output[i]
    output_path = output_path.replace(".tif", "_temp.tif")
    cnn_utils.lcz_map(offset_left_calc, offset_top_calc, image, prediction, output_path)

## Per pixel validation

In [178]:
# provide test polygons raster path
test_polygons_path = ['paris_test_f0.tif','paris_test_f1.tif','paris_test_f2.tif','paris_test_f3.tif','paris_test_f4.tif']

In [188]:
# resample lcz map to 100m
for f in output:
    out = f
    temp_f = f.replace(".tif", "_temp.tif")
    utils.resample_lcz_map(temp_f, out)
    # if os.path.exists(temp_f):
    #     try:
    #         os.remove(temp_f)
    #     except:
    #         pass

 saved to s4_outputs\paris_S4_fold0.tif
 saved to s4_outputs\paris_S4_fold1.tif
 saved to s4_outputs\paris_S4_fold2.tif
 saved to s4_outputs\paris_S4_fold3.tif
 saved to s4_outputs\paris_S4_fold4.tif


In [180]:
metrics, confusion_matrices = utils.perpixel_validation(output, test_polygons_path, splited_ref_data)

In [181]:
df_perpixel = pd.DataFrame(metrics)
df_perpixel = df_perpixel.set_index("Fold")
df_perpixel

Unnamed: 0_level_0,OA,wF1,wF1_Urban,wF1_Natural,F1_Class_1,F1_Class_2,F1_Class_3,F1_Class_4,F1_Class_5,F1_Class_6,...,F1_Class_8,F1_Class_9,F1_Class_10,F1_Class_11,F1_Class_12,F1_Class_13,F1_Class_14,F1_Class_15,F1_Class_16,F1_Class_17
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,88.69,87.76,79.67,92.3,43.48,88.22,,36.36,62.04,87.86,...,77.66,5.26,,89.61,15.91,,98.69,10.81,,95.24
1,96.28,95.92,94.51,97.9,27.27,99.32,,67.4,67.13,94.52,...,84.88,27.78,,98.62,76.4,,98.88,57.14,,87.88
2,86.87,87.95,80.49,93.63,,90.52,,20.29,51.96,84.32,...,74.21,,,96.02,49.79,,96.04,36.84,,92.86
3,91.86,92.04,85.25,95.6,,87.89,,59.26,57.38,88.68,...,89.29,,,98.18,31.19,,98.62,50.0,,87.5
4,95.66,95.56,92.36,97.32,,98.16,,49.18,61.32,97.9,...,92.67,,,97.49,60.94,,99.72,52.78,,92.23


In [182]:
df_perpixel_mean = df_perpixel.mean().round(2)
df_perpixel_mean

OA             91.87
wF1            91.85
wF1_Urban      86.46
wF1_Natural    95.35
F1_Class_1     35.38
F1_Class_2     92.82
F1_Class_3       NaN
F1_Class_4     46.50
F1_Class_5     59.97
F1_Class_6     90.66
F1_Class_7       NaN
F1_Class_8     83.74
F1_Class_9     16.52
F1_Class_10      NaN
F1_Class_11    95.98
F1_Class_12    46.85
F1_Class_13      NaN
F1_Class_14    98.39
F1_Class_15    41.51
F1_Class_16      NaN
F1_Class_17    91.14
dtype: float64

In [183]:
# export all results to csv
df_perpixel.to_csv(r"results\s4\paris_S4_results.csv")

In [184]:
# export confusion matrices
with open(r"results\s4\paris_S4_confusion_matrices.pkl", "wb") as f:
    pickle.dump(confusion_matrices, f)

# Rome

In [3]:
# load splited reference data
splited_ref_data = gpd.read_file(r'ref_data\rome_ref_splitS2S3S4.gpkg')

In [4]:
# load satellite image (10 m resolution)
image = r'imagery\rome_20170620.tif'

In [5]:
# load rasterized subset of morphometrics from non-weighted RF S1 models and weighted RF S1 models (10 m resolution)
non_weighted_set = r'rasterized_morphometrics\rome_rasterized_morphometrics_fold2.tif'
weighted_set = r'rasterized_morphometrics\rome_rasterized_morphometrics_fold2_weighted.tif'

In [6]:
# load trained S2 CNN models for each fold
cnn_fold0 = r's2_cnn_models\rome_S2_fold0_epoch17.pth'
cnn_fold1 = r's2_cnn_models\rome_S2_fold1_epoch10.pth'
cnn_fold2 = r's2_cnn_models\rome_S2_fold2_epoch90.pth'
cnn_fold3 = r's2_cnn_models\rome_S2_fold3_epoch76.pth'
cnn_fold4 = r's2_cnn_models\rome_S2_fold4_epoch20.pth'

In [7]:
# recording results
setups = ["non_weighted_set", "weighted_set"]
strategies = ["non_weighted_model", "weighted_model"]
folds = [0, 1, 2, 3, 4]

# results[setup][strategy][fold]
results = {
    setup: {
        strat: {
            fold: {} for fold in folds
        } for strat in strategies
    } for setup in setups
}

## Non-weighted RF S1 morphometric subset

### Fold 0

In [8]:
fold = 0
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [9]:
train_polygons_raster = r"rome_train_f0.tif"
test_polygons_raster = r"rome_test_f0.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [10]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [11]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 17
  Train Accuracy: 0.7309%
  Test Accuracy: 0.6842%
  Gap (Train - Test): 0.0467%


In [12]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 485
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [125   3 111  40  36   3  24  42  83  18]


In [13]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 133
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [25  8 27  8  9  2  4 10  9 31]


In [14]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [15]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(485, 640) (485,)
(133, 640) (133,)


In [16]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 485
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [125   3 111  40  36   3  24  42  83  18]


In [17]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 133
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [25  8 27  8  9  2  4 10  9 31]


In [18]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(485, 740) (485,)
(133, 740) (133,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [19]:
model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=5, n_estimators=100)
model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,5
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [20]:
# training accuracy
pred_train = model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  68.25
wF1 train:  59.71


In [21]:
# test accuracy
pred_test = model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  62.41
wF1 test:  54.24
Urban wF1 test:  39.95
Natural wF1 test:  80.42


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [35]:
param, train, test, diff

(50, 79.59, 75.19, 4.4)

In [22]:
model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=50, class_weight='balanced', n_estimators=100)
model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,50
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [23]:
# training accuracy
pred_train = model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  79.59
wF1 train:  77.08


In [24]:
# test accuracy
pred_test = model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0_weighted
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  75.19
wF1 test:  72.71
Urban wF1 test:  62.63
Natural wF1 test:  91.24


### Fold 1

In [25]:
fold = 1
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [26]:
train_polygons_raster = r"rome_train_f1.tif"
test_polygons_raster = r"rome_test_f1.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [27]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [28]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 10
  Train Accuracy: 0.7214%
  Test Accuracy: 0.6875%
  Gap (Train - Test): 0.0339%


In [29]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 490
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [123   8 114  37  33   2  22  39  66  46]


In [30]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 128
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [27  3 24 11 12  3  6 13 26  3]


In [31]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [32]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(490, 640) (490,)
(128, 640) (128,)


In [33]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 490
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [123   8 114  37  33   2  22  39  66  46]


In [34]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 128
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [27  3 24 11 12  3  6 13 26  3]


In [35]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(490, 740) (490,)
(128, 740) (128,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=120, class_weight=False)

In [None]:
param, train, test, diff

In [36]:
model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=16, n_estimators=100)
model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,16
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [37]:
# training accuracy
pred_train = model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  55.31
wF1 train:  43.83


In [38]:
# test accuracy
pred_test = model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  49.22
wF1 test:  38.82
Urban wF1 test:  39.69
Natural wF1 test:  46.73


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=700, class_weight=True)

In [65]:
param, train, test, diff

(650, 73.47, 70.31, 3.16)

In [39]:
model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=650, class_weight='balanced', n_estimators=100)
model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,650
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [40]:
# training accuracy
pred_train = model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  73.47
wF1 train:  71.16


In [41]:
# test accuracy
pred_test = model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  70.31
wF1 test:  65.5
Urban wF1 test:  61.75
Natural wF1 test:  76.6


### Fold 2

In [42]:
fold = 2
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [43]:
train_polygons_raster = r"rome_train_f2.tif"
test_polygons_raster = r"rome_test_f2.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [44]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [45]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 90
  Train Accuracy: 0.9008%
  Test Accuracy: 0.8915%
  Gap (Train - Test): 0.0093%


In [46]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 489
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [116  11 109  38  37   5  22  32  85  34]


In [47]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 129
Unique Labels: [ 2  5  6  8 11 12 14 17]
Counts: [34 29 10  8  6 20  7 15]


In [48]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [49]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(489, 640) (489,)
(129, 640) (129,)


In [50]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 489
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [116  11 109  38  37   5  22  32  85  34]


In [51]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 129
Unique Labels: [ 2  5  6  8 11 12 14 17]
Counts: [34 29 10  8  6 20  7 15]


In [52]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(489, 740) (489,)
(129, 740) (129,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=False)

In [82]:
param, train, test, diff

(39, 86.09, 82.95, 3.15)

In [53]:
model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=39, n_estimators=100)
model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,39
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [54]:
# training accuracy
pred_train = model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  86.09
wF1 train:  83.97


In [55]:
# test accuracy
pred_test = model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  82.95
wF1 test:  80.59
Urban wF1 test:  75.12
Natural wF1 test:  92.8


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=550, class_weight=True)

In [88]:
param, train, test, diff

(207, 92.02, 89.92, 2.1)

In [56]:
model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=207, class_weight='balanced', n_estimators=100)
model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,207
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [57]:
# training accuracy
pred_train = model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.02
wF1 train:  91.61


In [58]:
# test accuracy
pred_test = model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  89.92
wF1 test:  90.55
Urban wF1 test:  88.1
Natural wF1 test:  97.96


### Fold 3

In [59]:
fold = 3
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [60]:
train_polygons_raster = r"rome_train_f3.tif"
test_polygons_raster = r"rome_test_f3.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [61]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [62]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 76
  Train Accuracy: 0.8281%
  Test Accuracy: 0.7798%
  Gap (Train - Test): 0.0483%


In [63]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 509
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [112  11 112  37  36   5  27  46  74  49]


In [64]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 109
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [38 26 11  9  1  6 18]


In [65]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [66]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(509, 640) (509,)
(109, 640) (109,)


In [67]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 509
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [112  11 112  37  36   5  27  46  74  49]


In [68]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 109
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [38 26 11  9  1  6 18]


In [69]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(509, 740) (509,)
(109, 740) (109,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utilsfinetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=False)

In [105]:
param, train, test, diff

(18, 82.32, 78.9, 3.42)

In [70]:
model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=18, n_estimators=100)
model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,18
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [71]:
# training accuracy
pred_train = model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  82.32
wF1 train:  78.37


In [72]:
# test accuracy
pred_test = model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  78.9
wF1 test:  74.67
Urban wF1 test:  69.83
Natural wF1 test:  92.1


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=160, class_weight=True)

In [111]:
param, train, test, diff

(1, 83.69, 78.9, 4.79)

In [73]:
model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=1, class_weight='balanced', n_estimators=100)
model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [74]:
# training accuracy
pred_train = model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  83.69
wF1 train:  83.88


In [75]:
# test accuracy
pred_test = model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  78.9
wF1 test:  78.01
Urban wF1 test:  73.5
Natural wF1 test:  94.43


### Fold 4

In [76]:
fold = 4
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [77]:
train_polygons_raster = r"rome_train_f4.tif"
test_polygons_raster = r"rome_test_f4.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [78]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [79]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 20
  Train Accuracy: 0.9198%
  Test Accuracy: 0.8908%
  Gap (Train - Test): 0.0291%


In [80]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 499
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [124  11 106  40  38   5  17  49  60  49]


In [81]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 119
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [26 32  8  7 11  3 32]


In [82]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [83]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(499, 640) (499,)
(119, 640) (119,)


In [84]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 499
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [124  11 106  40  38   5  17  49  60  49]


In [85]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 119
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [26 32  8  7 11  3 32]


In [86]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(499, 740) (499,)
(119, 740) (119,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [128]:
param, train, test, diff

(27, 89.58, 89.08, 0.5)

In [87]:
model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=27, n_estimators=100)
model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,27
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [88]:
# training accuracy
pred_train = model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  89.58
wF1 train:  88.59


In [89]:
# test accuracy
pred_test = model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4 
}


results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  89.08
wF1 test:  89.85
Urban wF1 test:  88.96
Natural wF1 test:  91.26


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=400, class_weight=True)

In [136]:
param, train, test, diff

(148, 85.57, 80.67, 4.9)

In [90]:
model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=148, class_weight='balanced', n_estimators=100)
model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,148
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [91]:
# training accuracy
pred_train = model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  85.57
wF1 train:  85.02


In [92]:
# test accuracy
pred_test = model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  80.67
wF1 test:  83.61
Urban wF1 test:  80.75
Natural wF1 test:  88.15


## Weighted RF S1 morphometric subset

### Fold 0

In [93]:
if os.path.isfile(r"rome_train_f0.tif"):
    fold = 0
    train_polygons_raster = r'rome_train_f0.tif'
    test_polygons_raster = r'rome_test_f0.tif'
else:
    fold = 0
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"rome_train_f0.tif"
    test_polygons_raster = r"rome_test_f0.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [94]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 17
  Train Accuracy: 0.7309%
  Test Accuracy: 0.6842%
  Gap (Train - Test): 0.0467%


In [95]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 485
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [125   3 111  40  36   3  24  42  83  18]


In [96]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 133
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [25  8 27  8  9  2  4 10  9 31]


In [97]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [98]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(485, 640) (485,)
(133, 640) (133,)


In [99]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 485
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [125   3 111  40  36   3  24  42  83  18]


In [100]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 133
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [25  8 27  8  9  2  4 10  9 31]


In [101]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(485, 740) (485,)
(133, 740) (133,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=120, class_weight=False)

In [152]:
param, train, test, diff

(7, 68.04, 63.16, 4.88)

In [102]:
ws_model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=7, n_estimators=100)
ws_model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,7
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [103]:
# training accuracy
pred_train = ws_model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  68.04
wF1 train:  59.83


In [104]:
# test accuracy
pred_test = ws_model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  63.16
wF1 test:  54.67
Urban wF1 test:  40.98
Natural wF1 test:  80.42


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [159]:
param, train, test, diff

(68, 81.44, 78.2, 3.25)

In [105]:
ws_model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=68, class_weight='balanced', n_estimators=100)
ws_model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,68
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [106]:
# training accuracy
pred_train = ws_model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  81.44
wF1 train:  81.48


In [107]:
# test accuracy
pred_test = ws_model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  78.2
wF1 test:  75.46
Urban wF1 test:  68.24
Natural wF1 test:  90.03


### Fold 1

In [108]:
if os.path.isfile(r"rome_train_f1.tif"):
    fold = 1
    train_polygons_raster = r'rome_train_f1.tif'
    test_polygons_raster = r'rome_test_f1.tif'
else:
    fold = 1
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"rome_train_f1.tif"
    test_polygons_raster = r"rome_test_f1.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [109]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 10
  Train Accuracy: 0.7214%
  Test Accuracy: 0.6875%
  Gap (Train - Test): 0.0339%


In [110]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 490
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [123   8 114  37  33   2  22  39  66  46]


In [111]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 128
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [27  3 24 11 12  3  6 13 26  3]


In [112]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [113]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(490, 640) (490,)
(128, 640) (128,)


In [114]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 490
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [123   8 114  37  33   2  22  39  66  46]


In [115]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 128
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [27  3 24 11 12  3  6 13 26  3]


In [116]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(490, 740) (490,)
(128, 740) (128,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=1, max_features=180, class_weight=False)

In [179]:
param, train, test, diff

(45, 100.0, 95.07, 4.93)

In [117]:
ws_model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=1, max_features=46, n_estimators=100)
ws_model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,46
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [118]:
# training accuracy
pred_train = ws_model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  55.71
wF1 train:  44.46


In [119]:
# test accuracy
pred_test = ws_model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  47.66
wF1 test:  38.7
Urban wF1 test:  39.98
Natural wF1 test:  45.14


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=700, class_weight=True)

In [179]:
param, train, test, diff

(451, 71.43, 67.19, 4.24)

In [120]:
ws_model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=451, class_weight='balanced', n_estimators=100)
ws_model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,451
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [121]:
# training accuracy
pred_train = ws_model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  71.43
wF1 train:  70.82


In [122]:
# test accuracy
pred_test = ws_model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  67.19
wF1 test:  65.8
Urban wF1 test:  58.29
Natural wF1 test:  83.15


### Fold 2

In [123]:
if os.path.isfile(r"rome_train_f2.tif"):
    fold = 2
    train_polygons_raster = r'rome_train_f2.tif'
    test_polygons_raster = r'rome_test_f2.tif'
else:
    fold = 2
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"rome_train_f2.tif"
    test_polygons_raster = r"rome_test_f2.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [124]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 90
  Train Accuracy: 0.9008%
  Test Accuracy: 0.8915%
  Gap (Train - Test): 0.0093%


In [125]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 489
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [116  11 109  38  37   5  22  32  85  34]


In [126]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 129
Unique Labels: [ 2  5  6  8 11 12 14 17]
Counts: [34 29 10  8  6 20  7 15]


In [127]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [128]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(489, 640) (489,)
(129, 640) (129,)


In [129]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 489
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [116  11 109  38  37   5  22  32  85  34]


In [130]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 129
Unique Labels: [ 2  5  6  8 11 12 14 17]
Counts: [34 29 10  8  6 20  7 15]


In [131]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(489, 740) (489,)
(129, 740) (129,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=False)

In [194]:
param, train, test, diff

(39, 84.87, 83.72, 1.15)

In [132]:
ws_model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=39, n_estimators=100)
ws_model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,39
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [133]:
# training accuracy
pred_train = ws_model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  84.87
wF1 train:  82.76


In [134]:
# test accuracy
pred_test = ws_model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  83.72
wF1 test:  81.49
Urban wF1 test:  74.8
Natural wF1 test:  96.83


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=690, class_weight=True)

In [None]:
param, train, test, diff

In [135]:
ws_model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=512, class_weight='balanced', n_estimators=100)
ws_model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,512
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [136]:
# training accuracy
pred_train = ws_model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.68
wF1 train:  94.62


In [137]:
# test accuracy
pred_test = ws_model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  89.15
wF1 test:  89.67
Urban wF1 test:  85.68
Natural wF1 test:  100.0


### Fold 3

In [138]:
if os.path.isfile(r"rome_train_f3.tif"):
    fold = 3
    train_polygons_raster = r'rome_train_f3.tif'
    test_polygons_raster = r'rome_test_f3.tif'
else:
    fold = 3
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"rome_train_f3.tif"
    test_polygons_raster = r"rome_test_f3.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [139]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 76
  Train Accuracy: 0.8281%
  Test Accuracy: 0.7798%
  Gap (Train - Test): 0.0483%


In [140]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 509
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [112  11 112  37  36   5  27  46  74  49]


In [141]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 109
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [38 26 11  9  1  6 18]


In [142]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [143]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(509, 640) (509,)
(109, 640) (109,)


In [144]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 509
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [112  11 112  37  36   5  27  46  74  49]


In [145]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 109
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [38 26 11  9  1  6 18]


In [146]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(509, 740) (509,)
(109, 740) (109,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=False)

In [220]:
param, train, test, diff

(168, 82.32, 80.73, 1.58)

In [147]:
ws_model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=168, n_estimators=100)
ws_model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,168
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [148]:
# training accuracy
pred_train = ws_model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  82.32
wF1 train:  78.78


In [149]:
# test accuracy
pred_test = ws_model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  80.73
wF1 test:  79.08
Urban wF1 test:  75.7
Natural wF1 test:  92.1


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=True)

In [226]:
param, train, test, diff

(25, 83.3, 80.73, 2.57)

In [150]:
ws_model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=25, class_weight='balanced', n_estimators=100)
ws_model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,25
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [151]:
# training accuracy
pred_train = ws_model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  83.3
wF1 train:  83.65


In [152]:
# test accuracy
pred_test = ws_model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  80.73
wF1 test:  80.52
Urban wF1 test:  77.77
Natural wF1 test:  91.14


### Fold 4

In [153]:
if os.path.isfile(r"rome_train_f4.tif"):
    fold = 4
    train_polygons_raster = r'rome_train_f4.tif'
    test_polygons_raster = r'rome_test_f4.tif'
else:
    fold = 4
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"rome_train_f4.tif"
    test_polygons_raster = r"rome_test_f4.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [154]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 20
  Train Accuracy: 0.9198%
  Test Accuracy: 0.8908%
  Gap (Train - Test): 0.0291%


In [155]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 499
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [124  11 106  40  38   5  17  49  60  49]


In [156]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 119
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [26 32  8  7 11  3 32]


In [157]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [158]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(499, 640) (499,)
(119, 640) (119,)


In [159]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 499
Unique Labels: [ 2  3  5  6  8 10 11 12 14 17]
Counts: [124  11 106  40  38   5  17  49  60  49]


In [160]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 119
Unique Labels: [ 2  5  6  8 11 12 14]
Counts: [26 32  8  7 11  3 32]


In [161]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(499, 740) (499,)
(119, 740) (119,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [241]:
param, train, test, diff

(16, 87.78, 87.39, 0.38)

In [162]:
ws_model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=16, n_estimators=100)
ws_model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,16
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [163]:
# training accuracy
pred_train = ws_model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  87.78
wF1 train:  86.75


In [164]:
# test accuracy
pred_test = ws_model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  87.39
wF1 test:  88.08
Urban wF1 test:  86.08
Natural wF1 test:  91.26


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [250]:
param, train, test, diff

(55, 85.17, 81.51, 3.66)

In [165]:
ws_model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=55, class_weight='balanced', n_estimators=100)
ws_model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,55
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [166]:
# training accuracy
pred_train = ws_model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  85.17
wF1 train:  84.89


In [167]:
# test accuracy
pred_test = ws_model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  81.51
wF1 test:  84.67
Urban wF1 test:  80.23
Natural wF1 test:  91.72


## Evaluation

In [168]:
df = pd.DataFrame.from_dict({
    (i, j, k): results[i][j][k] 
    for i in results.keys() 
    for j in results[i].keys() 
    for k in results[i][j].keys()
}, orient='index')

df.index.names = ["Setup", "Strategy", "Fold"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OA,wF1,F1U,F1N,Model
Setup,Strategy,Fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
non_weighted_set,non_weighted_model,0,62.41,54.24,39.95,80.42,"(DecisionTreeClassifier(max_depth=2, max_featu..."
non_weighted_set,non_weighted_model,1,49.22,38.82,39.69,46.73,"(DecisionTreeClassifier(max_depth=1, max_featu..."
non_weighted_set,non_weighted_model,2,82.95,80.59,75.12,92.8,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,3,78.9,74.67,69.83,92.1,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,non_weighted_model,4,89.08,89.85,88.96,91.26,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,weighted_model,0,75.19,72.71,62.63,91.24,"(DecisionTreeClassifier(max_depth=2, max_featu..."
non_weighted_set,weighted_model,1,70.31,65.5,61.75,76.6,"(DecisionTreeClassifier(max_depth=2, max_featu..."
non_weighted_set,weighted_model,2,89.92,90.55,88.1,97.96,"(DecisionTreeClassifier(max_depth=5, max_featu..."
non_weighted_set,weighted_model,3,78.9,78.01,73.5,94.43,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,weighted_model,4,80.67,83.61,80.75,88.15,"(DecisionTreeClassifier(max_depth=3, max_featu..."


In [169]:
# determine best set and best weighting strategy
df_metrics = df.drop(columns=["Model"])
averages = df_metrics.groupby(["Setup", "Strategy"]).mean().round(2)
averages["wF1+F1U"] = averages["wF1"] + averages["F1U"]
averages = averages.sort_values("wF1+F1U", ascending=False)
averages

Unnamed: 0_level_0,Unnamed: 1_level_0,OA,wF1,F1U,F1N,wF1+F1U
Setup,Strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
weighted_set,weighted_model,79.36,79.22,74.04,91.21,153.26
non_weighted_set,weighted_model,79.0,78.08,73.35,89.68,151.43
weighted_set,non_weighted_model,72.53,68.4,63.51,81.15,131.91
non_weighted_set,non_weighted_model,72.51,67.63,62.71,80.66,130.34


In [170]:
best_setup, best_strategy = averages["wF1+F1U"].idxmax()
best_setup, best_strategy

('weighted_set', 'weighted_model')

In [171]:
# show individual model performance of the best  set and best weighting strategy
df_metrics.loc[best_setup, best_strategy]

Unnamed: 0_level_0,OA,wF1,F1U,F1N
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,78.2,75.46,68.24,90.03
1,67.19,65.8,58.29,83.15
2,89.15,89.67,85.68,100.0
3,80.73,80.52,77.77,91.14
4,81.51,84.67,80.23,91.72


In [172]:
# show the average metrics of the best set and best weighting strategy
averages.loc[best_setup, best_strategy]

OA          79.36
wF1         79.22
F1U         74.04
F1N         91.21
wF1+F1U    153.26
Name: (weighted_set, weighted_model), dtype: float64

In [173]:
# get the models
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
rf_models

[RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=68,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=451,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=6, max_features=512,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=4, max_features=25,
                        n_jobs=-1, random_state=0),
 RandomForestClassifier(class_weight='balanced', max_depth=3, max_features=55,
                        n_jobs=-1, random_state=0)]

## Prediction

In [174]:
better_set = weighted_set
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
cnn_models = [cnn_fold0, cnn_fold1, cnn_fold2, cnn_fold3, cnn_fold4]
output = [r'outputs\s4\rome_S4_fold0.tif', r'outputs\s4\rome_S4_fold1.tif', r'outputs\s4\rome_S4_fold2.tif', r'outputs\s4\rome_S4_fold3.tif', r'outputs\s4\rome_S4_fold4.tif']

In [175]:
# whole satellite image to patches
feature_patches = cnn_utils.generate_feature_patches_loader(image_path = image,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 181196


In [176]:
# for all image patches extract morphometrics
feature_patches_urbanform = cnn_utils.generate_feature_patches_loader(image_path =better_set,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 181196


In [None]:
for i in range(5):
    train_polygons_raster = fr'rome_train_f{i}.tif'

    # load model
    cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
    cnn_model = cnn_model.cuda()
    trained_model = torch.load(cnn_models[i])
    cnn_model.load_state_dict(trained_model['model_state'])
    print('cnn model loaded')
    
    train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

    mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
    feature_patches_norm = cnn_utils.normalize_loader(feature_patches, mean, std)
    print('image patches normalized')
    
    # extract embeddings
    cnn_model.eval()
    embeddings = list()
    with torch.no_grad():
        for feature in feature_patches_norm:
            feature = feature.cuda()
            embedding = cnn_model.get_embedding_raw_fc(feature)
            embeddings.append(embedding.cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    print('embeddings extracted')
    
    # extract morphometrics
    urbanform = list()
    for feature in feature_patches_urbanform:
        urbanform.append(feature.cpu().numpy())
    urbanform = np.concatenate(urbanform, axis=0)
    print('morphometrics extracted')
    
    # aggregate morphometrics
    mean_urbanform = urbanform.mean(axis=(2,3))
    min_urbanform = urbanform.min(axis=(2,3))
    max_urbanform = urbanform.max(axis=(2,3))
    std_urbanform = urbanform.std(axis=(2,3))
    med_urbanform = np.median(urbanform, axis=(2, 3))
    print('morphometrics aggregated')
    
    # merge
    all_features = np.hstack((embeddings,mean_urbanform,min_urbanform,max_urbanform,std_urbanform,med_urbanform))

    # prediction
    rf_model = rf_models[i]
    prediction = rf_model.predict(all_features)
    print('prediction done')

    offset_left_calc, offset_top_calc = cnn_utils.calculate_optimal_offsets(image, patch_size, stride)

    output_path = output[i]
    output_path = output_path.replace(".tif", "_temp.tif")
    cnn_utils.lcz_map(offset_left_calc, offset_top_calc, image, prediction, output_path)

## Per pixel validation

In [177]:
# provide test polygons raster path
test_polygons_path = ['rome_test_f0.tif','rome_test_f1.tif','rome_test_f2.tif','rome_test_f3.tif','rome_test_f4.tif']

In [265]:
# resample lcz map to 100m
for f in output:
    out = f
    temp_f = f.replace(".tif", "_temp.tif")
    utils.resample_lcz_map(temp_f, out)
    # if os.path.exists(temp_f):
    #     try:
    #         os.remove(temp_f)
    #     except:
    #         pass

 saved to s4_outputs\rome_S4_fold0.tif
 saved to s4_outputs\rome_S4_fold1.tif
 saved to s4_outputs\rome_S4_fold2.tif
 saved to s4_outputs\rome_S4_fold3.tif
 saved to s4_outputs\rome_S4_fold4.tif


In [178]:
metrics, confusion_matrices = utils.perpixel_validation(output, test_polygons_path, splited_ref_data)

In [179]:
df_perpixel = pd.DataFrame(metrics)
df_perpixel = df_perpixel.set_index("Fold")
df_perpixel

Unnamed: 0_level_0,OA,wF1,wF1_Urban,wF1_Natural,F1_Class_1,F1_Class_2,F1_Class_3,F1_Class_4,F1_Class_5,F1_Class_6,...,F1_Class_8,F1_Class_9,F1_Class_10,F1_Class_11,F1_Class_12,F1_Class_13,F1_Class_14,F1_Class_15,F1_Class_16,F1_Class_17
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,74.6,73.29,64.79,90.53,,70.66,20.93,,63.11,64.83,...,66.94,,75.68,91.76,45.33,,99.03,,,100.0
1,63.31,62.05,51.34,85.84,,39.57,20.2,,52.96,50.22,...,92.06,,0.0,97.37,60.39,,87.63,,,100.0
2,84.51,85.52,82.22,93.62,,92.46,,,73.44,64.35,...,91.67,,,95.65,90.0,,72.04,,,100.0
3,76.67,76.84,72.92,90.44,,84.17,,,58.46,54.73,...,87.68,,,40.0,76.52,,98.68,,,
4,78.89,81.7,75.34,91.54,,66.96,,,78.45,71.05,...,97.1,,,92.36,35.29,,95.29,,,


In [180]:
df_perpixel_mean = df_perpixel.mean().round(2)
df_perpixel_mean

OA              75.60
wF1             75.88
wF1_Urban       69.32
wF1_Natural     90.39
F1_Class_1        NaN
F1_Class_2      70.76
F1_Class_3      20.56
F1_Class_4        NaN
F1_Class_5      65.28
F1_Class_6      61.04
F1_Class_7        NaN
F1_Class_8      87.09
F1_Class_9        NaN
F1_Class_10     37.84
F1_Class_11     83.43
F1_Class_12     61.51
F1_Class_13       NaN
F1_Class_14     90.53
F1_Class_15       NaN
F1_Class_16       NaN
F1_Class_17    100.00
dtype: float64

In [181]:
# export all results to csv
df_perpixel.to_csv(r"results\s4\rome_S4_results.csv")

In [182]:
# export confusion matrices
with open(r"results\s4\rome_S4_confusion_matrices.pkl", "wb") as f:
    pickle.dump(confusion_matrices, f)

# Sao Paulo

In [3]:
# load splited reference data
splited_ref_data = gpd.read_file(r'ref_data\saopaulo_ref_splitS2S3S4.gpkg')

In [4]:
# load satellite image (10 m resolution)
image = r'imagery\sao_paulo_20170726.tif'

In [5]:
# load rasterized subset of morphometrics from non-weighted RF S1 models and weighted RF S1 models (10 m resolution)
non_weighted_set = r'rasterized_morphometrics\saopaulo_rasterized_morphometrics_fold4.tif'
weighted_set = r'rasterized_morphometrics\saopaulo_rasterized_morphometrics_fold1_weighted.tif'

In [6]:
# load trained S2 CNN models for each fold
cnn_fold0 = r's2_cnn_models\saopaulo_S2_fold0_epoch67.pth'
cnn_fold1 = r's2_cnn_models\saopaulo_S2_fold1_epoch84.pth'
cnn_fold2 = r's2_cnn_models\saopaulo_S2_fold2_epoch67.pth'
cnn_fold3 = r's2_cnn_models\saopaulo_S2_fold3_epoch78.pth'
cnn_fold4 = r's2_cnn_models\saopaulo_S2_fold4_epoch37.pth'

In [7]:
# recording results
setups = ["non_weighted_set", "weighted_set"]
strategies = ["non_weighted_model", "weighted_model"]
folds = [0, 1, 2, 3, 4]

# results[setup][strategy][fold]
results = {
    setup: {
        strat: {
            fold: {} for fold in folds
        } for strat in strategies
    } for setup in setups
}

## Non-weighted RF S1 morphometric subset

### Fold 0

In [8]:
fold = 0
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [9]:
train_polygons_raster = r"saopaulo_train_f0.tif"
test_polygons_raster = r"saopaulo_test_f0.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [10]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [11]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 67
  Train Accuracy: 0.8979%
  Test Accuracy: 0.8595%
  Gap (Train - Test): 0.0384%


In [12]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1787
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 71  12 422  43  19 135 148  17   2 588  23  31   9  10 257]


In [13]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 370
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 20   2 103   6   4  38  40  20  13  26   3   5   1   3  86]


In [14]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [15]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1787, 640) (1787,)
(370, 640) (370,)


In [16]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1787
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 71  12 422  43  19 135 148  17   2 588  23  31   9  10 257]


In [17]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 370
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 20   2 103   6   4  38  40  20  13  26   3   5   1   3  86]


In [18]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1787, 740) (1787,)
(370, 740) (370,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [19]:
model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=5, n_estimators=100)
model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,5
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [20]:
# training accuracy
pred_train = model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.65
wF1 train:  88.19


In [21]:
# test accuracy
pred_test = model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  83.51
wF1 test:  78.42
Urban wF1 test:  73.07
Natural wF1 test:  91.79


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=180, class_weight=True)

In [38]:
param, train, test, diff

(41, 85.28, 81.89, 3.39)

In [22]:
model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=41, class_weight='balanced', n_estimators=100)
model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,41
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [23]:
# training accuracy
pred_train = model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  85.28
wF1 train:  86.85


In [24]:
# test accuracy
pred_test = model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold0_weighted
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  81.89
wF1 test:  82.22
Urban wF1 test:  80.05
Natural wF1 test:  89.85


### Fold 1

In [25]:
fold = 1
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [26]:
train_polygons_raster = r"saopaulo_train_f1.tif"
test_polygons_raster = r"saopaulo_test_f1.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [27]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [28]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 84
  Train Accuracy: 0.9419%
  Test Accuracy: 0.8966%
  Gap (Train - Test): 0.0453%


In [29]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1799
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 70  11 387  39  21 140 147  30  13 587  17  32   9  12 284]


In [30]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 358
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 21   3 138  10   2  33  41   7   2  27   9   4   1   1  59]


In [31]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [32]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1799, 640) (1799,)
(358, 640) (358,)


In [33]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1799
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 70  11 387  39  21 140 147  30  13 587  17  32   9  12 284]


In [34]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 358
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 21   3 138  10   2  33  41   7   2  27   9   4   1   1  59]


In [35]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1799, 740) (1799,)
(358, 740) (358,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=120, class_weight=False)

In [57]:
param, train, test, diff

(70, 92.5, 87.71, 4.79)

In [36]:
model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=70, n_estimators=100)
model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,70
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [37]:
# training accuracy
pred_train = model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.5
wF1 train:  90.94


In [38]:
# test accuracy
pred_test = model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  87.71
wF1 test:  84.92
Urban wF1 test:  85.86
Natural wF1 test:  86.43


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=120, class_weight=True)

In [64]:
param, train, test, diff

(14, 94.94, 91.06, 3.88)

In [39]:
model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=14, class_weight='balanced', n_estimators=100)
model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,14
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [40]:
# training accuracy
pred_train = model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.94
wF1 train:  95.3


In [41]:
# test accuracy
pred_test = model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold1_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  91.06
wF1 test:  91.14
Urban wF1 test:  90.89
Natural wF1 test:  93.43


### Fold 2

In [42]:
fold = 2
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [43]:
train_polygons_raster = r"saopaulo_train_f2.tif"
test_polygons_raster = r"saopaulo_test_f2.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [44]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [45]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 67
  Train Accuracy: 0.9154%
  Test Accuracy: 0.8834%
  Gap (Train - Test): 0.0320%


In [46]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1548
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 74  11 426  38  11 137 145  27  15 331  20  27  10  11 265]


In [47]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 609
Unique Labels: [ 1  2  3  4  5  6  8  9 11 12 14 16 17]
Counts: [ 17   3  99  11  12  36  43  10 283   6   9   2  78]


In [48]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [49]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1548, 640) (1548,)
(609, 640) (609,)


In [50]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1548
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 74  11 426  38  11 137 145  27  15 331  20  27  10  11 265]


In [51]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 609
Unique Labels: [ 1  2  3  4  5  6  8  9 11 12 14 16 17]
Counts: [ 17   3  99  11  12  36  43  10 283   6   9   2  78]


In [52]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1548, 740) (1548,)
(609, 740) (609,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=120, class_weight=False)

In [81]:
param, train, test, diff

(29, 92.38, 88.51, 3.87)

In [53]:
model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=29, n_estimators=100)
model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,29
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [54]:
# training accuracy
pred_train = model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.38
wF1 train:  90.9


In [55]:
# test accuracy
pred_test = model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  88.51
wF1 test:  86.81
Urban wF1 test:  80.82
Natural wF1 test:  91.48


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=True)

In [89]:
param, train, test, diff

(59, 90.83, 87.52, 3.31)

In [56]:
model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=59, class_weight='balanced', n_estimators=100)
model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,59
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [57]:
# training accuracy
pred_train = model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  90.83
wF1 train:  91.46


In [58]:
# test accuracy
pred_test = model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold2_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  87.52
wF1 test:  87.59
Urban wF1 test:  80.19
Natural wF1 test:  92.49


### Fold 3

In [59]:
fold = 3
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [60]:
train_polygons_raster = r"saopaulo_train_f3.tif"
test_polygons_raster = r"saopaulo_test_f3.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [61]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [62]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 78
  Train Accuracy: 0.9262%
  Test Accuracy: 0.8830%
  Gap (Train - Test): 0.0431%


In [63]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1815
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 77  11 434  42  20 140 155  37  15 543  22  25   9   9 276]


In [64]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 342
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [14  3 91  7  3 33 33 71  4 11  1  4 67]


In [65]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [66]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1815, 640) (1815,)
(342, 640) (342,)


In [67]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path=non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1815
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 77  11 434  42  20 140 155  37  15 543  22  25   9   9 276]


In [68]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 342
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [14  3 91  7  3 33 33 71  4 11  1  4 67]


In [69]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1815, 740) (1815,)
(342, 740) (342,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=120, class_weight=False)

In [106]:
param, train, test, diff

(13, 93.99, 89.77, 4.23)

In [70]:
model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=13, n_estimators=100)
model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,13
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [71]:
# training accuracy
pred_train = model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  93.99
wF1 train:  92.71


In [72]:
# test accuracy
pred_test = model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3 
}

results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  89.77
wF1 test:  87.96
Urban wF1 test:  84.38
Natural wF1 test:  93.66


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=3, max_features=160, class_weight=True)

In [112]:
param, train, test, diff

(145, 82.53, 78.95, 3.59)

In [73]:
model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=3, max_features=145, class_weight='balanced', n_estimators=100)
model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,3
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,145
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [74]:
# training accuracy
pred_train = model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  82.53
wF1 train:  81.22


In [75]:
# test accuracy
pred_test = model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold3_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  78.95
wF1 test:  79.0
Urban wF1 test:  68.05
Natural wF1 test:  92.74


### Fold 4

In [76]:
fold = 4
test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

In [77]:
train_polygons_raster = r"saopaulo_train_f4.tif"
test_polygons_raster = r"saopaulo_test_f4.tif"

# rasterize
train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
utils.rasterize_reference_polygons(train_polygons, image, train_temp)
utils.rasterize_reference_polygons(test_polygons, image, test_temp)

In [78]:
# train and test images matched to 10m image
train_image_matched = utils.match_rasters(train_temp, image)
test_image_matched = utils.match_rasters(test_temp, image)

# save
train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")

train_image_matched.close()
test_image_matched.close()
train_image_matched = None
test_image_matched = None
gc.collect()
if os.path.exists(train_temp):
    os.remove(train_temp)
if os.path.exists(test_temp):
    os.remove(test_temp)

In [79]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 37
  Train Accuracy: 0.9303%
  Test Accuracy: 0.9184%
  Gap (Train - Test): 0.0119%


In [80]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1679
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 72  11 431  34  21 140 157  37  15 407  22  29   3  10 290]


In [81]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 478
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [ 19   3  94  15   2  33  31 207   4   7   7   3  53]


In [82]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [83]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1679, 640) (1679,)
(478, 640) (478,)


In [84]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1679
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 72  11 431  34  21 140 157  37  15 407  22  29   3  10 290]


In [85]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path =non_weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 478
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [ 19   3  94  15   2  33  31 207   4   7   7   3  53]


In [86]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1679, 740) (1679,)
(478, 740) (478,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=7, max_features=180, class_weight=False)

In [129]:
param, train, test, diff

(2, 94.58, 92.26, 2.32)

In [87]:
model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=7, max_features=2, n_estimators=100)
model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,2
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [88]:
# training accuracy
pred_train = model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.58
wF1 train:  93.4


In [89]:
# test accuracy
pred_test = model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4 
}


results["non_weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  92.26
wF1 test:  90.62
Urban wF1 test:  90.24
Natural wF1 test:  92.69


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=180, class_weight=True)

In [137]:
param, train, test, diff

(2, 94.94, 89.96, 4.98)

In [90]:
model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=7, max_features=1, class_weight='balanced', n_estimators=100)
model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [91]:
# training accuracy
pred_train = model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  96.78
wF1 train:  96.82


In [92]:
# test accuracy
pred_test = model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": model_fold4_weighted 
}

results["non_weighted_set"]["weighted_model"][fold] = metrics

OA test:  91.63
wF1 test:  91.58
Urban wF1 test:  90.91
Natural wF1 test:  92.78


## Weighted RF S1 morphometric subset

### Fold 0

In [93]:
if os.path.isfile(r"saopaulo_train_f0.tif"):
    fold = 0
    train_polygons_raster = r'saopaulo_train_f0.tif'
    test_polygons_raster = r'saopaulo_test_f0.tif'
else:
    fold = 0
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"saopaulo_train_f0.tif"
    test_polygons_raster = r"saopaulo_test_f0.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [94]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold0)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 67
  Train Accuracy: 0.8979%
  Test Accuracy: 0.8595%
  Gap (Train - Test): 0.0384%


In [95]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1787
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 71  12 422  43  19 135 148  17   2 588  23  31   9  10 257]


In [96]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 370
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 20   2 103   6   4  38  40  20  13  26   3   5   1   3  86]


In [97]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [98]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1787, 640) (1787,)
(370, 640) (370,)


In [99]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1787
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 71  12 422  43  19 135 148  17   2 588  23  31   9  10 257]


In [100]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 370
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 20   2 103   6   4  38  40  20  13  26   3   5   1   3  86]


In [101]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1787, 740) (1787,)
(370, 740) (370,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=False)

In [None]:
param, train, test, diff

In [102]:
ws_model_fold0 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=48, n_estimators=100)
ws_model_fold0.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,48
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [103]:
# training accuracy
pred_train = ws_model_fold0.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.05
wF1 train:  90.06


In [104]:
# test accuracy
pred_test = ws_model_fold0.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  84.59
wF1 test:  79.46
Urban wF1 test:  74.24
Natural wF1 test:  92.51


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [161]:
param, train, test, diff

(7, 86.68, 82.7, 3.98)

In [105]:
ws_model_fold0_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=7, class_weight='balanced', n_estimators=100)
ws_model_fold0_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,7
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [106]:
# training accuracy
pred_train = ws_model_fold0_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  86.68
wF1 train:  87.86


In [107]:
# test accuracy
pred_test = ws_model_fold0_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold0_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold0_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold0_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  82.7
wF1 test:  83.5
Urban wF1 test:  79.66
Natural wF1 test:  92.8


### Fold 1

In [108]:
if os.path.isfile(r"saopaulo_train_f1.tif"):
    fold = 1
    train_polygons_raster = r'saopaulo_train_f1.tif'
    test_polygons_raster = r'saopaulo_test_f1.tif'
else:
    fold = 1
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"saopaulo_train_f1.tif"
    test_polygons_raster = r"saopaulo_test_f1.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [109]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold1)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 84
  Train Accuracy: 0.9419%
  Test Accuracy: 0.8966%
  Gap (Train - Test): 0.0453%


In [110]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1799
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 70  11 387  39  21 140 147  30  13 587  17  32   9  12 284]


In [111]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 358
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 21   3 138  10   2  33  41   7   2  27   9   4   1   1  59]


In [112]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [113]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1799, 640) (1799,)
(358, 640) (358,)


In [114]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1799
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 70  11 387  39  21 140 147  30  13 587  17  32   9  12 284]


In [115]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 358
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 21   3 138  10   2  33  41   7   2  27   9   4   1   1  59]


In [116]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1799, 740) (1799,)
(358, 740) (358,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=15, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=180, class_weight=False)

In [176]:
param, train, test, diff

(9, 93.16, 88.83, 4.34)

In [117]:
ws_model_fold1 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=9, n_estimators=100)
ws_model_fold1.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,9
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [118]:
# training accuracy
pred_train = ws_model_fold1.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  93.16
wF1 train:  91.66


In [119]:
# test accuracy
pred_test = ws_model_fold1.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  88.83
wF1 test:  85.91
Urban wF1 test:  85.73
Natural wF1 test:  89.4


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=5, max_features=180, class_weight=True)

In [182]:
param, train, test, diff

(29, 93.77, 91.34, 2.43)

In [120]:
ws_model_fold1_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=5, max_features=29, class_weight='balanced', n_estimators=100)
ws_model_fold1_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,29
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [121]:
# training accuracy
pred_train = ws_model_fold1_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  93.77
wF1 train:  94.14


In [122]:
# test accuracy
pred_test = ws_model_fold1_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold1_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold1_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold1_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  91.34
wF1 test:  90.82
Urban wF1 test:  90.55
Natural wF1 test:  93.24


### Fold 2

In [123]:
if os.path.isfile(r"saopaulo_train_f2.tif"):
    fold = 2
    train_polygons_raster = r'saopaulo_train_f2.tif'
    test_polygons_raster = r'saopaulo_test_f2.tif'
else:
    fold = 2
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"saopaulo_train_f2.tif"
    test_polygons_raster = r"saopaulo_test_f2.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [124]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold2)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 67
  Train Accuracy: 0.9154%
  Test Accuracy: 0.8834%
  Gap (Train - Test): 0.0320%


In [125]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1548
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 74  11 426  38  11 137 145  27  15 331  20  27  10  11 265]


In [126]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 609
Unique Labels: [ 1  2  3  4  5  6  8  9 11 12 14 16 17]
Counts: [ 17   3  99  11  12  36  43  10 283   6   9   2  78]


In [127]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [128]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1548, 640) (1548,)
(609, 640) (609,)


In [129]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1548
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 74  11 426  38  11 137 145  27  15 331  20  27  10  11 265]


In [130]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 609
Unique Labels: [ 1  2  3  4  5  6  8  9 11 12 14 16 17]
Counts: [ 17   3  99  11  12  36  43  10 283   6   9   2  78]


In [131]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1548, 740) (1548,)
(609, 740) (609,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=180, class_weight=False)

In [198]:
param, train, test, diff

(12, 92.18, 88.34, 3.84)

In [132]:
ws_model_fold2 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=12, n_estimators=100)
ws_model_fold2.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,12
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [133]:
# training accuracy
pred_train = ws_model_fold2.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.18
wF1 train:  90.59


In [134]:
# test accuracy
pred_test = ws_model_fold2.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  88.34
wF1 test:  86.5
Urban wF1 test:  79.87
Natural wF1 test:  91.65


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=4, max_features=180, class_weight=True)

In [204]:
param, train, test, diff

(122, 92.25, 88.01, 4.23)

In [135]:
ws_model_fold2_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=4, max_features=122, class_weight='balanced', n_estimators=100)
ws_model_fold2_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,122
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [136]:
# training accuracy
pred_train = ws_model_fold2_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  92.25
wF1 train:  93.0


In [137]:
# test accuracy
pred_test = ws_model_fold2_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold2_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold2_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold2_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  88.01
wF1 test:  88.4
Urban wF1 test:  82.19
Natural wF1 test:  92.54


### Fold 3

In [138]:
if os.path.isfile(r"saopaulo_train_f3.tif"):
    fold = 3
    train_polygons_raster = r'saopaulo_train_f3.tif'
    test_polygons_raster = r'saopaulo_test_f3.tif'
else:
    fold = 3
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"saopaulo_train_f3.tif"
    test_polygons_raster = r"saopaulo_test_f3.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [139]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold3)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 78
  Train Accuracy: 0.9262%
  Test Accuracy: 0.8830%
  Gap (Train - Test): 0.0431%


In [140]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1815
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 77  11 434  42  20 140 155  37  15 543  22  25   9   9 276]


In [141]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 342
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [14  3 91  7  3 33 33 71  4 11  1  4 67]


In [142]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [143]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1815, 640) (1815,)
(342, 640) (342,)


In [144]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1815
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 77  11 434  42  20 140 155  37  15 543  22  25   9   9 276]


In [145]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 342
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [14  3 91  7  3 33 33 71  4 11  1  4 67]


In [146]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1815, 740) (1815,)
(342, 740) (342,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=180, class_weight=False)

In [219]:
param, train, test, diff

(21, 94.27, 89.77, 4.5)

In [147]:
ws_model_fold3 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=21, n_estimators=100)
ws_model_fold3.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,21
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [148]:
# training accuracy
pred_train = ws_model_fold3.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  94.27
wF1 train:  93.08


In [149]:
# test accuracy
pred_test = ws_model_fold3.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  89.77
wF1 test:  88.08
Urban wF1 test:  84.63
Natural wF1 test:  93.66


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=2, max_features=180, class_weight=True)

In [228]:
param, train, test, diff

(93, 83.42, 78.65, 4.76)

In [150]:
ws_model_fold3_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=2, max_features=93, class_weight='balanced', n_estimators=100)
ws_model_fold3_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,93
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [151]:
# training accuracy
pred_train = ws_model_fold3_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  83.42
wF1 train:  82.51


In [152]:
# test accuracy
pred_test = ws_model_fold3_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold3_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold3_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold3_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  78.65
wF1 test:  77.49
Urban wF1 test:  65.36
Natural wF1 test:  92.11


### Fold 4

In [153]:
if os.path.isfile(r"saopaulo_train_f4.tif"):
    fold = 4
    train_polygons_raster = r'saopaulo_train_f4.tif'
    test_polygons_raster = r'saopaulo_test_f4.tif'
else:
    fold = 4
    test_polygons = splited_ref_data[splited_ref_data["fold"] == fold]
    train_polygons = splited_ref_data[splited_ref_data["fold"] != fold]

    train_polygons_raster = r"saopaulo_train_f4.tif"
    test_polygons_raster = r"saopaulo_test_f4.tif"

    # rasterize
    train_temp = train_polygons_raster.replace(".tif", "_temp.tif")
    test_temp = test_polygons_raster.replace(".tif", "_temp.tif")
    utils.rasterize_reference_polygons(train_polygons, image, train_temp)
    utils.rasterize_reference_polygons(test_polygons, image, test_temp)

    # train and test images matched to 10m image
    train_image_matched = utils.match_rasters(train_temp, image)
    test_image_matched = utils.match_rasters(test_temp, image)
    
    # save
    train_image_matched.rio.to_raster(train_polygons_raster, driver="GTiff", compress="LZW")
    test_image_matched.rio.to_raster(test_polygons_raster, driver="GTiff", compress="LZW")
    
    train_image_matched.close()
    test_image_matched.close()
    train_image_matched = None
    test_image_matched = None
    gc.collect()
    if os.path.exists(train_temp):
        os.remove(train_temp)
    if os.path.exists(test_temp):
        os.remove(test_temp)

In [154]:
# load model
cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
cnn_model = cnn_model.cuda()

trained_model = torch.load(cnn_fold4)
cnn_model.load_state_dict(trained_model['model_state'])

epoch_num = trained_model['epoch_num']
train_accuracy = trained_model['train_accuracy']
test_accuracy = trained_model['test_accuracy']
gap = trained_model['gap']

# Optionally, print the stats
print(f"Loaded Model from Epoch {epoch_num}")
print(f"  Train Accuracy: {train_accuracy:.4f}%")
print(f"  Test Accuracy: {test_accuracy:.4f}%")
print(f"  Gap (Train - Test): {gap:.4f}%")

Loaded Model from Epoch 37
  Train Accuracy: 0.9303%
  Test Accuracy: 0.9184%
  Gap (Train - Test): 0.0119%


In [155]:
train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1679
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 72  11 431  34  21 140 157  37  15 407  22  29   3  10 290]


In [156]:
test_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 478
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [ 19   3  94  15   2  33  31 207   4   7   7   3  53]


In [157]:
# norm
mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
train_patches_embeddings_norm = cnn_utils.normalize_loader(train_patches_embeddings, mean, std)
test_patches_embeddings_norm = cnn_utils.normalize_loader(test_patches_embeddings, mean, std)

In [158]:
# extract embedding vectors
train_embeddings, y_train, test_embeddings, y_test = cnn_utils.extract_embeddings(train_patches_embeddings_norm, test_patches_embeddings_norm, cnn_model)
print(train_embeddings.shape, y_train.shape)
print(test_embeddings.shape, y_test.shape)

(1679, 640) (1679,)
(478, 640) (478,)


In [159]:
# patches of rasterized morphometrics
train_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 1679
Unique Labels: [ 1  2  3  4  5  6  8  9 10 11 12 14 15 16 17]
Counts: [ 72  11 431  34  21 140 157  37  15 407  22  29   3  10 290]


In [160]:
test_patches_urbanform = cnn_utils.generate_labeled_patches_loader(image_path = weighted_set,reference_path = test_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

Total ground truth patches generated: 478
Unique Labels: [ 1  2  3  4  5  6  8 11 12 14 15 16 17]
Counts: [ 19   3  94  15   2  33  31 207   4   7   7   3  53]


In [161]:
X_train, X_test = cnn_utils.aggregate_morphometrics(train_patches_urbanform, test_patches_urbanform, train_embeddings, test_embeddings)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1679, 740) (1679,)
(478, 740) (478,)


#### Non-weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=False)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=8, max_features=180, class_weight=False)

In [243]:
param, train, test, diff

(2, 96.31, 91.84, 4.47)

In [162]:
ws_model_fold4 = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=8, max_features=2, n_estimators=100)
ws_model_fold4.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,2
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [163]:
# training accuracy
pred_train = ws_model_fold4.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  96.31
wF1 train:  95.67


In [164]:
# test accuracy
pred_test = ws_model_fold4.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4 
}

results["weighted_set"]["non_weighted_model"][fold] = metrics

OA test:  91.84
wF1 test:  90.46
Urban wF1 test:  90.35
Natural wF1 test:  92.11


#### Weighted RF model

In [None]:
utils.finetune_height(X_train, y_train, X_test, y_test, max_height=10, class_weight=True)

In [None]:
param, train, test, diff = utils.finetune_max_features(X_train, y_train, X_test, y_test, height=6, max_features=180, class_weight=True)

In [249]:
param, train, test, diff

(23, 95.0, 90.59, 4.41)

In [165]:
ws_model_fold4_weighted = RandomForestClassifier(random_state=0, n_jobs=-1, max_depth=6, max_features=23, class_weight='balanced', n_estimators=100)
ws_model_fold4_weighted.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,23
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [166]:
# training accuracy
pred_train = ws_model_fold4_weighted.predict(X_train)
train_accuracy = accuracy_score(y_train, pred_train)
print('OA train: ', round(train_accuracy*100,2))
train_wf1 = f1_score(y_train, pred_train, average='weighted', labels=np.unique(y_train))
print('wF1 train: ', round(train_wf1*100,2))

OA train:  95.0
wF1 train:  95.36


In [167]:
# test accuracy
pred_test = ws_model_fold4_weighted.predict(X_test)
test_accuracy = accuracy_score(y_test, pred_test)
print('OA test: ', round(test_accuracy*100,2))
test_wf1 = f1_score(y_test, pred_test, average='weighted', labels=np.unique(y_test))
print('wF1 test: ', round(test_wf1*100,2))

# F1U
X_test_urban = X_test[y_test<=10]
y_test_urban = y_test[y_test<=10]
y_test_pred_urb = ws_model_fold4_weighted.predict(X_test_urban)
wf1_urb = f1_score(y_test_urban, y_test_pred_urb, average='weighted', labels=np.unique(y_test[y_test<=10]))
print('Urban wF1 test: ', round(wf1_urb*100,2))

# F1N
X_test_nat = X_test[y_test>10]
y_test_nat = y_test[y_test>10]
y_test_pred_nat = ws_model_fold4_weighted.predict(X_test_nat)
wf1_nat = f1_score(y_test_nat, y_test_pred_nat, average='weighted', labels=np.unique(y_test[y_test>10]))
print('Natural wF1 test: ', round(wf1_nat*100,2))

metrics = {
    "OA":  round(test_accuracy*100,2),
    "wF1": round(test_wf1*100,2),
    "F1U": round(wf1_urb*100,2),
    "F1N": round(wf1_nat*100,2),
    "Model": ws_model_fold4_weighted 
}

results["weighted_set"]["weighted_model"][fold] = metrics

OA test:  90.59
wF1 test:  92.32
Urban wF1 test:  91.64
Natural wF1 test:  93.09


## Evaluation

In [168]:
df = pd.DataFrame.from_dict({
    (i, j, k): results[i][j][k] 
    for i in results.keys() 
    for j in results[i].keys() 
    for k in results[i][j].keys()
}, orient='index')

df.index.names = ["Setup", "Strategy", "Fold"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OA,wF1,F1U,F1N,Model
Setup,Strategy,Fold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
non_weighted_set,non_weighted_model,0,83.51,78.42,73.07,91.79,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,non_weighted_model,1,87.71,84.92,85.86,86.43,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,non_weighted_model,2,88.51,86.81,80.82,91.48,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,non_weighted_model,3,89.77,87.96,84.38,93.66,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,non_weighted_model,4,92.26,90.62,90.24,92.69,"(DecisionTreeClassifier(max_depth=7, max_featu..."
non_weighted_set,weighted_model,0,81.89,82.22,80.05,89.85,"(DecisionTreeClassifier(max_depth=2, max_featu..."
non_weighted_set,weighted_model,1,91.06,91.14,90.89,93.43,"(DecisionTreeClassifier(max_depth=6, max_featu..."
non_weighted_set,weighted_model,2,87.52,87.59,80.19,92.49,"(DecisionTreeClassifier(max_depth=4, max_featu..."
non_weighted_set,weighted_model,3,78.95,79.0,68.05,92.74,"(DecisionTreeClassifier(max_depth=3, max_featu..."
non_weighted_set,weighted_model,4,91.63,91.58,90.91,92.78,"(DecisionTreeClassifier(max_depth=7, max_featu..."


In [169]:
# determine best set and best weighting strategy
df_metrics = df.drop(columns=["Model"])
averages = df_metrics.groupby(["Setup", "Strategy"]).mean().round(2)
averages["wF1+F1U"] = averages["wF1"] + averages["F1U"]
averages = averages.sort_values("wF1+F1U", ascending=False)
averages

Unnamed: 0_level_0,Unnamed: 1_level_0,OA,wF1,F1U,F1N,wF1+F1U
Setup,Strategy,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
weighted_set,non_weighted_model,88.67,86.08,82.96,91.87,169.04
non_weighted_set,non_weighted_model,88.35,85.75,82.87,91.21,168.62
weighted_set,weighted_model,86.26,86.51,81.88,92.76,168.39
non_weighted_set,weighted_model,86.21,86.31,82.02,92.26,168.33


In [170]:
best_setup, best_strategy = averages["wF1+F1U"].idxmax()
best_setup, best_strategy

('weighted_set', 'non_weighted_model')

In [171]:
# show individual model performance of the best  set and best weighting strategy
df_metrics.loc[best_setup, best_strategy]

Unnamed: 0_level_0,OA,wF1,F1U,F1N
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,84.59,79.46,74.24,92.51
1,88.83,85.91,85.73,89.4
2,88.34,86.5,79.87,91.65
3,89.77,88.08,84.63,93.66
4,91.84,90.46,90.35,92.11


In [172]:
# show the average metrics of the best set and best weighting strategy
averages.loc[best_setup, best_strategy]

OA          88.67
wF1         86.08
F1U         82.96
F1N         91.87
wF1+F1U    169.04
Name: (weighted_set, non_weighted_model), dtype: float64

In [173]:
# get the models
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
rf_models

[RandomForestClassifier(max_depth=5, max_features=48, n_jobs=-1, random_state=0),
 RandomForestClassifier(max_depth=6, max_features=9, n_jobs=-1, random_state=0),
 RandomForestClassifier(max_depth=6, max_features=12, n_jobs=-1, random_state=0),
 RandomForestClassifier(max_depth=6, max_features=21, n_jobs=-1, random_state=0),
 RandomForestClassifier(max_depth=8, max_features=2, n_jobs=-1, random_state=0)]

## Prediction

In [174]:
better_set = weighted_set
rf_models = df.loc[(best_setup, best_strategy), "Model"].tolist()
cnn_models = [cnn_fold0, cnn_fold1, cnn_fold2, cnn_fold3, cnn_fold4]
output = [r'outputs\s4\saopaulo_S4_fold0.tif', r'outputs\s4\saopaulo_S4_fold1.tif', r'outputs\s4\saopaulo_S4_fold2.tif', r'outputs\s4\saopaulo_S4_fold3.tif', r'outputs\s4\saopaulo_S4_fold4.tif']

In [175]:
# whole satellite image to patches
feature_patches = cnn_utils.generate_feature_patches_loader(image_path = image,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 508599


In [176]:
# for all image patches extract morphometrics
feature_patches_urbanform = cnn_utils.generate_feature_patches_loader(image_path =better_set,patch_size = patch_size,stride = stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top)

Total patches loaded: 508599


In [None]:
for i in range(5):
    train_polygons_raster = fr'saopaulo_train_f{i}.tif'

    # load model
    cnn_model = MSMLA50(input_channels=10, depth=[16,32,48], num_classes=len(train_polygons["gridcode"].unique()))
    cnn_model = cnn_model.cuda()
    trained_model = torch.load(cnn_models[i])
    cnn_model.load_state_dict(trained_model['model_state'])
    print('cnn model loaded')
    
    train_patches_embeddings = cnn_utils.generate_labeled_patches_loader(image_path = image,reference_path = train_polygons_raster,patch_size = patch_size,stride = gt_stride,batch_size = batch_size_emb,offset_left = offset_left,offset_top = offset_top,background_label = background_label)

    mean, std = cnn_utils.get_normalization_parameters(train_patches_embeddings)
    feature_patches_norm = cnn_utils.normalize_loader(feature_patches, mean, std)
    print('image patches normalized')
    
    # extract embeddings
    cnn_model.eval()
    embeddings = list()
    with torch.no_grad():
        for feature in feature_patches_norm:
            feature = feature.cuda()
            embedding = cnn_model.get_embedding_raw_fc(feature)
            embeddings.append(embedding.cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    print('embeddings extracted')
    
    # extract morphometrics
    urbanform = list()
    for feature in feature_patches_urbanform:
        urbanform.append(feature.cpu().numpy())
    urbanform = np.concatenate(urbanform, axis=0)
    print('morphometrics extracted')
    
    # aggregate morphometrics
    mean_urbanform = urbanform.mean(axis=(2,3))
    min_urbanform = urbanform.min(axis=(2,3))
    max_urbanform = urbanform.max(axis=(2,3))
    std_urbanform = urbanform.std(axis=(2,3))
    med_urbanform = np.median(urbanform, axis=(2, 3))
    print('morphometrics aggregated')
    
    # merge
    all_features = np.hstack((embeddings,mean_urbanform,min_urbanform,max_urbanform,std_urbanform,med_urbanform))

    # prediction
    rf_model = rf_models[i]
    prediction = rf_model.predict(all_features)
    print('prediction done')

    offset_left_calc, offset_top_calc = cnn_utils.calculate_optimal_offsets(image, patch_size, stride)

    output_path = output[i]
    output_path = output_path.replace(".tif", "_temp.tif")
    cnn_utils.lcz_map(offset_left_calc, offset_top_calc, image, prediction, output_path)

## Per pixel validation

In [177]:
# provide test polygons raster path
test_polygons_path = ['saopaulo_test_f0.tif','saopaulo_test_f1.tif','saopaulo_test_f2.tif','saopaulo_test_f3.tif','saopaulo_test_f4.tif']

In [447]:
# resample lcz map to 100m
for f in output:
    out = f
    temp_f = f.replace(".tif", "_temp.tif")
    utils.resample_lcz_map(temp_f, out)
    # if os.path.exists(temp_f):
    #     try:
    #         os.remove(temp_f)
    #     except:
    #         pass

 saved to s4_outputs\saopaulo_S4_fold0.tif
 saved to s4_outputs\saopaulo_S4_fold1.tif
 saved to s4_outputs\saopaulo_S4_fold2.tif
 saved to s4_outputs\saopaulo_S4_fold3.tif
 saved to s4_outputs\saopaulo_S4_fold4.tif


In [178]:
metrics, confusion_matrices = utils.perpixel_validation(output, test_polygons_path, splited_ref_data)

In [179]:
df_perpixel = pd.DataFrame(metrics)
df_perpixel = df_perpixel.set_index("Fold")
df_perpixel

Unnamed: 0_level_0,OA,wF1,wF1_Urban,wF1_Natural,F1_Class_1,F1_Class_2,F1_Class_3,F1_Class_4,F1_Class_5,F1_Class_6,...,F1_Class_8,F1_Class_9,F1_Class_10,F1_Class_11,F1_Class_12,F1_Class_13,F1_Class_14,F1_Class_15,F1_Class_16,F1_Class_17
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,82.5,77.31,73.4,89.34,76.73,0.0,96.43,0.0,0.0,67.43,...,89.91,0.0,0.0,87.19,0.0,,53.33,0.0,0.0,99.94
1,87.45,84.33,86.39,83.89,85.5,0.0,98.65,5.88,0.0,66.96,...,96.06,21.36,0.0,96.47,10.43,,50.55,0.0,0.0,98.85
2,87.67,85.77,79.58,90.78,80.52,0.0,93.03,27.85,0.0,73.96,...,91.52,20.75,,95.21,2.53,,47.41,0.0,45.45,89.24
3,87.97,86.17,82.52,92.06,56.0,0.0,93.33,59.13,0.0,70.55,...,89.5,,,96.18,40.0,,76.4,0.0,0.0,100.0
4,91.44,90.6,90.01,92.59,92.45,0.0,97.4,66.67,0.0,83.35,...,85.99,,,98.0,5.8,,32.98,0.0,32.65,99.36


In [180]:
df_perpixel_mean = df_perpixel.mean().round(2)
df_perpixel_mean

OA             87.41
wF1            84.84
wF1_Urban      82.38
wF1_Natural    89.73
F1_Class_1     78.24
F1_Class_2      0.00
F1_Class_3     95.77
F1_Class_4     31.91
F1_Class_5      0.00
F1_Class_6     72.45
F1_Class_7       NaN
F1_Class_8     90.60
F1_Class_9     14.04
F1_Class_10     0.00
F1_Class_11    94.61
F1_Class_12    11.75
F1_Class_13      NaN
F1_Class_14    52.13
F1_Class_15     0.00
F1_Class_16    15.62
F1_Class_17    97.48
dtype: float64

In [181]:
# export all results to csv
df_perpixel.to_csv(r"results\s4\saopaulo_S4_results.csv")

In [182]:
# export confusion matrices
with open(r"results\s4\saopaulo_S4_confusion_matrices.pkl", "wb") as f:
    pickle.dump(confusion_matrices, f)