In [76]:
import os
import random
import shutil
import numpy as np
import cv2
from sklearn.metrics import classification_report, f1_score, confusion_matrix, accuracy_score, precision_score, recall_score, jaccard_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from scipy.ndimage import generic_filter
from sklearn.utils import shuffle

In [77]:
# Get mean and variance
def getMeanAndVar(channel, window = 5):
    mean = cv2.blur(channel, (window, window))
    var = generic_filter(channel, np.var, size = window)
    return mean, var

In [78]:
def extractFeatures(rgb_img, nrg_img, window = 5):
    h, w, _ = rgb_img.shape
    nrg = nrg_img[:, :, 0].astype(np.float32)

    # Extract the pixel
    r = rgb_img[:, :, 2].astype(np.float32)
    g = rgb_img[:, :, 1].astype(np.float32)
    b = rgb_img[:, :, 0].astype(np.float32)
    ndvi = (nrg - r) / (nrg + r + 1e-6)

    # Get mean and variance of each chanel
    r_mean, r_var = getMeanAndVar(r, window)
    g_mean, g_var = getMeanAndVar(g, window)
    b_mean, b_var = getMeanAndVar(b, window)
    nrg_mean, nrg_var = getMeanAndVar(nrg, window)
    ndvi_mean, ndvi_var = getMeanAndVar(ndvi, window)

     # Get coordinate information
    x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
    x_norm = (x_coords / w).astype(np.float32)
    y_norm = (y_coords / h).astype(np.float32)

    # Concatenate all features
    X = np.stack([
        r.reshape(-1), g.reshape(-1), b.reshape(-1), nrg.reshape(-1),
        r_mean.reshape(-1), g_mean.reshape(-1), b_mean.reshape(-1), nrg_mean.reshape(-1),
        r_var.reshape(-1), g_var.reshape(-1), b_var.reshape(-1), nrg_var.reshape(-1), ndvi.reshape(-1), 
        ndvi_mean.reshape(-1), ndvi_var.reshape(-1), x_norm.reshape(-1), y_norm.reshape(-1)
    ], axis=1)

    return X

In [79]:
# Import Dataset
nrg_root = './data/NRG_images'
rgb_root = './data/RGB_images/'
mask_root = './data/masks'

all_files = []

for file in os.listdir(nrg_root):
    nrg_path = os.path.join(nrg_root, file)
    rgb_file = file.replace("NRG", "RGB")
    rgb_path = os.path.join(rgb_root, rgb_file)
    mask_file = file.replace("NRG", "mask")
    mask_path = os.path.join(mask_root, mask_file)

    if os.path.exists(rgb_path) and os.path.exists(mask_path):
        all_files.append((mask_file, file, rgb_file))

# Split into train set and test set
train_files, test_files = train_test_split(all_files, test_size = 0.2, random_state = 42)

In [80]:
X_train = []
y_train = []

# Load and split the images in train set
for mask_file, file, rgb_file in train_files:
    mask = cv2.resize(cv2.imread(os.path.join(mask_root, mask_file), cv2.IMREAD_GRAYSCALE), (128, 128), interpolation = cv2.INTER_NEAREST)
    mask = (mask > 128).astype(np.uint8)
    nrg_img = cv2.resize(cv2.imread(os.path.join(nrg_root, file), cv2.IMREAD_UNCHANGED), (128, 128), interpolation = cv2.INTER_AREA)
    rgb_img = cv2.resize(cv2.imread(os.path.join(rgb_root, rgb_file), cv2.IMREAD_UNCHANGED), (128, 128), interpolation = cv2.INTER_AREA)
    X = extractFeatures(rgb_img, nrg_img, window = 5)
    X_train.append(X)
    y_train.append(mask.reshape(-1))

X_train = np.vstack(X_train)
y_train = np.hstack(y_train)

In [81]:
X_test = []
y_test = []

# Load and split the images in test set
for mask_file, file, rgb_file in test_files:
    mask = cv2.resize(cv2.imread(os.path.join(mask_root, mask_file), cv2.IMREAD_GRAYSCALE), (128, 128), interpolation = cv2.INTER_NEAREST)
    mask = (mask > 128).astype(np.uint8)
    nrg_img = cv2.resize(cv2.imread(os.path.join(nrg_root, file), cv2.IMREAD_UNCHANGED), (128, 128), interpolation = cv2.INTER_AREA)
    rgb_img = cv2.resize(cv2.imread(os.path.join(rgb_root, rgb_file), cv2.IMREAD_UNCHANGED), (128, 128), interpolation = cv2.INTER_AREA)
    X = extractFeatures(rgb_img, nrg_img, window = 5)
    X_test.append(X)
    y_test.append(mask.reshape(-1))

X_test = np.vstack(X_test)
y_test = np.hstack(y_test)

In [82]:
# Random Forest
random_tree_model = RandomForestClassifier(n_estimators = 50, random_state = 10, max_depth = 15, n_jobs = -1, class_weight = 'balanced')
random_tree_model.fit(X_train, y_train)
# y_pred = random_tree_model.predict(X_test)
avg_proba = random_tree_model.predict_proba(X_test)[:, 1]
y_pred = (avg_proba > 0.748).astype(int)

# Get the evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
iou = jaccard_score(y_test, y_pred)


In [83]:
# Outcome output
print('-----------Random Forest----------------')
print(f'IoU: {iou: .4f}')
print(f'Accuracy: {accuracy: .4f}')
print(f'Precision: {precision: .4f}')
print(f'Recall: {recall: .4f}')
print(f'F1 Score: {f1: .4f}')
print(f'Confusion Matrix:')
print(confusion_matrix(y_true = y_test, y_pred = y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

-----------Random Forest----------------
IoU:  0.1909
Accuracy:  0.9574
Precision:  0.2467
Recall:  0.4577
F1 Score:  0.3206
Confusion Matrix:
[[1381330   44793]
 [  17383   14670]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.97      0.98   1426123
           1       0.25      0.46      0.32     32053

    accuracy                           0.96   1458176
   macro avg       0.62      0.71      0.65   1458176
weighted avg       0.97      0.96      0.96   1458176

