▶️ **Link to Youtube Video:** [Day 12 - Perform a Supervised Random Forest Classification using Python](https://youtu.be/IWJu9XXPsE4?si=FwkdkRneIV-DeNg3)


▶️ **Link to Full Youtube Playlist:** [12 Days Geospatial Python Bootcamp](https://youtube.com/playlist?list=PLPBWT_CJ5QhL90iN3n6zWGpSXQLw42ToU&si=04Dv0mI3pPpBK29z)

# Supervised Image Classification in Python

#### Imports

In [None]:
import warnings

# Silence every warning category for the rest of the session; this also hides
# deprecation notices raised by the libraries imported below.
warnings.filterwarnings(action="ignore")

In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio as rio
import keras
import matplotlib.pyplot as plt

from rasterio.plot import show as r_show
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, cohen_kappa_score, accuracy_score
from tqdm import tqdm

### Source files and folders

In [None]:
# Input locations -- adjust these two paths to point at your own data.
training_shapefile_path = r"./shps/signatures.shp" # the path to your signature shapefile
rasters_path = r"./images" # the path to the satellite bands

# Output folder for the classified raster; created if it does not exist yet.
classified_path = r"classified"
os.makedirs(classified_path, exist_ok=True)

# Load the training signatures from the shapefile.
training_shapefile = gpd.read_file(training_shapefile_path)

### Categorize training labels

In [None]:
# Lookup table: class name found in the "Name" column -> integer class code.
keys = {"Water": 1, "Vegetation": 2, "Built Up": 3}


def categorize_labels(record):
    """Return the integer class code registered in ``keys`` for ``record``.

    Raises ``KeyError`` when ``record`` is not one of the names in ``keys``.
    """
    return keys[record]


# Store the numeric code of every training sample in a new "LULC_Class" column.
training_shapefile["LULC_Class"] = training_shapefile["Name"].apply(
    categorize_labels
)
training_shapefile

### Read bands into numpy array

In [None]:
# Collect the GeoTIFF band files. os.listdir() returns entries in arbitrary
# order, so sort them to make the band (feature column) order reproducible
# between runs and machines.
images = sorted(item for item in os.listdir(rasters_path) if item.lower().endswith('.tif'))
if not images:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise FileNotFoundError(f"No .tif rasters found in {rasters_path!r}")

images_nparray = []
src = None

for raster in images:
    # Close the previously opened dataset so only one handle stays open.
    if src is not None:
        src.close()

    path = os.path.join(rasters_path, raster)
    src = rio.open(path)
    band = src.read(1)  # only the first band of each file is used
    images_nparray.append(band)

# NOTE: `src` (the last raster opened) is deliberately left open -- later cells
# use it for coordinate lookups (src.index) and output metadata (src.meta).
# This assumes every band shares the same grid -- TODO confirm for your data.


### Extract band values into a column in the training shapefile

In [None]:
number_of_training_samples = len(training_shapefile['Name'])

# Spectral data: one list per training sample holding the value of every band
# at the pixel that contains the sample point.
spectral_data = []

# Iterate over the geometries directly so the loop does not depend on the
# GeoDataFrame having a default 0..n-1 index.
for point in tqdm(training_shapefile['geometry'], total=number_of_training_samples, ncols=100):
    # get geo coords of signatures
    UTM_x, UTM_y = point.x, point.y

    # Convert to pixel coord. rasterio's index() returns (row, col) -- NOT
    # (x, y) -- so the array must be addressed as [row, col].
    row, col = src.index(UTM_x, UTM_y)
    row, col = int(row), int(col)

    # Extract band values. Bounds are checked explicitly because a negative
    # index would silently wrap around to the opposite edge of the array
    # instead of raising IndexError.
    band_value_at_xy = [
        band[row, col]
        if 0 <= row < band.shape[0] and 0 <= col < band.shape[1]
        else np.nan
        for band in images_nparray
    ]

    spectral_data.append(band_value_at_xy)

training_shapefile["spectral_data"] = spectral_data
# NOTE: samples that fall outside the raster extent keep NaN band values.
training_shapefile


### Create test and train split

In [None]:
train_indices, test_indices, train_data, test_data = [], [], [], []

# Derive the class codes from the labelled data rather than hard-coding
# "3 classes numbered 1..3", so adding or removing a class needs no edit here.
class_ids = sorted(training_shapefile['LULC_Class'].unique())
number_of_classes = len(class_ids)

# Split every class separately so each one contributes 70 % of its samples to
# training and 30 % to testing.
for i in class_ids:
    class_rows = training_shapefile[training_shapefile['LULC_Class'] == i]
    data_class_i = class_rows['spectral_data'].tolist()
    indices_class_i = class_rows.index.tolist()

    train_data_list, test_data_list, train_index_list, test_index_list = train_test_split(
        data_class_i, indices_class_i, test_size=0.3, random_state=42
    )

    train_data.extend(train_data_list)
    test_data.extend(test_data_list)
    train_indices.extend(train_index_list)
    test_indices.extend(test_index_list)

# Convert data and labels into numpy arrays
train_data = np.array(train_data)
test_data = np.array(test_data)

train_labels = training_shapefile.loc[train_indices, "LULC_Class"].values
test_labels = training_shapefile.loc[test_indices, "LULC_Class"].values


# Convert labels to categorical (one-hot). The width is fixed explicitly so the
# train and test matrices always have the same number of columns; column k
# corresponds to class code k, which leaves column 0 unused when codes start
# at 1.
one_hot_width = int(max(class_ids)) + 1
train_label_categorized = keras.utils.to_categorical(train_labels, num_classes=one_hot_width)
test_label_categorized = keras.utils.to_categorical(test_labels, num_classes=one_hot_width)

# Display data shapes for verification
print(f"Train data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")
print(f"Train labels shape: {train_label_categorized.shape}")
print(f"Test labels shape: {test_label_categorized.shape}")

### The Random Forest Classifier

In [None]:
# Hyper-parameters forwarded unchanged to the scikit-learn estimator.
params = dict(
    max_depth=50,
    n_estimators=200,
    random_state=10,
    warm_start=False,
    min_samples_split=10,
)

# Train on the one-hot encoded labels, then predict the held-out samples.
rf_classifier = RandomForestClassifier(**params)
rf_classifier.fit(train_data, train_label_categorized)
prediction = rf_classifier.predict(test_data)

# Collapse each one-hot row back to a single class index (position of the
# largest entry) for both the predictions and the reference labels.
prediction_binary = prediction.argmax(axis=1)
test_label_binary = test_label_categorized.argmax(axis=1)

# Agreement between predicted and reference classes.
accuracy = accuracy_score(test_label_binary, prediction_binary)
kappa = cohen_kappa_score(test_label_binary, prediction_binary)

print(f"Model's Accuracy: {accuracy:.4f}")
print(f"Cohen's Kappa: {kappa:.4f}")

### Perform the random forest classification

In [None]:
# Classify the image one raster row at a time; each entry appended below is
# the predicted class index for every pixel of that row.
rf_predictions = []

n_rows = images_nparray[0].shape[0]

for row in tqdm(range(n_rows), ncols=100, desc="Classifying..."):
    # Gather this row from every band, then transpose so that each pixel is
    # one sample and each band is one feature column.
    row_features = np.array([images_nparray[b][row] for b in range(len(images))]).T

    # The classifier returns one-hot style rows; keep the position of the
    # largest entry as the class index.
    row_one_hot = rf_classifier.predict(row_features)
    rf_predictions.append(np.argmax(row_one_hot, axis=1))

rf_predictions = np.array(rf_predictions).astype(np.uint8)

print("Classificatoin complete...")

In [None]:
# Reuse the georeferencing (CRS, transform, driver) of the source band, but
# describe the array that is actually written: a single band whose dtype and
# size come from the prediction array. The source nodata value is dropped
# because it may not be representable in the output dtype and does not apply
# to class codes.
out_meta = src.meta.copy()
out_meta.update(
    count=1,
    dtype=str(rf_predictions.dtype),
    height=rf_predictions.shape[0],
    width=rf_predictions.shape[1],
    nodata=None,
)

classified_raster_path = os.path.join(classified_path, "rf.tif")

# Write the class map as band 1 of the output raster.
with rio.open(classified_raster_path, "w", **out_meta) as dst:
    dst.write(rf_predictions, 1)

# Re-open the written file and display it to verify the result.
with rio.open(classified_raster_path) as dataset:
    r_show(dataset, title="RF Classification")