In [29]:
import os
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [30]:
# method='uniform' yields only P + 2 distinct codes (10 bins for P = 8) instead of the 2^P (256) codes of the default method.
# ravel() flattens the 2-D LBP matrix into a 1-D array.
# np.arange(0, n_bins+1) produces integers [0, 1, 2, ..., n_bins].
# That means there are n_bins bins, each spanning a range like:
# Bin 0 → [0,1)
# Bin 1 → [1,2)
# Bin n_bins-1 → [n_bins-1, n_bins)
# So each possible LBP code (0 … n_bins-1) has its own bin
# astype("float") converts the integer counts to floats so the histogram can be normalized.
# Normalize by the total count, adding a small epsilon to avoid division by zero: hist /= (hist.sum() + 1e-6)
def extract_lbp_features(gray_image, radius=1, n_points=8):
    """Return the normalised uniform-LBP histogram of a grayscale image.

    Parameters
    ----------
    gray_image : array-like
        Image handed unchanged to ``local_binary_pattern``
        (presumably a 2-D grayscale array -- confirm against the caller).
    radius : int, default 1
        Radius forwarded to ``local_binary_pattern``.
    n_points : int, default 8
        Number of neighbour points forwarded to ``local_binary_pattern``.

    Returns
    -------
    list of float
        ``n_points + 2`` bin counts divided by their total plus ``1e-6``.
    """
    codes = local_binary_pattern(gray_image, n_points, radius, method='uniform')
    n_bins = n_points + 2

    # Integer edges 0, 1, ..., n_bins give n_bins unit-width bins, so each
    # integer code in 0..n_bins-1 lands in a bin of its own.
    edges = np.arange(0, n_bins + 1)
    counts, _ = np.histogram(codes.ravel(), bins=edges, range=(0, n_bins))

    # Work in floating point; the epsilon keeps the division defined even
    # when the histogram is empty.
    counts = counts.astype("float")
    frequencies = counts / (counts.sum() + 1e-6)
    return frequencies.tolist()

In [31]:
# levels → number of gray levels (controls matrix size).
# symmetric=True → makes matrix undirected by counting (i, j) and (j, i) together.
# normed=True → converts counts into probabilities.
def extract_glcm_features(gray_image):
    """Summarise a grayscale image with six GLCM texture statistics.

    A co-occurrence matrix is built for pixel distances 1, 3 and 5 at the
    angles 0, pi/4, pi/2 and 3*pi/4 (256 gray levels, symmetric, normalised).
    Each property is then reduced to a single number by taking the mean of
    everything ``graycoprops`` returns for it.

    Parameters
    ----------
    gray_image : array-like
        Image handed unchanged to ``graycomatrix``; with ``levels=256`` it is
        presumably an 8-bit grayscale array -- confirm against the caller.

    Returns
    -------
    list
        ``[contrast, dissimilarity, homogeneity, energy, correlation, ASM]``.
    """
    offsets = [1, 3, 5]
    directions = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    cooccurrence = graycomatrix(
        gray_image,
        distances=offsets,
        angles=directions,
        levels=256,
        symmetric=True,
        normed=True,
    )

    # The order here fixes the order of the returned feature vector.
    property_names = (
        'contrast',
        'dissimilarity',
        'homogeneity',
        'energy',
        'correlation',
        'ASM',
    )
    return [graycoprops(cooccurrence, name).mean() for name in property_names]

In [32]:
# Walk each class folder, extract the GLCM and LBP features of every image, and append the combined feature vector and the class label to `features` and `labels` respectively.
def load_data_and_extract_features(dataset_path, extensions=(".tif",), image_size=(256, 256)):
    """Load every image under ``dataset_path`` and build the feature matrix.

    ``dataset_path`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Each image is resized, converted
    to grayscale, and described by the GLCM features followed by the LBP
    histogram.

    Class folders AND the files inside them are visited in sorted order.
    ``os.listdir`` returns entries in arbitrary order, so without sorting the
    row order of the result (and therefore any seeded train/test split made
    from it) could change between machines or filesystems.

    Parameters
    ----------
    dataset_path : str
        Directory holding one sub-directory per class.
    extensions : str or iterable of str, default ``(".tif",)``
        File-name suffixes (matched case-insensitively) that mark an image.
    image_size : tuple of int, default ``(256, 256)``
        Target size passed to ``cv2.resize``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Feature rows and the matching class-name labels. Both arrays are
        empty when no image could be loaded.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    features = []
    labels = []
    print("Feature extraction started ---")
    class_names = sorted(
        d for d in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, d))
    )
    for class_name in class_names:
        class_path = os.path.join(dataset_path, class_name)
        # Sorted so that sample order is deterministic (see docstring).
        image_files = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(suffixes)
        )
        print(f"Processing class: {class_name} ({len(image_files)} images)")
        for image_name in image_files:
            image_path = os.path.join(class_path, image_name)
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None rather than raising.
            if image is None:
                print(f"Warning: Could not read image {image_path}. Skipping.")
                continue
            image = cv2.resize(image, image_size)
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            glcm_features = extract_glcm_features(gray_image)
            lbp_features = extract_lbp_features(gray_image)
            # GLCM statistics first, LBP histogram second.
            features.append(glcm_features + lbp_features)
            labels.append(class_name)
    print("Feature extraction completed.")
    return np.array(features), np.array(labels)

In [35]:
if __name__ == "__main__":
    # Root folder expected to hold one sub-directory per class.
    dataset_path = "UCMerced_LandUse/Images"

    if not (os.path.exists(dataset_path) and os.path.isdir(dataset_path)):
        print(f"Error: The directory '{dataset_path}' does not exist.")
        print("Please update the 'dataset_path' variable with the correct path to your image dataset.")
    else:
        X, y = load_data_and_extract_features(dataset_path)

        if X.shape[0] == 0:
            print("Error: No data was loaded. Please check your dataset folder structure and contents.")
        else:
            print(f"Total samples: {X.shape[0]}, Features per sample: {X.shape[1]}")

            # 75/25 split, stratified on the labels, with a fixed seed.
            X_train, X_test, y_train, y_test = train_test_split(
                X,
                y,
                test_size=0.25,
                random_state=42,
                stratify=y,
            )

            # The scaler is fitted on the training rows only and then reused
            # unchanged on the test rows.
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            print(f"Training set size: {X_train_scaled.shape[0]}")
            print(f"Testing set size: {X_test_scaled.shape[0]}")

            print("\nStarting SVM hyperparameter tuning with GridSearchCV...")
            # 4 values of C x 5 values of gamma x 1 kernel = 20 candidates.
            param_grid = {
                'C': [0.1, 1, 10, 100],
                'gamma': [1, 0.1, 0.01, 0.001, 'scale'],
                'kernel': ['rbf'],
            }
            grid_search = GridSearchCV(
                SVC(probability=True, random_state=42),
                param_grid,
                refit=True,
                verbose=2,
                cv=3,
                n_jobs=-1,
            )
            grid_search.fit(X_train_scaled, y_train)
            print("Tuning complete.")
            print("Best parameters found: ", grid_search.best_params_)

            # refit=True means best_estimator_ has already been retrained on
            # the full training set with the winning parameters.
            svm_classifier = grid_search.best_estimator_

            print("\nEvaluating the best model on the test set...")
            y_pred = svm_classifier.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            print(f"Model Accuracy: {accuracy * 100:.2f}%")

Feature extraction started ---
Processing class: agricultural (100 images)
Processing class: airplane (100 images)
Processing class: baseballdiamond (100 images)
Processing class: beach (100 images)
Processing class: buildings (100 images)
Processing class: harbor (100 images)
Processing class: intersection (100 images)
Processing class: mediumresidential (100 images)
Processing class: mobilehomepark (100 images)
Processing class: overpass (100 images)
Processing class: parkinglot (100 images)
Processing class: river (100 images)
Processing class: runway (100 images)
Processing class: sparseresidential (100 images)
Processing class: storagetanks (100 images)
Processing class: tenniscourt (100 images)
Feature extraction completed.
Total samples: 1600, Features per sample: 16
Training set size: 1200
Testing set size: 400

Starting SVM hyperparameter tuning with GridSearchCV...
Fitting 3 folds for each of 20 candidates, totalling 60 fits
Tuning complete.
Best parameters found:  {'C': 100,