# In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score


# FEATURE EXTRACTION


def generate_five_crops(image):
    """Cut an image array into five non-overlapping regions.

    The frame is split into a top band (first quarter of the rows), a
    bottom band (last quarter of the rows) and, from the remaining middle
    band, a centre block plus a left and a right strip.

    Args:
        image: Array indexed as ``image[row, column, ...]``. Only the
            first two dimensions are used, so both colour (H, W, C) and
            single-channel (H, W) arrays are accepted.

    Returns:
        A list of five slices of ``image`` in the order
        ``[top, bottom, centre, left, right]``.
    """
    # Only height and width matter here; taking shape[:2] avoids failing on
    # 2-D (grayscale) input, which a three-way unpack would reject.
    h, w = image.shape[:2]

    top_end, bottom_start = h // 4, 3 * h // 4
    left_end, right_start = w // 4, 3 * w // 4

    return [
        image[:top_end, :],
        image[bottom_start:h, :],
        image[top_end:bottom_start, left_end:right_start],
        image[top_end:bottom_start, :left_end],
        image[top_end:bottom_start, right_start:w],
    ]


def divide_into_segments(crop, rows=3, cols=5):
    """Split a crop into a ``rows`` x ``cols`` grid of equally sized tiles.

    Tile size is ``height // rows`` by ``width // cols``; any trailing rows
    or columns left over by the integer division are not included in any
    tile.

    Args:
        crop: Array indexed as ``crop[row, column, ...]``. Only the first
            two dimensions are used, so 2-D arrays work as well as 3-D.
        rows: Number of tiles along the vertical axis (default 3).
        cols: Number of tiles along the horizontal axis (default 5).

    Returns:
        A list of ``rows * cols`` slices of ``crop`` in row-major order
        (left to right, then top to bottom).
    """
    # shape[:2] keeps this usable for single-channel input as well.
    h, w = crop.shape[:2]

    seg_h = h // rows
    seg_w = w // cols

    return [
        crop[r * seg_h:(r + 1) * seg_h, c * seg_w:(c + 1) * seg_w]
        for r in range(rows)
        for c in range(cols)
    ]


def extract_90d_features(crop):
    """Describe a crop by per-tile channel statistics.

    The crop is tiled with ``divide_into_segments``; for every tile the
    per-channel mean is emitted, followed by the per-channel standard
    deviation. With 15 tiles and a 3-channel crop this yields
    15 * (3 + 3) = 90 values.

    Note: the channel order is whatever the input array uses. Images loaded
    with ``cv2.imread`` are in BGR order.

    Args:
        crop: 3-D image array (rows, columns, channels).

    Returns:
        1-D NumPy array of the concatenated statistics, tile by tile.
    """
    per_tile = [
        # Reduce over both spatial axes so one value per channel remains.
        np.concatenate((np.mean(tile, axis=(0, 1)), np.std(tile, axis=(0, 1))))
        for tile in divide_into_segments(crop)
    ]
    return np.concatenate(per_tile)


def build_dataset(base_path, class_names):
    """Build a feature matrix and label vector from per-class image folders.

    Each class is expected in ``base_path/<class_name>``. Every readable
    ``.png``/``.jpg``/``.jpeg`` file is cut into five crops and each crop
    contributes one feature row, so the rows belonging to one image are
    always consecutive.

    Args:
        base_path: Directory containing one sub-directory per class.
        class_names: Sequence of sub-directory names; the position of a
            name in this sequence is used as its integer label.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked feature rows and the
        matching integer labels.

    Raises:
        OSError: If a class directory cannot be listed (for example when
            it does not exist).
    """
    X, y = [], []
    image_suffixes = (".png", ".jpg", ".jpeg")

    for label, class_name in enumerate(class_names):
        class_path = os.path.join(base_path, class_name)

        print("Reading folder:", class_path)

        # os.listdir returns names in arbitrary, platform-dependent order.
        # Sorting makes the row order, and therefore any seeded split done
        # downstream, reproducible across runs and machines.
        for img_name in sorted(os.listdir(class_path)):

            if not img_name.lower().endswith(image_suffixes):
                continue

            img_path = os.path.join(class_path, img_name)
            image = cv2.imread(img_path)

            # cv2.imread signals an unreadable file by returning None.
            if image is None:
                print("Skipped:", img_path)
                continue

            for crop in generate_five_crops(image):
                X.append(extract_90d_features(crop))
                y.append(label)

    return np.array(X), np.array(y)




# METRICS FOR LAB-04 A1


def classification_metrics(model, X_train, X_test, y_train, y_test):
    """Evaluate a fitted classifier on its training and test splits.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: True training labels.
        y_test: True test labels.

    Returns:
        Tuple ``(cm_train, cm_test, metrics)``: the two confusion matrices
        and a dict with precision, recall and F1 for each split under the
        keys ``train_*`` / ``test_*``.

    Note:
        The scorers are called with their default settings.
        NOTE(review): in scikit-learn the default averaging is for binary
        labels - confirm before using this with more than two classes.
    """
    fitted_pred = model.predict(X_train)
    held_out_pred = model.predict(X_test)

    def _both_splits(scorer):
        # Apply one scorer to the training split first, then the test split.
        return scorer(y_train, fitted_pred), scorer(y_test, held_out_pred)

    cm_train, cm_test = _both_splits(confusion_matrix)
    precision_train, precision_test = _both_splits(precision_score)
    recall_train, recall_test = _both_splits(recall_score)
    f1_train, f1_test = _both_splits(f1_score)

    metrics = {
        "train_precision": precision_train,
        "test_precision": precision_test,
        "train_recall": recall_train,
        "test_recall": recall_test,
        "train_f1": f1_train,
        "test_f1": f1_test,
    }

    return cm_train, cm_test, metrics

# A3-A5 SYNTHETIC DATA VISUALIZATION

def generate_random_training():
    """Draw a small random two-feature training set with binary labels.

    Twenty points are sampled uniformly from [1, 10) in both coordinates
    using NumPy's global random state. A point is labelled 1 when the sum
    of its two coordinates exceeds 10, otherwise 0.

    Returns:
        Tuple ``(X, y)``: the (20, 2) point array and the integer labels.
    """
    points = np.random.uniform(low=1, high=10, size=(20, 2))
    first, second = points.T
    labels = (first + second > 10).astype(int)
    return points, labels


def plot_training_data(X, y):
    """Show a scatter plot of two-feature points coloured by class.

    Points labelled 0 are drawn in blue and points labelled 1 in red; the
    figure is displayed immediately.

    Args:
        X: Array of points with the two features in columns 0 and 1.
        y: Array of 0/1 labels aligned with the rows of ``X``.
    """
    for class_label, colour in ((0, "blue"), (1, "red")):
        members = X[y == class_label]
        plt.scatter(members[:, 0], members[:, 1], color=colour)

    plt.title("Training Data")
    plt.show()


def plot_knn_boundary(X, y, k):
    """Plot the decision regions of a k-NN classifier fitted on ``X``/``y``.

    A classifier with ``k`` neighbours is trained on the given points and
    used to label a regular grid covering [0, 10) x [0, 10) in steps of
    0.1. The grid predictions are drawn faintly as the background and the
    training points are drawn on top with black edges.

    Args:
        X: Training points with the two features in columns 0 and 1.
        y: Class labels aligned with the rows of ``X``.
        k: Number of neighbours for the classifier.
    """
    classifier = KNeighborsClassifier(n_neighbors=k).fit(X, y)

    # The same tick values are used along both axes.
    ticks = np.arange(0, 10, 0.1)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    grid = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    grid_labels = classifier.predict(grid)

    colours = plt.cm.coolwarm
    plt.scatter(grid[:, 0], grid[:, 1], c=grid_labels, cmap=colours, alpha=0.2)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=colours, edgecolors="k")
    plt.title(f"k={k}")
    plt.show()



# HYPERPARAMETER TUNING (A7)


def find_best_k(X_train, y_train):
    """Grid-search the neighbour count of a k-NN classifier.

    Values of ``n_neighbors`` from 1 to 14 are compared with 5-fold
    cross-validation on the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        Tuple ``(best_params, best_score)``: the winning parameter dict
        (keyed by ``"n_neighbors"``) and its cross-validation score.
    """
    candidate_ks = list(range(1, 15))
    search = GridSearchCV(
        estimator=KNeighborsClassifier(),
        param_grid={"n_neighbors": candidate_ks},
        cv=5,
    )
    search.fit(X_train, y_train)
    return search.best_params_, search.best_score_



# MAIN


if __name__ == "__main__":
    # Only this script section needs the group-aware splitter, so it is
    # imported locally rather than at module level.
    from sklearn.model_selection import GroupShuffleSplit

    dataset_path = "Lab2_Dataset"
    classes = ["Viya", "Agastya"]

    X, y = build_dataset(dataset_path, classes)

    print("Dataset shape:", X.shape)

    # Fail early with a readable message instead of an error from deep
    # inside the splitter when no image could be loaded.
    if len(X) == 0:
        raise SystemExit(
            f"No readable images found under {dataset_path!r}; nothing to train on."
        )

    # TRAIN TEST SPLIT
    # build_dataset emits the five crops of one image as consecutive rows.
    # A plain row-level random split would put crops of the same photo into
    # both train and test, leaking information and inflating test metrics.
    # Grouping rows by source image keeps every image on one side only.
    crops_per_image = 5
    groups = np.arange(len(X)) // crops_per_image
    splitter = GroupShuffleSplit(n_splits=1, test_size=0.3, random_state=42)
    train_idx, test_idx = next(splitter.split(X, y, groups=groups))
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # MODEL
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)

    # A1
    cm_train, cm_test, metrics = classification_metrics(knn, X_train, X_test, y_train, y_test)

    print("\nTRAIN CONFUSION MATRIX\n", cm_train)
    print("\nTEST CONFUSION MATRIX\n", cm_test)
    print("\nMETRICS:", metrics)

    # A7
    # NOTE: the cross-validation inside find_best_k is not group-aware, so
    # its score can still be optimistic for the same reason as above.
    best_k, best_score = find_best_k(X_train, y_train)
    print("\nBest k:", best_k)
    print("Best CV score:", best_score)

    # A3-A5 demo
    X_syn, y_syn = generate_random_training()
    plot_training_data(X_syn, y_syn)

    for k in [1, 3, 7]:
        plot_knn_boundary(X_syn, y_syn, k)
