# Jupyter notebook cell (exported); the original cell marker was "In [None]:"

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial.distance import minkowski
from sklearn.metrics import confusion_matrix


# FEATURE EXTRACTION FUNCTIONS
# Crop the image into 5 regions: top, bottom, centre, left and right.
def generate_five_crops(image):
    """Split a 3-D image array into five rectangular regions.

    The image is cut at one quarter and three quarters of its height and
    width. The returned list holds, in order: the top band, the bottom band
    (both full width), then the centre, left and right parts of the middle
    band.
    """
    height, width, _ = image.shape

    # Cut positions shared by all five regions.
    top, bottom = height // 4, 3 * height // 4
    left, right = width // 4, 3 * width // 4

    middle_band = slice(top, bottom)
    return [
        image[:top, :],
        image[bottom:height, :],
        image[middle_band, left:right],
        image[middle_band, :left],
        image[middle_band, right:width],
    ]
# Divide a crop into 15 segments (3 rows x 5 columns).
def divide_into_segments(crop):
    """Tile a 3-D crop into a 3 x 5 grid and return the 15 tiles.

    Tiles are listed row by row. Tile size is the integer quotient of the
    crop size by the grid size, so any remainder rows/columns at the bottom
    and right edges are left out.
    """
    rows, cols = 3, 5
    height, width, _ = crop.shape
    tile_h = height // rows
    tile_w = width // cols

    return [
        crop[r * tile_h:(r + 1) * tile_h, c * tile_w:(c + 1) * tile_w]
        for r in range(rows)
        for c in range(cols)
    ]

#calculating the mean and std for r,g,b values
def rgb_mean_std(segment):
    """Return the per-channel mean and standard deviation of a segment.

    Args:
        segment: Array-like of shape (height, width, channels).

    Returns:
        Tuple ``(mean, std)``; each is a 1-D array with one value per
        channel, computed over all pixels of the segment.
    """
    mean_rgb = np.mean(segment, axis=(0, 1))
    std_rgb = np.std(segment, axis=(0, 1))

    return mean_rgb, std_rgb


#creating the 90 dimensional vector
def extract_90d_features(crop):
    """Build the feature vector of a crop from its segment statistics.

    The crop is split with ``divide_into_segments``; for every segment the
    per-channel means followed by the per-channel standard deviations are
    appended. With 15 segments and 3 channels this yields
    15 * (3 + 3) = 90 values.

    Args:
        crop: 3-D image array (height, width, channels).

    Returns:
        1-D ``numpy.ndarray`` with the concatenated statistics.
    """
    features = []

    for seg in divide_into_segments(crop):
        mean_rgb, std_rgb = rgb_mean_std(seg)
        features.extend(mean_rgb)
        features.extend(std_rgb)

    return np.array(features)


def build_dataset(base_path, class_names):
    """Load every image of every class and turn its crops into samples.

    Each class lives in ``base_path/<class_name>``. Every readable image in
    that folder is split with ``generate_five_crops`` and each crop becomes
    one sample described by ``extract_90d_features``.

    Args:
        base_path: Directory that contains one sub-directory per class.
        class_names: Sub-directory names; the position of a name in this
            sequence is used as its integer label.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: the feature vectors and the
        matching integer labels.
    """
    X, y = [], []
    for label, class_name in enumerate(class_names):
        class_path = os.path.join(base_path, class_name)
        # os.listdir gives entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread does not raise on unreadable or non-image files
                # (e.g. .DS_Store, Thumbs.db); it returns None instead.
                print("Skipping unreadable image:", img_path)
                continue
            for crop in generate_five_crops(image):
                X.append(extract_90d_features(crop))
                y.append(label)
    return np.array(X), np.array(y)

# A1
def dot_product(vec_a, vec_b):
    """Return the sum of the element-wise products of two vectors.

    Elements are paired with ``zip``, so pairing stops at the end of the
    shorter vector.
    """
    total = 0
    for left, right in zip(vec_a, vec_b):
        total = total + left * right
    return total


def euclidean_norm(vec):
    """Return the square root of the sum of squared components of ``vec``."""
    squared_sum = 0
    for component in vec:
        squared_sum = squared_sum + component * component
    return np.sqrt(squared_sum)



# A2
def mean_vector(data):
    """Return the mean of ``data`` taken along axis 0 (one value per column)."""
    centroid = np.mean(data, 0)
    return centroid
def std_vector(data):
    """Return the standard deviation of ``data`` along axis 0 (per column)."""
    spread = np.std(data, 0)
    return spread

def interclass_distance(c1, c2):
    """Return the Euclidean distance between two class centroids.

    Args:
        c1: Centroid (mean vector) of the first class.
        c2: Centroid (mean vector) of the second class; same length as c1.

    Returns:
        The L2 norm of ``c1 - c2`` as a float.
    """
    difference = np.asarray(c1, dtype=float) - np.asarray(c2, dtype=float)
    return np.linalg.norm(difference)



# A3

def feature_histogram(feature_column, bins=10):
    """Compute the histogram of one feature column.

    Args:
        feature_column: 1-D array-like with the values of a single feature.
        bins: Number of equal-width bins (or explicit bin edges), passed
            straight to ``numpy.histogram``. Defaults to 10.

    Returns:
        Tuple ``(counts, edges)`` as returned by ``numpy.histogram``:
        the number of values per bin and the bin edges.
    """
    counts, edges = np.histogram(feature_column, bins=bins)
    return counts, edges


# Alternative name kept so code that calls ``compute_histogram`` keeps working.
compute_histogram = feature_histogram



# A4 & A5
def minkowski_distance(vec_a, vec_b, p):
    """Return the Minkowski distance of order ``p`` between two vectors.

    Computes ``(sum(|a - b| ** p)) ** (1 / p)`` over the element pairs.
    ``p=1`` is the Manhattan distance and ``p=2`` the Euclidean distance.

    Args:
        vec_a: First vector.
        vec_b: Second vector; elements are paired with ``zip``.
        p: Order of the distance; must be positive.

    Raises:
        ValueError: If ``p`` is not positive (the formula is undefined for
            ``p == 0`` and not a distance for negative ``p``).
    """
    if p <= 0:
        raise ValueError("p must be positive")
    return sum(abs(a - b) ** p for a, b in zip(vec_a, vec_b)) ** (1 / p)


# Alternative name kept so code that calls ``minkowski_distance_custom``
# keeps working.
minkowski_distance_custom = minkowski_distance


# A10

def custom_knn_predict(X_train, y_train, test_vec, k):
    """Predict the label of one sample with a k-nearest-neighbour vote.

    Distances are Euclidean (Minkowski with p=2). The ``k`` closest training
    samples vote; when several labels share the highest count, the label
    whose neighbour is closest to ``test_vec`` wins, so the result does not
    depend on set iteration order.

    Args:
        X_train: Training samples, shape (n_samples, n_features).
        y_train: Labels aligned with ``X_train``.
        test_vec: Sample to classify, length n_features.
        k: Number of neighbours; values larger than n_samples use all
            training samples.

    Returns:
        The winning label, taken from ``y_train``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``X_train`` is empty.
    """
    train = np.asarray(X_train, dtype=float)
    if k < 1 or train.shape[0] == 0:
        raise ValueError("k must be >= 1 and X_train must not be empty")

    # One row per sample, so the norm below is always taken per sample.
    train = train.reshape(train.shape[0], -1)
    query = np.asarray(test_vec, dtype=float).ravel()

    distances = np.linalg.norm(train - query, axis=1)
    # Stable sort: equally distant samples keep their training order.
    nearest = np.argsort(distances, kind="stable")[:k]
    labels = [y_train[i] for i in nearest]

    # max() returns the first maximal item; labels are ordered by distance,
    # so ties go to the label of the closest neighbour.
    return max(labels, key=labels.count)


def custom_knn_batch(X_train, y_train, X_test, k):
    """Classify every sample of ``X_test`` with ``custom_knn_predict``.

    Returns the predicted labels as a numpy array, in the order of
    ``X_test``.
    """
    predictions = []
    for sample in X_test:
        predictions.append(custom_knn_predict(X_train, y_train, sample, k))
    return np.array(predictions)



# A12 & A13

def compute_metrics(y_true, y_pred):
    """Compute binary classification metrics from a confusion matrix.

    The second class of the confusion matrix (row/column index 1) is treated
    as the positive class.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        Tuple ``(cm, accuracy, precision, recall, f1)``. A metric whose
        denominator is zero (e.g. precision when nothing was predicted
        positive) is reported as 0.0 instead of ``nan``.

    Raises:
        ValueError: If the confusion matrix is not 2x2, i.e. the labels do
            not contain exactly two distinct classes.
    """
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape != (2, 2):
        raise ValueError(
            "compute_metrics expects exactly two classes, got a confusion "
            "matrix of shape %s" % (cm.shape,)
        )

    # Rows are true labels, columns are predictions.
    tn, fp, fn, tp = cm.ravel()

    accuracy = np.trace(cm) / np.sum(cm)
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    return cm, accuracy, precision, recall, f1



# MAIN PROGRAM 

if __name__ == "__main__":
    # Runs lab exercises A1-A13 on the image dataset: builds the feature
    # matrix, prints/plots the requested statistics and evaluates both the
    # scikit-learn and the custom kNN classifier.

    # DATASET CREATION
    dataset_path = "Lab2_Dataset"
    classes = ["Viya", "Agastya"]
    X, y = build_dataset(dataset_path, classes)

    print("Dataset shape:", X.shape)
    print("Labels shape:", y.shape)

    # A1: custom dot product / norm compared against numpy
    A, B = X[0], X[1]
    print("Dot product (custom):", dot_product(A, B))
    print("Dot product (numpy):", np.dot(A, B))
    print("Norm (custom):", euclidean_norm(A))
    print("Norm (numpy):", np.linalg.norm(A))

    # A2: distance between the two class centroids
    class0, class1 = X[y == 0], X[y == 1]
    centroid0, centroid1 = mean_vector(class0), mean_vector(class1)
    print("Interclass distance:", interclass_distance(centroid0, centroid1))

    # A3: histogram, mean and variance of the first feature
    feature_data = X[:, 0]
    hist, bins = feature_histogram(feature_data)
    # Plot with the edges computed above so plot and numbers agree.
    plt.hist(feature_data, bins=bins)
    plt.title("Histogram of Feature 0")
    plt.show()
    print("Mean:", np.mean(feature_data), "Variance:", np.var(feature_data))

    # A4 & A5: Minkowski distance for p = 1..10, checked against SciPy
    p_vals = range(1, 11)
    distances = [minkowski_distance(A, B, p) for p in p_vals]
    plt.plot(p_vals, distances)
    plt.xlabel("p")
    plt.ylabel("Distance")
    plt.show()
    print("SciPy Minkowski (p=3):", minkowski(A, B, 3))

    # A6 to A9: scikit-learn kNN baseline
    # A fixed seed keeps the split, and therefore every reported metric,
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train, y_train)
    print("kNN Accuracy:", knn.score(X_test, y_test))
    print("Predictions:", knn.predict(X_test))

    # A10 to A13: custom kNN and its evaluation metrics
    custom_preds = custom_knn_batch(X_train, y_train, X_test, 3)
    cm, acc, prec, rec, f1 = compute_metrics(y_test, custom_preds)
    print("Confusion Matrix:\n", cm)
    print("Accuracy:", acc)
    print("Precision:", prec)
    print("Recall:", rec)
    print("F1-score:", f1)