In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# -------- Feature Extraction Functions -------- #

def extract_hog_features(image):
    """Extracts HOG (shape) features."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hog_features, hog_image = hog(
        gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=True,
        transform_sqrt=True
    )
    return hog_features


def extract_color_features(image, bins=32):
    """Extracts color histogram features in RGB and HSV."""
    rgb_hist = cv2.calcHist([image], [0, 1, 2], None,
                            [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    rgb_hist = cv2.normalize(rgb_hist, rgb_hist).flatten()

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv_hist = cv2.calcHist([hsv], [0, 1, 2], None,
                            [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    hsv_hist = cv2.normalize(hsv_hist, hsv_hist).flatten()

    return np.hstack([rgb_hist, hsv_hist])


def extract_combined_features(image):
    """Combine HOG + Color features."""
    hog_feat = extract_hog_features(image)
    color_feat = extract_color_features(image)
    return np.hstack([hog_feat, color_feat])


# -------- Dataset Preparation -------- #

def load_dataset(ball_folder, non_ball_folder, image_size=(128, 128)):
    X, y = [], []
    for folder, label in [(ball_folder, 1), (non_ball_folder, 0)]:
    # for folder, label in [(ball_folder, 1)]:
        for file in os.listdir(folder):
            img_path = os.path.join(folder, file)
            image = cv2.imread(img_path)
            if image is None:
                continue
            image = cv2.resize(image, image_size)
            features = extract_combined_features(image)
            X.append(features)
            y.append(label)
    return np.array(X), np.array(y)


# -------- Model Training -------- #

ball_images_path = "../data/raw/images/ball/"
non_ball_images_path = "../data/raw/images/no_ball/"

X, y = load_dataset(ball_images_path, non_ball_images_path)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Feature matrix shape:", X_train.shape)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "ball_detector_model.pkl")
print("Model saved as 'ball_detector_model.pkl'")

Feature matrix shape: (3, 73636)
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Model saved as 'ball_detector_model.pkl'


In [23]:
# -------- Test on New Image -------- #

def predict_new_image(image_path, model, image_size=(128, 128)):
    """Predicts whether a ball is present in the given image."""
    image = cv2.imread(image_path)
    if image is None:
        print("Error: Unable to read image at", image_path)
        return
    image = cv2.resize(image, image_size)
    features = extract_combined_features(image).reshape(1, -1)
    prediction = model.predict(features)[0]
    prob = model.predict_proba(features)[0][0]

    label = "No Ball Detected" if prediction == 0 else "Ball Detected"
    print(f"Prediction: {label} (Confidence: {prob:.2f})")

    # Optional: Display the image
    # cv2.imshow(label, image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()


# Load saved model for testing
loaded_model = joblib.load("ball_detector_model.pkl")

# Example test image path
test_image_path = "../data/raw/images/no_object/sample.jpg"
predict_new_image(test_image_path, loaded_model)

test_image_path = "../data/raw/images/no_object/ground_26.jpg"
predict_new_image(test_image_path, loaded_model)

Prediction: No Ball Detected (Confidence: 0.50)
Prediction: Ball Detected (Confidence: 0.46)


# Part â€“ 3 : Visualization (2D) Using PCs and t-SNE

##### Use the data set mnist_test_nolabels.csv in the following tasks:


In [None]:
"""
1. Carry out Principal Component Analysis on the data set and create a bar-plot of variances explained by the PCs. Also create a second bar-plot, this time showing the cumulative variance explained curve in addition to the bars.
"""

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------- LOAD DATA ----------------------
df = pd.read_csv("mnist_test_nolabels.csv")
X = df.values