In [None]:
import os
import zipfile
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from google.colab import files
import matplotlib.pyplot as plt
import gdown
import pandas as pd
from tqdm import tqdm

class DatasetPreparation:
    """Extract a zipped image dataset and load it as grayscale images with labels.

    Every file in ``<extract_dir>/<dataset_subdir>`` is expected to be named
    ``<integer label>_<anything>``. Each readable image is loaded as grayscale,
    resized to ``image_size`` and appended to ``images``; its label goes to the
    same position in ``labels``.

    Args:
        dataset_zip_path: Path of the zip archive to extract.
        extract_dir: Directory the archive is extracted into.
        dataset_subdir: Sub-directory (inside ``extract_dir``) holding the images.
        image_size: ``(width, height)`` passed to ``cv2.resize``.

    Attributes set by loading:
        images: list of resized grayscale images.
        labels: list of integer labels, parallel to ``images``.
        skipped: names of directory entries that were ignored (sub-directories,
            names without an integer prefix, files OpenCV could not decode).
    """

    def __init__(self, dataset_zip_path, extract_dir='dataset',
                 dataset_subdir='DataSet_04', image_size=(128, 128)):
        self.dataset_zip_path = dataset_zip_path
        self.extract_dir = extract_dir
        self.dataset_subdir = dataset_subdir
        self.image_size = image_size
        self.images = []
        self.labels = []
        self.skipped = []
        self.extract_dataset()

    def extract_dataset(self):
        """Extract the archive and (re)populate ``images``, ``labels`` and ``skipped``."""
        with zipfile.ZipFile(self.dataset_zip_path, 'r') as zip_ref:
            zip_ref.extractall(self.extract_dir)

        # Start from a clean slate so calling this method again does not
        # duplicate every sample.
        self.images, self.labels, self.skipped = [], [], []

        image_dir = os.path.join(self.extract_dir, self.dataset_subdir)
        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded train/test split) stable across runs
        # and machines.
        for filename in sorted(os.listdir(image_dir)):
            file_path = os.path.join(image_dir, filename)
            if not os.path.isfile(file_path):
                self.skipped.append(filename)
                continue

            # Parse the label first so stray files are rejected cheaply.
            try:
                label = int(filename.split('_')[0])
            except ValueError:
                self.skipped.append(filename)
                continue

            image = cv2.imread(file_path, 0)  # Read as grayscale
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising.
                self.skipped.append(filename)
                continue

            self.images.append(cv2.resize(image, self.image_size))
            self.labels.append(label)

        if self.skipped:
            print(f"Skipped {len(self.skipped)} unusable entries in {image_dir}: {self.skipped[:5]}")

class FeatureExtraction:
    """Feature extractors for batches of equally sized grayscale images."""

    def __init__(self):
        # The HOG descriptor is created on first use and then reused, because
        # its configuration never changes between images.
        self._hog = None

    def hog_features(self, images):
        """Return an array with one flattened HOG descriptor per image."""
        return np.array([self.hog_descriptor(image) for image in images])

    def _get_hog(self):
        """Build the HOG descriptor once and cache it on the instance."""
        hog = getattr(self, '_hog', None)
        if hog is None:
            winSize = (128, 128)
            blockSize = (16, 16)
            blockStride = (8, 8)
            cellSize = (8, 8)
            nbins = 9
            derivAperture = 1
            winSigma = -1
            histogramNormType = 0
            L2HysThreshold = 0.2
            gammaCorrection = 1
            nlevels = 128
            hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins, derivAperture, winSigma,
                                    histogramNormType, L2HysThreshold, gammaCorrection, nlevels)
            self._hog = hog
        return hog

    def hog_descriptor(self, image):
        """Compute the flattened HOG descriptor of a single image.

        The descriptor window is 128x128.
        NOTE(review): presumably the image must already be 128x128 grayscale,
        as every caller in this file resizes first; confirm before passing
        other sizes.
        """
        return self._get_hog().compute(image).flatten()

    def pca_features(self, images, n_components=100, pca=None):
        """Project flattened images onto principal components.

        Args:
            images: Array of images; the first axis indexes samples.
            n_components: Number of components when a new PCA is fitted.
            pca: Optional already-fitted object exposing ``transform``. When
                given, ``images`` are only transformed with it, which lets
                test data share the projection learned on training data.
                When omitted, a new PCA is fitted on ``images`` (the original
                behaviour).

        Returns:
            2-D array with one row of projected features per image.
        """
        flat_images = self.flatten_images(images)
        if pca is not None:
            return pca.transform(flat_images)
        return PCA(n_components=n_components).fit_transform(flat_images)

    def flatten_images(self, images):
        """Reshape ``images`` to 2-D: one row per sample, all pixels in a row."""
        images = np.asarray(images)
        return images.reshape(images.shape[0], -1)

class PatternRecognition:
    """Registry of named classifiers with thin train / predict / score helpers."""

    def __init__(self):
        # Classifiers are looked up by these short names in every method below.
        self.classifiers = dict(
            KNN=KNeighborsClassifier(n_neighbors=3),
            SVM=SVC(kernel='linear'),
        )

    def train_classifier(self, classifier, X_train, y_train):
        """Fit the classifier registered under ``classifier`` on the training data."""
        model = self.classifiers[classifier]
        model.fit(X_train, y_train)

    def predict_classifier(self, classifier, X_test):
        """Return the predictions of the classifier registered under ``classifier``."""
        model = self.classifiers[classifier]
        return model.predict(X_test)

    def accuracy(self, y_true, y_pred):
        """Return ``accuracy_score`` of the predictions against the true labels."""
        score = accuracy_score(y_true, y_pred)
        return score

def download_dataset():
    """Download the dataset archive from Google Drive to ``DataSet_04.zip``.

    Raises:
        RuntimeError: If gdown reports a failed download, or the saved file is
            not a zip archive (for example an HTML error page served instead
            of the file).
    """
    url = "https://drive.google.com/uc?export=download&id=1QvluzGHbalr_-PQ4oeh8QtvbaGhMMwmr"
    dataset_zip_path = "DataSet_04.zip"
    downloaded_path = gdown.download(url, dataset_zip_path, quiet=False)

    # NOTE(review): some gdown versions return None on failure instead of
    # raising; without this check "Download complete." would be printed anyway.
    if downloaded_path is None:
        raise RuntimeError(f"Download of {url} failed")
    # Fail here with a clear message rather than later with BadZipFile.
    if not zipfile.is_zipfile(dataset_zip_path):
        raise RuntimeError(f"{dataset_zip_path} was downloaded but is not a valid zip archive")

    print("Download complete.")

def run_experiments(dataset_zip_path, train_test_ratios=(0.6, 0.7, 0.8), feature_methods=('None', 'HOG')):
    """Evaluate every feature-extractor / classifier pair at several train ratios.

    Args:
        dataset_zip_path: Path of the dataset archive to load.
        train_test_ratios: Fractions of the data used for training; each one
            becomes a column named after its percentage (e.g. ``'60'``).
        feature_methods: Feature extraction methods to evaluate. Supported
            values are ``'None'`` (raw flattened pixels), ``'HOG'`` and ``'PCA'``.

    Returns:
        pandas.DataFrame with one row per (feature extraction, classifier)
        pair and one accuracy column per train ratio. The table is also
        printed.

    Raises:
        ValueError: If ``feature_methods`` contains an unsupported name.
    """
    dataset = DatasetPreparation(dataset_zip_path)
    images = np.array(dataset.images)
    labels = np.array(dataset.labels)

    feature_extractor = FeatureExtraction()
    pattern_recognizer = PatternRecognition()

    # One row per (feature extraction, classifier) pair, filled in column by
    # column as each ratio is evaluated.
    rows = {}

    for ratio in tqdm(train_test_ratios, desc="Train-Test Ratios"):
        X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=1-ratio, random_state=42)
        # round() rather than int(): int(0.57 * 100) would truncate to 56.
        ratio_column = f'{int(round(ratio * 100))}'

        for feat_extraction in feature_methods:
            if feat_extraction == 'HOG':
                X_train_feat = feature_extractor.hog_features(X_train)
                X_test_feat = feature_extractor.hog_features(X_test)
            elif feat_extraction == 'PCA':
                # Fit the projection on the training data only and reuse it for
                # the test data; fitting a second PCA on the test set would put
                # the two sets in different, incomparable feature spaces.
                flat_train = feature_extractor.flatten_images(X_train)
                flat_test = feature_extractor.flatten_images(X_test)
                pca = PCA(n_components=min(100, *flat_train.shape))
                X_train_feat = pca.fit_transform(flat_train)
                X_test_feat = pca.transform(flat_test)
            elif feat_extraction == 'None':
                X_train_feat = feature_extractor.flatten_images(X_train)
                X_test_feat = feature_extractor.flatten_images(X_test)
            else:
                raise ValueError(f"Unknown feature extraction method: {feat_extraction!r}")

            for classifier in ['KNN', 'SVM']:
                pattern_recognizer.train_classifier(classifier, X_train_feat, y_train)
                y_pred = pattern_recognizer.predict_classifier(classifier, X_test_feat)
                acc = pattern_recognizer.accuracy(y_test, y_pred)
                row = rows.setdefault(
                    (feat_extraction, classifier),
                    {'Feature Extraction': feat_extraction, 'Pattern Recognition': classifier},
                )
                row[ratio_column] = acc

    # Format results as a table, ordered by feature extraction then classifier.
    df_results = (
        pd.DataFrame(list(rows.values()))
        .sort_values(['Feature Extraction', 'Pattern Recognition'])
        .reset_index(drop=True)
    )
    print(df_results)
    # A bare `df_results` expression does nothing inside a function; return the
    # table so the caller (or the notebook cell) can display or reuse it.
    return df_results

def upload_and_predict():
    """Predict the label of user-uploaded images with a KNN on HOG features.

    Loads ``DataSet_04.zip``, trains a 3-nearest-neighbour classifier on the
    HOG features of a 70% training split, then opens the Colab upload dialog
    and, for every uploaded file that can be decoded as an image, shows the
    image with its predicted label. Files that cannot be read as images are
    reported and skipped.
    """
    feature_extractor = FeatureExtraction()
    dataset = DatasetPreparation("DataSet_04.zip")
    images = np.array(dataset.images)
    labels = np.array(dataset.labels)

    # Only the training part of the split is needed here.
    X_train, _, y_train, _ = train_test_split(images, labels, test_size=0.3, random_state=42)
    X_train_features = feature_extractor.hog_features(X_train)

    # Fit once; the model does not depend on the uploaded files.
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(X_train_features, y_train)

    uploaded = files.upload()
    for file_name in uploaded.keys():
        image = cv2.imread(file_name, 0)  # Read as grayscale
        if image is None:
            # cv2.imread returns None when the file is not a decodable image.
            print(f'Skipping {file_name}: could not be read as an image')
            continue
        image = cv2.resize(image, (128, 128))  # Resize to 128x128 pixels
        image_feature = feature_extractor.hog_descriptor(image).reshape(1, -1)

        prediction = knn.predict(image_feature)

        plt.imshow(image, cmap='gray')
        plt.title(f'Predicted Label: {prediction[0]}')
        plt.show()
        print(f'Image predicted: {prediction[0]}')

In [None]:
# Step 1: fetch the dataset archive from Google Drive into the working
# directory as DataSet_04.zip (required by the cells below).
print("Download the dataset")
download_dataset()

Download the dataset


Downloading...
From: https://drive.google.com/uc?export=download&id=1QvluzGHbalr_-PQ4oeh8QtvbaGhMMwmr
To: /content/DataSet_04.zip
100%|██████████| 14.7M/14.7M [00:00<00:00, 106MB/s] 

Download complete.





In [None]:
# Step 2: compare feature extractors and classifiers at several train/test
# ratios. The recorded run below took roughly 12m44s for the three ratios.
print("Experiments with different train-test ratios")
run_experiments("DataSet_04.zip")

Experiments with different train-test ratios


Train-Test Ratios: 100%|██████████| 3/3 [12:44<00:00, 254.98s/it]

  Feature Extraction Pattern Recognition        60        70        80
0                HOG                 KNN  0.913519  0.916483  0.922465
1                HOG                 SVM  0.974818  0.974812  0.976806
2               None                 KNN  0.407886  0.417587  0.423459
3               None                 SVM  0.899934  0.897481  0.897946





In [None]:
# Step 3: open the Colab upload dialog and predict a label for each uploaded
# image with a KNN classifier trained on HOG features.
print("Upload an image to predict its label")
upload_and_predict()