In [None]:
#An assumption about unsupervised learning
import cv2
import numpy as np
from sklearn import svm
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os

# Read images and labels
def read_data(label_file, base_path):
    """Read image paths and binary labels from a tab-separated label file.

    Every non-blank line must contain a relative image path and a numeric
    score separated by a tab character. Blank lines are ignored.

    Args:
        label_file: Path of the text file holding ``<path>\\t<score>`` lines.
        base_path: Directory that each relative image path is joined onto.

    Returns:
        A ``(images, labels)`` tuple of two equally long lists: the joined
        image paths, and ``1`` for scores greater than 0.5, otherwise ``0``.

    Raises:
        IndexError: If a non-blank line has no tab-separated score field.
        ValueError: If the score field cannot be parsed as a float.
    """
    images = []
    labels = []
    with open(label_file, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Tolerate empty/whitespace-only lines instead of crashing
                # on the missing score field.
                continue
            parts = stripped.split('\t')
            # The label file uses "/" as separator; map it to the separator
            # of the running platform ("\\" on Windows, unchanged on POSIX).
            relative_path = parts[0].replace("/", os.sep)
            images.append(os.path.join(base_path, relative_path))
            labels.append(int(float(parts[1]) > 0.5))  # Convert to binary labels
    return images, labels

# Extract SIFT features and update labels
def extract_sift_features_and_labels(image_paths, labels):
    """Compute SIFT descriptors per image and keep the matching labels.

    Images are loaded in grayscale. An image that cannot be read triggers a
    printed warning and is skipped; an image for which SIFT returns no
    descriptors is skipped without a message. Only the labels of images that
    produced descriptors are returned, so both result lists stay aligned.

    Args:
        image_paths: Iterable of image file paths.
        labels: Sequence indexed in parallel with ``image_paths``.

    Returns:
        A ``(descriptors_list, valid_labels)`` tuple: one descriptor array
        per retained image, and the label of each retained image.
    """
    detector = cv2.SIFT_create()
    kept_descriptors = []
    kept_labels = []
    for index, path in enumerate(image_paths):
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            print(f"Warning: Unable to read image at {path}")
            continue
        _, found = detector.detectAndCompute(gray, None)
        if found is None:
            # No keypoints detected: drop the image and its label together.
            continue
        kept_descriptors.append(found)
        kept_labels.append(labels[index])
    return kept_descriptors, kept_labels

# Create a bag of features model
def create_bag_of_features(descriptors_list, cluster_size=50, random_state=42):
    """Fit a KMeans visual vocabulary on all descriptors.

    Args:
        descriptors_list: Non-empty sequence of 2-D descriptor arrays that
            share the same number of columns; they are stacked row-wise.
        cluster_size: Number of KMeans clusters (visual words).
        random_state: Seed forwarded to KMeans so the vocabulary is
            reproducible between runs. Pass ``None`` for unseeded behavior.

    Returns:
        The fitted ``KMeans`` estimator.

    Raises:
        ValueError: If ``descriptors_list`` is empty.
    """
    # Fail early with an explicit message rather than deep inside np.vstack.
    if len(descriptors_list) == 0:
        raise ValueError(
            "descriptors_list is empty: no descriptors to cluster"
        )
    stacked = np.vstack(descriptors_list)
    bow_kmeans = KMeans(n_clusters=cluster_size, random_state=random_state)
    bow_kmeans.fit(stacked)
    return bow_kmeans

# Convert descriptors to bag of features
def convert_to_bow_features(descriptors_list, bow_kmeans):
    """Turn per-image descriptors into cluster-count histograms.

    For every descriptor array, each descriptor is assigned to a cluster via
    ``bow_kmeans.predict`` and the assignments are counted per cluster.

    Args:
        descriptors_list: Sequence of descriptor arrays, one per image.
        bow_kmeans: Object exposing ``predict`` and ``n_clusters``.

    Returns:
        A NumPy array built from the histograms, one row per image with
        ``bow_kmeans.n_clusters`` counts.
    """
    vocabulary_size = bow_kmeans.n_clusters
    histograms = []
    for image_descriptors in descriptors_list:
        word_ids = bow_kmeans.predict(image_descriptors)
        # minlength pads the histogram so unused trailing clusters count 0.
        histograms.append(np.bincount(word_ids, minlength=vocabulary_size))
    return np.array(histograms)

# Base path for images
base_image_path = 'C:\\Users\\DOCTOR\\Desktop\\ALL_images\\'

# Read image paths and binary labels from the tab-separated label file
image_paths, labels = read_data('C:\\Users\\DOCTOR\\Desktop\\ALL_images\\labels.txt', base_image_path)

# Extract SIFT features; labels of images without descriptors are dropped
descriptors_list, updated_labels = extract_sift_features_and_labels(image_paths, labels)

# Create bag of features model
# NOTE: the vocabulary is fitted on descriptors from all images, including
# those that later land in the test split.
bow_kmeans = create_bag_of_features(descriptors_list)

# Convert every image to a histogram over the visual vocabulary
bow_features = convert_to_bow_features(descriptors_list, bow_kmeans)

# Split into training and test sets BEFORE scaling, so that the scaler's
# mean/variance are estimated from training rows only (no test-set leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    bow_features, updated_labels, test_size=0.3, random_state=42
)

# Standardize features: fit on the training split, apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled version of the full feature matrix (same scaler), kept under its
# original name in case later cells use it.
bow_features_scaled = scaler.transform(bow_features)

# Train SVM classifier
clf = svm.SVC()
clf.fit(X_train, y_train)

# Make predictions on the held-out test split
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Classification Accuracy:", accuracy)



In [None]:
# Architecture Overview:
# The script is designed for image classification using computer vision and machine
# learning techniques. It begins by reading image paths and labels from a tab-separated label file,
# converting each label to binary (1 if the value is greater than 0.5, otherwise 0).
# The images are then processed using the SIFT (Scale-Invariant Feature Transform) algorithm to extract features.
# These features are used to create a bag-of-features model using KMeans clustering.

# The core of the script involves transforming these SIFT descriptors into a uniform feature set (bag of features)
# and then standardizing these features. The dataset is split into training and test sets, and a Support Vector 
# Machine (SVM) classifier is trained on the training data. The performance of the model is evaluated by
# classification accuracy on the test set. This approach showcases the integration of
# traditional computer vision techniques with modern machine learning algorithms for image classification tasks.