In [None]:
import cv2 as cv
import numpy as np
import os
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm import tqdm
import matplotlib.pyplot as plt
from google.colab import drive

In [None]:
class BoneFracturePipeline:
    """Bag-of-visual-words image classifier: SIFT -> KMeans vocabulary -> SVM.

    Workflow:
        1. ``train(train_dir)`` fits a ``MiniBatchKMeans`` vocabulary of ``K``
           visual words on the SIFT descriptors of the training images.
        2. ``run_classification(train_dir, val_dir, test_dir)`` converts every
           image into a ``K``-bin histogram of visual words, standardises the
           histograms, fits an RBF ``SVC`` and prints accuracy and confusion
           matrices for the validation and test splits.
        3. ``visualize_keypoints(img_path, output_path)`` saves a diagnostic
           figure comparing SIFT keypoints with and without the bone mask.

    Each split directory must contain one sub-directory per entry of
    ``class_folders``; the position of a folder in that list is its label.

    Args:
        base_dir: Root directory of the dataset (stored, not read by the class).
        class_folders: Class sub-directory names, in label order.
        image_size: ``(width, height)`` every image is resized to before SIFT.
        K: Vocabulary size, i.e. number of KMeans clusters / histogram bins.
        random_state: Seed forwarded to ``MiniBatchKMeans``. Without a fixed
            seed, repeated runs build different vocabularies and therefore
            report different metrics.
    """

    def __init__(self, base_dir, class_folders, image_size=(256, 256), K=100, random_state=42):
        self.base_dir = base_dir
        self.class_folders = class_folders
        self.image_size = image_size
        self.K = K
        self.random_state = random_state
        self.kmeans = MiniBatchKMeans(n_clusters=K, batch_size=1000, random_state=random_state)
        self.scaler = StandardScaler()
        self.svm = SVC(kernel='rbf')

    def generate_bone_mask(self, img):
        """Return a binary (0/255) mask computed from a grayscale image.

        Steps: 5x5 Gaussian blur, adaptive mean threshold (block size 11,
        offset 2), inversion, then one 3x3 morphological opening to remove
        small specks. The mask is only used by ``visualize_keypoints``; feature
        extraction for training and classification runs on the unmasked image.
        """
        blurred = cv.GaussianBlur(img, (5, 5), 0)
        thresh = cv.adaptiveThreshold(blurred, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 2)
        thresh = cv.bitwise_not(thresh)
        kernel = np.ones((3, 3), np.uint8)
        cleaned = cv.morphologyEx(thresh, cv.MORPH_OPEN, kernel, iterations=1)
        return cleaned

    def _iter_descriptors(self, folder_path, progress_label):
        """Yield ``(label_idx, descriptors)`` for every readable image of a split.

        Files are visited in sorted order so the sample order (which mini-batch
        KMeans is sensitive to) does not depend on the filesystem. Files that
        ``cv.imread`` cannot decode are skipped. ``descriptors`` is ``None``
        when SIFT finds no keypoints in an image.
        """
        sift = cv.SIFT_create()
        for label_idx, class_name in enumerate(self.class_folders):
            class_path = os.path.join(folder_path, class_name)
            file_names = sorted(os.listdir(class_path))
            for file_name in tqdm(file_names, desc=f"{progress_label} {class_name}"):
                img = cv.imread(os.path.join(class_path, file_name), cv.IMREAD_GRAYSCALE)
                if img is None:
                    continue
                img = cv.resize(img, self.image_size)
                _, descriptors = sift.detectAndCompute(img, None)
                yield label_idx, descriptors

    def extract_sift_descriptors(self, folder_path):
        """Collect the SIFT descriptors of every image under ``folder_path``.

        Returns:
            A ``(n_descriptors, 128)`` float32 array. When no image yields a
            descriptor the result has shape ``(0, 128)``.
        """
        # Keep one array per image and stack once: extending a Python list with
        # individual descriptor rows creates millions of tiny objects.
        chunks = [
            descriptors
            for _, descriptors in self._iter_descriptors(folder_path, "SIFT from")
            if descriptors is not None and len(descriptors) > 0
        ]
        if not chunks:
            return np.empty((0, 128), dtype=np.float32)
        return np.vstack(chunks)

    def create_bow_histograms(self, folder_path):
        """Encode every readable image as a ``K``-bin visual-word histogram.

        Requires a fitted vocabulary (call ``train`` first). An image in which
        SIFT finds no keypoints is kept and encoded as an all-zero histogram,
        so it still counts towards the reported metrics.

        Returns:
            ``(X, y)`` where ``X`` has shape ``(n_images, K)`` with raw word
            counts and ``y`` holds the index of each image's class folder.
        """
        bin_edges = np.arange(self.K + 1)
        X, y = [], []
        for label_idx, descriptors in self._iter_descriptors(folder_path, "BoW for"):
            if descriptors is None or len(descriptors) == 0:
                words = np.empty(0, dtype=np.int64)
            else:
                words = self.kmeans.predict(descriptors)
            hist, _ = np.histogram(words, bins=bin_edges)
            X.append(hist)
            y.append(label_idx)
        # Explicit reshape keeps X two-dimensional even when the split is empty.
        X_arr = np.array(X, dtype=np.int64).reshape(len(X), self.K)
        return X_arr, np.array(y, dtype=np.int64)

    def train(self, train_dir):
        """Fit the KMeans visual vocabulary on the descriptors of ``train_dir``.

        Raises:
            ValueError: If fewer than ``K`` descriptors were extracted, since
                ``K`` clusters cannot be formed from them.
        """
        descriptors = self.extract_sift_descriptors(train_dir)
        if len(descriptors) < self.K:
            raise ValueError(
                f"Only {len(descriptors)} SIFT descriptors found under {train_dir!r}; "
                f"at least K={self.K} are required to build the vocabulary."
            )
        self.kmeans.fit(descriptors)
        print("KMeans clustering complete.")

    def run_classification(self, train_dir, val_dir, test_dir):
        """Fit the scaler and SVM on the training split and print evaluation metrics.

        The scaler is fitted on the training histograms only and then applied
        to the validation and test histograms. SIFT descriptors of the training
        images are recomputed here; they are not cached by ``train``.
        """
        X_train, y_train = self.create_bow_histograms(train_dir)
        X_val, y_val = self.create_bow_histograms(val_dir)
        X_test, y_test = self.create_bow_histograms(test_dir)

        X_train = self.scaler.fit_transform(X_train)
        X_val = self.scaler.transform(X_val)
        X_test = self.scaler.transform(X_test)

        self.svm.fit(X_train, y_train)
        print("SVM training complete.")

        y_val_pred = self.svm.predict(X_val)
        y_test_pred = self.svm.predict(X_test)

        print("Validation Accuracy:", accuracy_score(y_val, y_val_pred))
        print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
        print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))
        print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

    def visualize_keypoints(self, img_path, output_path):
        """Save a four-panel figure: image, mask, and SIFT keypoints without/with the mask.

        Does nothing when the image at ``img_path`` cannot be read. The figure
        is written to ``output_path`` and closed; nothing is displayed inline.
        """
        img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if img is None:
            return
        img = cv.resize(img, self.image_size)
        mask = self.generate_bone_mask(img)
        sift = cv.SIFT_create()

        kp_full, _ = sift.detectAndCompute(img, None)
        img_kp_full = cv.drawKeypoints(img, kp_full, None, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

        kp_masked, _ = sift.detectAndCompute(img, mask)
        img_kp_masked = cv.drawKeypoints(img, kp_masked, None, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

        panels = [
            (img, 'Original', 'gray'),
            (mask, 'Bone Mask', 'gray'),
            (img_kp_full, 'Keypoints (No Mask)', None),
            (img_kp_masked, 'Keypoints (With Mask)', None),
        ]
        fig, axes = plt.subplots(1, 4, figsize=(16, 5))
        for ax, (panel, title, cmap) in zip(axes, panels):
            ax.imshow(panel, cmap=cmap)
            ax.set_title(title)
            ax.axis('off')
        fig.tight_layout()
        fig.savefig(output_path)
        # Close explicitly so figures do not accumulate when called in a loop.
        plt.close(fig)


In [None]:
# Mount Google Drive so the dataset stored under MyDrive is reachable from Colab.
drive.mount('/content/drive', force_remount=True)

# Dataset layout: <base_dir>/<split>/<class folder>/<image files>
base_dir = '/content/drive/MyDrive/Bone_XRay_Data'
class_folders = ['fractured', 'not fractured']  # list position is the class label
train_dir, val_dir, test_dir = f'{base_dir}/train', f'{base_dir}/val', f'{base_dir}/test'

Mounted at /content/drive


In [None]:

# Build the pipeline, learn the visual vocabulary, then fit and evaluate the SVM.
pipeline = BoneFracturePipeline(base_dir=base_dir, class_folders=class_folders,
                                image_size=(256, 256), K=100)
pipeline.train(train_dir)
pipeline.run_classification(train_dir, val_dir, test_dir)

# Save keypoint visualisations for the first three listed training images of each class.
output_vis_path = f'{base_dir}/visual_results'
os.makedirs(output_vis_path, exist_ok=True)

for class_name in class_folders:
    class_path = os.path.join(train_dir, class_name)
    first_three = os.listdir(class_path)[:3]
    for idx, img_file in enumerate(first_three):
        img_path = os.path.join(class_path, img_file)
        out_path = os.path.join(output_vis_path, f'{class_name}_{idx+1}.png')
        pipeline.visualize_keypoints(img_path, out_path)


Mounted at /content/drive


SIFT from fractured: 100%|██████████| 4606/4606 [04:47<00:00, 16.04it/s]
SIFT from not fractured: 100%|██████████| 4649/4649 [04:42<00:00, 16.43it/s]


KMeans clustering complete.


BoW for fractured: 100%|██████████| 4606/4606 [03:02<00:00, 25.26it/s]
BoW for not fractured: 100%|██████████| 4649/4649 [02:54<00:00, 26.63it/s]
BoW for fractured: 100%|██████████| 337/337 [00:18<00:00, 18.38it/s]
BoW for not fractured: 100%|██████████| 492/492 [00:24<00:00, 20.17it/s]
BoW for fractured: 100%|██████████| 238/238 [00:13<00:00, 17.32it/s]
BoW for not fractured: 100%|██████████| 268/268 [00:15<00:00, 17.29it/s]


SVM training complete.
Validation Accuracy: 0.9300361881785284
Test Accuracy: 0.9525691699604744
Validation Confusion Matrix:
 [[286  51]
 [  7 485]]
Test Confusion Matrix:
 [[221  17]
 [  7 261]]


In [None]:
# Second end-to-end run with the default image size and vocabulary size,
# followed by a keypoint visualisation of a single test image.
pipeline = BoneFracturePipeline(base_dir, ['fractured', 'not fractured'])
pipeline.train(train_dir)
pipeline.run_classification(train_dir, val_dir, test_dir)

sample_img_path = '/content/drive/MyDrive/Bone_XRay_Data/test/fractured/1-rotated1-rotated2-rotated1.jpg'
sample_out_path = '/content/drive/MyDrive/Bone_XRay_Data/test.jpg'
pipeline.visualize_keypoints(sample_img_path, sample_out_path)


SIFT from fractured: 100%|██████████| 4606/4606 [03:13<00:00, 23.83it/s]
SIFT from not fractured: 100%|██████████| 4649/4649 [03:00<00:00, 25.76it/s]


KMeans clustering complete.


BoW for fractured: 100%|██████████| 4606/4606 [03:24<00:00, 22.58it/s]
BoW for not fractured: 100%|██████████| 4649/4649 [03:31<00:00, 21.97it/s]
BoW for fractured: 100%|██████████| 337/337 [00:16<00:00, 20.98it/s]
BoW for not fractured: 100%|██████████| 492/492 [00:20<00:00, 24.58it/s]
BoW for fractured: 100%|██████████| 238/238 [00:07<00:00, 30.32it/s]
BoW for not fractured: 100%|██████████| 268/268 [00:11<00:00, 24.21it/s]


SVM training complete.
Validation Accuracy: 0.9312424607961399
Test Accuracy: 0.9525691699604744
Validation Confusion Matrix:
 [[289  48]
 [  9 483]]
Test Confusion Matrix:
 [[223  15]
 [  9 259]]
