In [3]:
from multiprocessing import Pool
import cv2
import numpy as np
import os


In [6]:
def preprocess_image(image):
    """Turn an image into a blurred, histogram-equalized grayscale image.

    The image is converted with ``cv2.COLOR_BGR2GRAY``, smoothed with a 5x5
    Gaussian kernel (sigma argument 0) and finally passed through
    ``cv2.equalizeHist``.

    :param image: Image to convert; presumably a 3-channel BGR array as
        returned by ``cv2.imread`` -- confirm against callers.
    :return: The result of ``cv2.equalizeHist`` on the blurred grayscale image.
    """
    smoothed_gray = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        (5, 5),
        0,
    )
    return cv2.equalizeHist(smoothed_gray)

def extract_features(image):
    """Run SIFT on ``image`` and return its keypoints and descriptors.

    A new SIFT detector is created on every call and run without a mask.

    :param image: Image handed unchanged to ``detectAndCompute``.
    :return: Tuple ``(keypoints, descriptors)`` as produced by
        ``detectAndCompute``.
    """
    detector = cv2.SIFT_create()
    found_keypoints, found_descriptors = detector.detectAndCompute(image, None)
    return (found_keypoints, found_descriptors)

def feature_matching(descriptors1, descriptors2):
    """Match two descriptor sets with FLANN and keep matches passing a ratio test.

    For every query descriptor the two nearest train descriptors are looked up
    (``k=2``). A match is kept when the best distance is below 0.7 times the
    second-best distance.

    :param descriptors1: Query descriptors, or ``None`` when there are none.
    :param descriptors2: Train descriptors, or ``None`` when there are none.
    :return: List of the best-neighbour matches that pass the ratio test.
        Empty when either input is ``None``, when there are no query
        descriptors, or when there are fewer than two train descriptors.
    """
    # A descriptor set may be None (e.g. when a detector finds no keypoints);
    # passing None to knnMatch raises instead of yielding "no matches".
    if descriptors1 is None or descriptors2 is None:
        return []
    # The ratio test needs two neighbours per query, so at least two train
    # descriptors are required.
    if len(descriptors1) == 0 or len(descriptors2) < 2:
        return []

    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(descriptors1, descriptors2, k=2)

    good_matches = []
    for pair in matches:
        # knnMatch may return fewer than k neighbours for a query; such
        # entries cannot be ratio-tested and would break tuple unpacking.
        if len(pair) < 2:
            continue
        best, second_best = pair[0], pair[1]
        if best.distance < 0.7 * second_best.distance:
            good_matches.append(best)
    return good_matches


def load_images_from_folder(folder):
    """Read every entry of ``folder`` that ``cv2.imread`` can load.

    Entries are visited in ``os.listdir`` order; those for which
    ``cv2.imread`` returns ``None`` are left out. Subdirectories are not
    descended into.

    :param folder: Path of the directory to scan.
    :return: List of the images that were read.
    """
    candidates = (
        cv2.imread(os.path.join(folder, entry)) for entry in os.listdir(folder)
    )
    return [picture for picture in candidates if picture is not None]

In [8]:

def load_images_with_labels(folder):
    """Load images from the class subfolders of ``folder`` with their labels.

    Every immediate subdirectory of ``folder`` is treated as one class and its
    name is used as the label of each image read from it. Entries that
    ``cv2.imread`` cannot load (it returns ``None``) are skipped, as are
    non-directory entries directly inside ``folder``.

    Subfolders and files are visited in sorted name order so the returned
    lists have the same order on every run and machine.

    :param folder: Directory containing one subdirectory per class.
    :return: Tuple ``(images, labels)`` of two parallel lists.
    """
    images = []
    labels = []
    # os.listdir returns names in arbitrary order; sorting keeps index-based
    # slicing of the result reproducible.
    for subfolder in sorted(os.listdir(folder)):
        full_path = os.path.join(folder, subfolder)
        if not os.path.isdir(full_path):
            continue
        for filename in sorted(os.listdir(full_path)):
            img = cv2.imread(os.path.join(full_path, filename))
            if img is None:
                continue
            # Progress message before every 100th successfully loaded image.
            if len(images) % 100 == 0:
                print("Loaded img amount:", len(images))
            images.append(img)
            labels.append(subfolder)  # The subfolder name is used as the label
    return images, labels


In [None]:
# Other functions (preprocess_image, extract_features, feature_matching) remain the same

def classify_tumor_dataset1(test_image, all_train_folders):
    """Classify ``test_image`` by its average number of SIFT matches per class.

    The test image is preprocessed and its descriptors are matched against
    every training image of every class. The class with the highest average
    number of good matches per training image wins.

    :param test_image: Image to classify (passed to ``preprocess_image``).
    :param all_train_folders: Sequence of per-class image lists, in the order
        meningioma_tumor, glioma_tumor, no_tumor, pituitary_tumor. Entries
        beyond the fourth are ignored.
    :return: The winning class name, or ``None`` when no class reaches an
        average above zero (including when the test image has no descriptors).
    """
    # Position in all_train_folders -> class label.
    class_names = ("meningioma_tumor", "glioma_tumor", "no_tumor", "pituitary_tumor")

    print("Classify tumor started")
    preprocessed_test_image = preprocess_image(test_image)
    _, test_descriptors = extract_features(preprocessed_test_image)
    max_matches = 0
    tumor_type = None

    # Without test descriptors nothing can match, so skip the expensive loop.
    if test_descriptors is not None:
        for class_name, images_in_folder in zip(class_names, all_train_folders):
            # An empty class would make the average below divide by zero.
            if len(images_in_folder) == 0:
                continue

            total_matches = 0
            for train_image in images_in_folder:
                preprocessed_train_image = preprocess_image(train_image)
                _, train_descriptors = extract_features(preprocessed_train_image)
                # A training image without descriptors contributes zero
                # matches but still counts towards the class average.
                if train_descriptors is None:
                    continue
                matches = feature_matching(test_descriptors, train_descriptors)
                total_matches += len(matches)

            avg_matches = total_matches / len(images_in_folder)
            # Strict comparison: on ties the earlier class is kept.
            if avg_matches > max_matches:
                max_matches = avg_matches
                tumor_type = class_name

    print("Classify tumor ended")
    return tumor_type

def calculate_accuracy_parallel(test_images, test_labels, all_train_images, num_processes=None):
    """
    Function to calculate accuracy in parallel.

    Each test image is classified with ``classify_tumor_dataset1`` in a worker
    process and the predictions are compared with ``test_labels``.

    :param test_images: List of test images.
    :param test_labels: Corresponding labels for the test images.
    :param all_train_images: Preloaded training images for classification comparison.
    :param num_processes: Number of processes to use. Defaults to None, which means using os.cpu_count() processes.
    :return: Accuracy as a float (0.0 when there are no test images).
    :raises ValueError: If ``test_images`` and ``test_labels`` differ in length.
    """
    # Fail fast, before any expensive work, instead of hitting an IndexError
    # (or silently ignoring labels) after all images have been classified.
    if len(test_images) != len(test_labels):
        raise ValueError(
            f"test_images and test_labels must have the same length "
            f"(got {len(test_images)} and {len(test_labels)})"
        )
    # Nothing to classify: avoid starting a pool and dividing by zero.
    if len(test_images) == 0:
        return 0.0

    # NOTE: every task tuple carries all_train_images, so the training set is
    # sent to the workers once per test image.
    tasks = [(test_image, all_train_images) for test_image in test_images]
    with Pool(processes=num_processes) as pool:
        # Map the worker function across all test images
        predicted_labels = pool.starmap(classify_tumor_dataset1, tasks)

    correct_predictions = sum(
        1
        for predicted_label, expected_label in zip(predicted_labels, test_labels)
        if predicted_label == expected_label
    )
    return correct_predictions / len(test_images)

# Dataset 1: load the training images per class, in the class order that
# classify_tumor_dataset1 maps to its labels (index 0..3).
train_base_folder = r'/home/tm216/Desktop/mehmet/Method1/dataset1/Training'
train_folders = [
    os.path.join(train_base_folder, class_dir)
    for class_dir in ('meningioma_tumor', 'glioma_tumor', 'no_tumor', 'pituitary_tumor')
]

meningioma_images, glioma_images, no_tumor_images, pituitary_images = map(
    load_images_from_folder, train_folders
)
all_train_images = [meningioma_images, glioma_images, no_tumor_images, pituitary_images]

# Dataset 1: load the labelled test images and take three index ranges of them.
test_folder = r'/home/tm216/Desktop/mehmet/Method1/dataset1/Testing'
test_images, test_labels = load_images_with_labels(test_folder)

test_images_subset = [*test_images[:30], *test_images[130:160], *test_images[250:290]]
test_labels_subset = [*test_labels[:30], *test_labels[130:160], *test_labels[250:290]]

# Only the first three images of the subset are evaluated here.
accuracy1 = calculate_accuracy_parallel(
    test_images_subset[:3],
    test_labels_subset[:3],
    all_train_images,
    num_processes=32,
)
print("Accuracy:", accuracy1)
print(f"Accuracy: {accuracy1 * 100}%")

Second dataset
Defining the functions and running the analysis


In [4]:
def classify_tumor_dataset2(test_image, all_train_folders):
    """Classify ``test_image`` by its average number of SIFT matches per class.

    The test image is preprocessed and its descriptors are matched against
    every training image of every class. The class with the highest average
    number of good matches per training image wins.

    :param test_image: Image to classify (passed to ``preprocess_image``).
    :param all_train_folders: Sequence of per-class image lists, in the order
        meningioma_tumor, glioma_tumor, no_tumor, pituitary_tumor. Entries
        beyond the fourth are ignored.
    :return: The winning class name, or ``None`` when no class reaches an
        average above zero (including when the test image has no descriptors).
    """
    # Position in all_train_folders -> class label.
    class_names = ("meningioma_tumor", "glioma_tumor", "no_tumor", "pituitary_tumor")

    print("Classify tumor started")
    preprocessed_test_image = preprocess_image(test_image)
    _, test_descriptors = extract_features(preprocessed_test_image)
    max_matches = 0
    tumor_type = None

    # Without test descriptors nothing can match, so skip the expensive loop.
    if test_descriptors is not None:
        for class_name, images_in_folder in zip(class_names, all_train_folders):
            # An empty class would make the average below divide by zero.
            if len(images_in_folder) == 0:
                continue

            total_matches = 0
            for train_image in images_in_folder:
                preprocessed_train_image = preprocess_image(train_image)
                _, train_descriptors = extract_features(preprocessed_train_image)
                # A training image without descriptors contributes zero
                # matches but still counts towards the class average.
                if train_descriptors is None:
                    continue
                matches = feature_matching(test_descriptors, train_descriptors)
                total_matches += len(matches)

            avg_matches = total_matches / len(images_in_folder)
            # Strict comparison: on ties the earlier class is kept.
            if avg_matches > max_matches:
                max_matches = avg_matches
                tumor_type = class_name

    print("Classify tumor ended")
    return tumor_type



In [None]:
def calculate_accuracy_parallel2(test_images, test_labels, all_train_images, num_processes=None):
    """
    Function to calculate accuracy in parallel.

    Each test image is classified with ``classify_tumor_dataset2`` in a worker
    process and the predictions are compared with ``test_labels``.

    :param test_images: List of test images.
    :param test_labels: Corresponding labels for the test images.
    :param all_train_images: Preloaded training images for classification comparison.
    :param num_processes: Number of processes to use. Defaults to None, which means using os.cpu_count() processes.
    :return: Accuracy as a float (0.0 when there are no test images).
    :raises ValueError: If ``test_images`` and ``test_labels`` differ in length.
    """
    # Fail fast, before any expensive work, instead of hitting an IndexError
    # (or silently ignoring labels) after all images have been classified.
    if len(test_images) != len(test_labels):
        raise ValueError(
            f"test_images and test_labels must have the same length "
            f"(got {len(test_images)} and {len(test_labels)})"
        )
    # Nothing to classify: avoid starting a pool and dividing by zero.
    if len(test_images) == 0:
        return 0.0

    # NOTE: every task tuple carries all_train_images, so the training set is
    # sent to the workers once per test image.
    tasks = [(test_image, all_train_images) for test_image in test_images]
    with Pool(processes=num_processes) as pool:
        # Map the worker function across all test images
        predicted_labels = pool.starmap(classify_tumor_dataset2, tasks)

    correct_predictions = sum(
        1
        for predicted_label, expected_label in zip(predicted_labels, test_labels)
        if predicted_label == expected_label
    )
    return correct_predictions / len(test_images)
import matplotlib.pyplot as plt
import seaborn as sns

train_folder_2 = r'/home/tm216/Desktop/mehmet/Method1/dataset2/archive (5)/Training'
test_folder_2 = r'/home/tm216/Desktop/mehmet/Method1/dataset2/archive (5)/Testing'


# Run the evaluation on the second dataset and compute its accuracy.
# The class order must match the index -> label mapping in classify_tumor_dataset2.
train_folders_2 = [os.path.join(train_folder_2, f) for f in ['meningioma_tumor', 'glioma_tumor', 'no_tumor', 'pituitary_tumor']]


meningioma_images2 = load_images_from_folder(train_folders_2[0])
glioma_images2 = load_images_from_folder(train_folders_2[1])
no_tumor_images2 = load_images_from_folder(train_folders_2[2])
pituitary_images2 = load_images_from_folder(train_folders_2[3])

all_train_images_2 = [meningioma_images2, glioma_images2, no_tumor_images2, pituitary_images2]


test_images_2, test_labels_2 = load_images_with_labels(test_folder_2)
# The third range used to be [650:500], which is always empty (start > stop)
# and silently dropped that part of the test set. It is now [650:700] so each
# of the four ranges contributes 50 images.
# NOTE(review): 650:700 is assumed from the other 50-image ranges -- confirm.
test_images_2_subset = test_images_2[:50] + test_images_2[350:400] + test_images_2[650:700] + test_images_2[-50:]
test_labels_2_subset = test_labels_2[:50] + test_labels_2[350:400] + test_labels_2[650:700] + test_labels_2[-50:]


accuracy2 = calculate_accuracy_parallel2(test_images_2_subset, test_labels_2_subset, all_train_images_2, num_processes=32)

print("Accuracy of the second data:", accuracy2)

# Print both accuracies and their absolute difference.
# accuracy1 is produced by the dataset-1 evaluation cell.
print(f"First dataset accuracy: {accuracy1 * 100}%")
print(f"Second dataset accuracy: {accuracy2 * 100}%")
print(f"Accuracy difference: {abs(accuracy1 - accuracy2) * 100}%")

Loaded img amount: 0
Loaded img amount: 100
Loaded img amount: 200
Loaded img amount: 300
Loaded img amount: 400
Loaded img amount: 500
Loaded img amount: 600
Loaded img amount: 700
Loaded img amount: 800
Loaded img amount: 900
Loaded img amount: 1000
Loaded img amount: 1100
Loaded img amount: 1200
Loaded img amount: 1300
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify tumor started
Classify 

In [None]:
# Visualize the accuracy values of both datasets as a bar chart.
datasets = ['Dataset 1', 'Dataset 2']
accuracies = [accuracy1 * 100, accuracy2 * 100]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=datasets, y=accuracies, ax=ax)
ax.set_title('Dataset Accuracies')
ax.set_ylabel('Accuracy (%)')
plt.show()

In [23]:
# Show the raw accuracy value (a fraction, not a percentage) of the second dataset.
print(accuracy2)

0.0


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

datasets = ['Dataset 1', 'Dataset 2']
# accuracy1 and accuracy2 are produced by the two evaluation cells.
accuracies = [accuracy1 * 100, accuracy2 * 100]

plt.figure(figsize=(10, 6))
sns.barplot(x=datasets, y=accuracies)

plt.title('Dataset Accuracies')
plt.ylabel('Accuracy (%)')

# Zoom the y-axis in on the accuracies, which are assumed to be close together.
min_acc = min(accuracies)
max_acc = max(accuracies)
# Pad by 10% of the spread, but by at least one percentage point: with equal
# accuracies the spread is 0 and the two y-limits would otherwise coincide.
# The minimum padding also leaves room for the value labels above the bars.
padding = max((max_acc - min_acc) * 0.1, 1.0)
# Accuracy percentages are non-negative, so never extend the axis below zero.
lower_limit = max(min_acc - padding, 0.0)
upper_limit = max_acc + padding
plt.ylim(lower_limit, upper_limit)

# Adding the text on top of the bars
for i, acc in enumerate(accuracies):
    plt.text(i, acc + 0.05, f'{acc:.2f}%', ha='center', va='bottom')

plt.show()