In [1]:
import numpy as np
import cv2
import os
from scipy.spatial import distance
from tqdm import tqdm
from scipy.fftpack import dct
from sklearn.model_selection import train_test_split
import shutil

# CLD calculation function
def calculate_CLD(I):
    C = np.zeros((8, 8))
    Cim = I.astype(float)
    for ii in range(0, 256, 32):
        for jj in range(0, 256, 32):
            block = I[ii:ii + 32, jj:jj + 32].astype(float)  # Convert the block to float
            Dblock = dct(block)  # Calculate DCT for the block
            Cim[ii:ii + 32, jj:jj + 32] = Dblock[0, 0]
            C[ii // 32, jj // 32] = Dblock[0, 0]
    CLD = C.ravel()
    return CLD

# EHD calculation function
def calculate_EHD(I):
    fV = np.array([[1, -1], [1, -1]])
    fH = np.array([[1, 1], [-1, -1]])
    f45 = np.array([[np.sqrt(2), 0], [0, -np.sqrt(2)]])
    f135 = np.array([[0, np.sqrt(2)], [-np.sqrt(2), 0]])
    fnon = np.array([[2, -2], [-2, 2]])

    nr, nc = I.shape
    th = 20
    score = np.zeros(5)
    I = I.astype(float)

    for ii in range(0, nr, 2):
        for jj in range(0, nc, 2):
            fij = I[ii:ii + 2, jj:jj + 2]
            mv = np.abs(np.sum(fij * fV))
            mh = np.abs(np.sum(fij * fH))
            md45 = np.abs(np.sum(fij * f45))
            md135 = np.abs(np.sum(fij * f135))
            mnon = np.abs(np.sum(fij * fnon))
            M, ix = max([(mv, 0), (mh, 1), (md45, 2), (md135, 3), (mnon, 4)])
            if M > th:
                score[ix] += 1
    EHD = score / (nr * nc / 4)
    return EHD

# Load images and calculate features
def load_images_and_calculate_features(database_folder):
    features = []
    image_names = []
    for filename in tqdm(os.listdir(database_folder), desc='Loading and processing images from database...'):
        if filename.lower().endswith(('.jpg', '.bmp', '.png', '.jpeg')):
            img = cv2.imread(os.path.join(database_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (256, 256))
                CLD = calculate_CLD(img)
                EHD = calculate_EHD(img)
                feature = np.concatenate((CLD, EHD))
                features.append(feature)
                image_names.append(filename)
    return features, image_names

# Retrieval for each query image
def retrieve_and_print_results(query_folder, features, image_names):
    for query_img_name in os.listdir(query_folder):
        query_img = cv2.imread(os.path.join(query_folder, query_img_name), cv2.IMREAD_GRAYSCALE)
        if query_img is not None:
            query_img = cv2.resize(query_img, (256, 256))
            query_CLD = calculate_CLD(query_img)
            query_EHD = calculate_EHD(query_img)
            query_features = np.concatenate((query_CLD, query_EHD))
            distances = distance.cdist([query_features], features, 'euclidean')
            min_index = np.argmin(distances)
            result = image_names[min_index]
            print(f"For the query image: {query_img_name}, the most similar image in the database is: {result}")

if __name__ == "__main__":
    database_folder = 'newdb'
    query_folder = 'query'

    # Load images and calculate features
    features, image_names = load_images_and_calculate_features(database_folder)

    # Randomly select 1000 images for preprocessing
    selected_image_names = np.random.choice(image_names, size=500, replace=False)
    selected_features = [features[image_names.index(image_name)] for image_name in selected_image_names]

    # Split the selected dataset into train and test sets
    train_features, test_features, train_image_names, test_image_names = train_test_split(selected_features, selected_image_names, test_size=0.2, random_state=42)

    # Save train images to train folder
    train_folder = 'train'
    os.makedirs(train_folder, exist_ok=True)
    for train_image_name in tqdm(train_image_names, desc='Saving train images...'):
        shutil.copy(os.path.join(database_folder, train_image_name), os.path.join(train_folder, train_image_name))

    # Save test images to test folder
    test_folder = 'test'
    os.makedirs(test_folder, exist_ok=True)
    for test_image_name in tqdm(test_image_names, desc='Saving test images...'):
        shutil.copy(os.path.join(database_folder, test_image_name), os.path.join(test_folder, test_image_name))

    # Perform retrieval for each query image and print results
    retrieve_and_print_results(query_folder, train_features, train_image_names)

    # Extract actual labels from filenames
    ground_truth_train = [filename.split('_')[0] for filename in train_image_names]
    predictions_train = [filename.split('_')[0] for filename in train_image_names]

    ground_truth_test = [filename.split('_')[0] for filename in test_image_names]
    predictions_test = [filename.split('_')[0] for filename in test_image_names]

    

Loading and processing images from database...: 100%|██████████| 521/521 [05:52<00:00,  1.48it/s]
Saving train images...: 100%|██████████| 400/400 [00:00<00:00, 413.26it/s]
Saving test images...: 100%|██████████| 100/100 [00:00<00:00, 336.82it/s]


For the query image: 1.jpg, the most similar image in the database is: AbdomenCT_000038_1.jpeg
For the query image: 2.jpg, the most similar image in the database is: ChestCT_000017_1.jpeg
For the query image: 3.jpg, the most similar image in the database is: Hand_000080_1.jpeg
For the query image: 7.jpg, the most similar image in the database is: HeadCT_000117_1.jpeg
For the query image: AbdomenCT_000000_1.jpeg, the most similar image in the database is: AbdomenCT_000046_1.jpeg
For the query image: AbdomenCT_000001_1.jpeg, the most similar image in the database is: AbdomenCT_000042_1.jpeg
For the query image: BreastMRI_000010_1.jpeg, the most similar image in the database is: BreastMRI_000010_1.jpeg
For the query image: BreastMRI_000011_1.jpeg, the most similar image in the database is: BreastMRI_000011_1.jpeg
For the query image: BreastMRI_000019_1.jpeg, the most similar image in the database is: BreastMRI_000019_1.jpeg
For the query image: BreastMRI_000020_1.jpeg, the most similar im

In [25]:
from sklearn.metrics import precision_score, recall_score, f1_score,accuracy_score
from scipy.spatial import distance

def select_similar_images(query_features, train_features, train_image_names):
    query_features = np.array(query_features)
    
    distances = distance.cdist(query_features, train_features, 'euclidean')
    best_indices = np.argpartition(distances, 1)[:, :1]
    
    similar_images = [train_image_names[index[0]] for index in best_indices]
    return similar_images






if __name__ == "__main__":
    database_folder = 'newdb'
    query_folder = 'query'

    # Load images and calculate features
    features, image_names = load_images_and_calculate_features(database_folder)

    # Randomly select 1000 images for preprocessing
    selected_image_names = np.random.choice(image_names, size=500, replace=False)
    selected_features = [features[image_names.index(image_name)] for image_name in selected_image_names]

    # Split the selected dataset into train and test sets
    train_features, test_features, train_image_names, test_image_names = train_test_split(selected_features, selected_image_names, test_size=0.2, random_state=42)

    # Save train images to train folder
    train_folder = 'train'
    os.makedirs(train_folder, exist_ok=True)
    for train_image_name in tqdm(train_image_names, desc='Saving train images...'):
        shutil.copy(os.path.join(database_folder, train_image_name), os.path.join(train_folder, train_image_name))

    # Save test images to test folder
    test_folder = 'test'
    os.makedirs(test_folder, exist_ok=True)
    for test_image_name in tqdm(test_image_names, desc='Saving test images...'):
        shutil.copy(os.path.join(database_folder, test_image_name), os.path.join(test_folder, test_image_name))

    # Perform retrieval for each query image and print results
    retrieve_and_print_results(query_folder, train_features, train_image_names)

    # Extract actual labels from filenames
    ground_truth_train = [filename.split('_')[0] for filename in train_image_names]
    predictions_train = [filename.split('_')[0] for filename in train_image_names]

    ground_truth_test = [filename.split('_')[0] for filename in test_image_names]
    predictions_test = [filename.split('_')[0] for filename in test_image_names]

    # Select similar images for each query image
    query_features = test_features
    similar_images = select_similar_images(query_features, train_features, train_image_names)


    # Evaluate the model by comparing the ground truth labels to the predicted labels for the similar images
    '''accuracy = accuracy_score(ground_truth_test, predictions_test)
    recall = recall_score(ground_truth_test, predictions_test, average='micro')
    precision = precision_score(ground_truth_test, predictions_test, average='micro')
    f1_score = f1_score(ground_truth_test, predictions_test, average='micro')

    print('Accuracy:', accuracy)
    print('Recall:', recall)
    print('Precision:', precision)
    print('F1 Score:', f1_score)'''


Loading and processing images from database...:   0%|          | 0/521 [00:00<?, ?it/s]

Loading and processing images from database...: 100%|██████████| 521/521 [03:20<00:00,  2.60it/s]
Saving train images...: 100%|██████████| 400/400 [00:00<00:00, 837.31it/s]
Saving test images...: 100%|██████████| 100/100 [00:00<00:00, 744.57it/s]


For the query image: 1.jpg, the most similar image in the database is: AbdomenCT_000038_1.jpeg
For the query image: 2.jpg, the most similar image in the database is: ChestCT_000086_1.jpeg
For the query image: 3.jpg, the most similar image in the database is: Hand_000083_1.jpeg
For the query image: 7.jpg, the most similar image in the database is: HeadCT_000117_1.jpeg
For the query image: AbdomenCT_000000_1.jpeg, the most similar image in the database is: AbdomenCT_000000_1.jpeg
For the query image: AbdomenCT_000001_1.jpeg, the most similar image in the database is: AbdomenCT_000001_1.jpeg
For the query image: BreastMRI_000010_1.jpeg, the most similar image in the database is: BreastMRI_000010_1.jpeg
For the query image: BreastMRI_000011_1.jpeg, the most similar image in the database is: BreastMRI_000011_1.jpeg
For the query image: BreastMRI_000019_1.jpeg, the most similar image in the database is: BreastMRI_000019_1.jpeg
For the query image: BreastMRI_000020_1.jpeg, the most similar im

In [27]:
from sklearn.metrics import precision_score, recall_score, f1_score,accuracy_score
def calculate_metrics(ground_truth, predictions):
    accuracy = accuracy_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions, average='micro')
    precision = precision_score(ground_truth, predictions, average='micro')
    f1_score1 = f1_score(ground_truth, predictions, average='micro')
    return accuracy, recall, precision, f1_score1

accuracy_train, recall_train, precision_train, f1_score_train = calculate_metrics(ground_truth_train, predictions_train)
accuracy_test1, recall_test1, precision_test1, f1_score_test1 = calculate_metrics(ground_truth_test, predictions_test)
print('train')
print('Train Accuracy:', accuracy_train)
print('Train Recall:', recall_train)
print('Train Precision:', precision_train)
print('Train F1 Score:', f1_score_train)
print('test')
print('Test Accuracy:', accuracy_test1)
print('Test Recall:', recall_test1)
print('Test Precision:', precision_test1)
print('Test F1 Score:', f1_score_test1)



train
Train Accuracy: 1.0
Train Recall: 1.0
Train Precision: 1.0
Train F1 Score: 1.0
test
Test Accuracy: 1.0
Test Recall: 1.0
Test Precision: 1.0
Test F1 Score: 1.0


In [30]:
import numpy as np
import cv2
import os
from scipy.spatial import distance
from tqdm import tqdm
from scipy.fftpack import dct
from sklearn.model_selection import train_test_split
import shutil
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

def calculate_CLD(I):
    C = np.zeros((8, 8))
    Cim = I.astype(float)
    for ii in range(0, 256, 32):
        for jj in range(0, 256, 32):
            block = I[ii:ii + 32, jj:jj + 32].astype(float)  # Convert the block to float
            Dblock = dct(block)  # Calculate DCT for the block
            Cim[ii:ii + 32, jj:jj + 32] = Dblock[0, 0]
            C[ii // 32, jj // 32] = Dblock[0, 0]
    CLD = C.ravel()
    return CLD


def calculate_EHD(I):
    fV = np.array([[1, -1], [1, -1]])
    fH = np.array([[1, 1], [-1, -1]])
    f45 = np.array([[np.sqrt(2), 0], [0, -np.sqrt(2)]])
    f135 = np.array([[0, np.sqrt(2)], [-np.sqrt(2), 0]])
    fnon = np.array([[2, -2], [-2, 2]])

    nr, nc = I.shape
    th = 20
    score = np.zeros(5)
    I = I.astype(float)

    for ii in range(0, nr, 2):
        for jj in range(0, nc, 2):
            fij = I[ii:ii + 2, jj:jj + 2]
            mv = np.abs(np.sum(fij * fV))
            mh = np.abs(np.sum(fij * fH))
            md45 = np.abs(np.sum(fij * f45))
            md135 = np.abs(np.sum(fij * f135))
            mnon = np.abs(np.sum(fij * fnon))
            M, ix = max([(mv, 0), (mh, 1), (md45, 2), (md135, 3), (mnon, 4)])
            if M > th:
                score[ix] += 1
    EHD = score / (nr * nc / 4)
    return EHD

def load_images_and_calculate_features(database_folder):
    features = []
    image_names = []
    for filename in tqdm(os.listdir(database_folder), desc='Loading and processing images from database...'):
        if filename.lower().endswith(('.jpg', '.bmp', '.png', '.jpeg')):
            img = cv2.imread(os.path.join(database_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (256, 256))
                CLD = calculate_CLD(img)
                EHD = calculate_EHD(img)
                feature = np.concatenate((CLD, EHD))
                features.append(feature)
                image_names.append(filename)
    return features, image_names

def retrieve_and_print_results(query_folder, features, image_names):
    for query_img_name in os.listdir(query_folder):
        query_img = cv2.imread(os.path.join(query_folder, query_img_name), cv2.IMREAD_GRAYSCALE)
        if query_img is not None:
            query_img = cv2.resize(query_img, (256, 256))
            result = retrieve_image(query_img, features, image_names)
            print(f"For the query image: {query_img_name}, the most similar image in the database is: {result}")
def retrieve_image(query_image, features, image_names):
    query_CLD = calculate_CLD(query_image)
    query_EHD = calculate_EHD(query_image)
    query_features = np.concatenate((query_CLD, query_EHD))
    distances = distance.cdist([query_features], features, 'euclidean')
    min_index = np.argmin(distances)
    return image_names[min_index]

if __name__ == "__main__":
    database_folder = 'newdb'
    query_folder = 'query'

    # Load images and calculate features
    features, image_names = load_images_and_calculate_features(database_folder)

    # Randomly select images for preprocessing
    selected_image_names = np.random.choice(image_names, size=500, replace=False)
    selected_features = [features[image_names.index(image_name)] for image_name in selected_image_names]

    # Split the selected dataset into train and test sets
    train_features, test_features, train_image_names, test_image_names = train_test_split(selected_features, selected_image_names, test_size=0.2, random_state=42)

    # Save train images to train folder
    train_folder = 'train'
    os.makedirs(train_folder, exist_ok=True)
    for train_image_name in tqdm(train_image_names, desc='Saving train images...'):
        shutil.copy(os.path.join(database_folder, train_image_name), os.path.join(train_folder, train_image_name))

    # Save test images to test folder
    test_folder = 'test'
    os.makedirs(test_folder, exist_ok=True)
    for test_image_name in tqdm(test_image_names, desc='Saving test images...'):
        shutil.copy(os.path.join(database_folder, test_image_name), os.path.join(test_folder, test_image_name))

    # Perform retrieval for each query image and print results
    retrieve_and_print_results(query_folder, train_features, train_image_names)

    # Extract actual labels from filenames
    ground_truth_train = [filename.split('_')[0] for filename in train_image_names]
    predictions_train = [filename.split('_')[0] for filename in train_image_names]

    ground_truth_test = [filename.split('_')[0] for filename in test_image_names]
    predictions_test = [filename.split('_')[0] for filename in test_image_names]

    # Evaluate the model
    accuracy = accuracy_score(ground_truth_test, predictions_test)
    recall = recall_score(ground_truth_test, predictions_test, average='micro')
    precision = precision_score(ground_truth_test, predictions_test, average='micro')
    f1 = f1_score(ground_truth_test, predictions_test, average='micro')

    print('Accuracy:', accuracy)
    print('Recall:', recall)
    print('Precision:', precision)
    print('F1 Score:', f1)


Loading and processing images from database...: 100%|██████████| 521/521 [03:02<00:00,  2.85it/s]
Saving train images...: 100%|██████████| 400/400 [00:00<00:00, 1200.23it/s]
Saving test images...: 100%|██████████| 100/100 [00:00<00:00, 1237.41it/s]


For the query image: 1.jpg, the most similar image in the database is: AbdomenCT_000038_1.jpeg
For the query image: 2.jpg, the most similar image in the database is: ChestCT_000017_1.jpeg
For the query image: 3.jpg, the most similar image in the database is: Hand_000080_1.jpeg
For the query image: 7.jpg, the most similar image in the database is: HeadCT_000117_1.jpeg
For the query image: AbdomenCT_000000_1.jpeg, the most similar image in the database is: AbdomenCT_000000_1.jpeg
For the query image: AbdomenCT_000001_1.jpeg, the most similar image in the database is: AbdomenCT_000001_1.jpeg
For the query image: BreastMRI_000010_1.jpeg, the most similar image in the database is: BreastMRI_000010_1.jpeg
For the query image: BreastMRI_000011_1.jpeg, the most similar image in the database is: BreastMRI_000011_1.jpeg
For the query image: BreastMRI_000019_1.jpeg, the most similar image in the database is: BreastMRI_000019_1.jpeg
For the query image: BreastMRI_000020_1.jpeg, the most similar im