### VLAD를 통해 디스크립터 계산

In [66]:
import cv2
import numpy as np

def compute_vlad(image, des, labels, centers):
    """Build an L2-normalized VLAD vector for a single image.

    Each descriptor is assigned to its nearest cluster center and the
    residuals (descriptor minus center) are summed per cluster.

    Args:
        image: Image the descriptors belong to. Only used to extract SIFT
            descriptors when ``des`` is None.
        des: Descriptors of ``image``, one row per keypoint, or None to have
            them computed here with SIFT.
        labels: Unused; kept so existing callers keep working. k-means labels
            describe the rows of the training set, not the rows of ``des``,
            so they must not be used to index this image's descriptors.
        centers: Codebook of cluster centers, one row per cluster.

    Returns:
        A flat float32 array of length ``centers.shape[0] * des.shape[1]``,
        or None when no descriptors are available.
    """
    if des is None:
        if image is None:
            return None
        # Same detector settings as the rest of the pipeline
        sift = cv2.SIFT_create(edgeThreshold=80)
        _, des = sift.detectAndCompute(image, None)
        if des is None:
            return None

    des = np.asarray(des, dtype=np.float32)
    centers = np.asarray(centers, dtype=np.float32)
    if des.ndim != 2 or des.shape[0] == 0:
        return None

    # Squared Euclidean distance from every descriptor to every center,
    # computed in float64 so the nearest-center choice is not affected by
    # float32 rounding.
    des64 = des.astype(np.float64)
    centers64 = centers.astype(np.float64)
    sq_dist = (
        np.sum(des64 ** 2, axis=1)[:, np.newaxis]
        - 2.0 * (des64 @ centers64.T)
        + np.sum(centers64 ** 2, axis=1)[np.newaxis, :]
    )
    nearest = np.argmin(sq_dist, axis=1)

    # Accumulate residuals per cluster; np.add.at handles repeated indices
    vlad = np.zeros((centers.shape[0], des.shape[1]), dtype=np.float32)
    np.add.at(vlad, nearest, des - centers[nearest])

    # Single L2 normalization; an all-zero vector is returned unchanged
    vlad = vlad.flatten()
    norm = np.linalg.norm(vlad)
    if norm > 0:
        vlad /= norm
    return vlad

def similar_images(query_image, category_images, k, num_clusters=None):
    """Rank image categories by mean VLAD distance to a query image.

    A k-means codebook is trained on the SIFT descriptors of all reference
    images, a VLAD vector is built for the query and for every reference
    image via ``compute_vlad``, and each category is scored by the mean
    Euclidean distance between the query VLAD and its images' VLADs.

    Args:
        query_image: Query image as loaded by ``cv2.imread``; None is
            tolerated and yields None.
        category_images: Mapping of category name to a list of images.
            Entries that are None (unreadable files) are skipped.
        k: Number of best-matching categories to return.
        num_clusters: Number of k-means clusters for the codebook. Defaults
            to ``k``, which is what the original implementation used.

    Returns:
        Up to ``k`` category names ordered from smallest to largest mean
        distance (most similar first), or None when the query or the
        reference set yields no usable descriptors.
    """
    if query_image is None:
        return None
    if num_clusters is None:
        num_clusters = k

    sift = cv2.SIFT_create(edgeThreshold=80)

    # Keypoints and descriptors of the query image
    query_image = cv2.resize(query_image, (300, 300))
    _, query_des = sift.detectAndCompute(query_image, None)
    if query_des is None:
        return None

    # Preprocess every reference image exactly once so that the codebook is
    # trained on the same (grayscale, resized) images the VLADs are built on.
    prepared = {}
    training_des = []
    for category, images in category_images.items():
        entries = []
        for image in images:
            if image is None:
                continue
            if image.ndim == 3:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, (300, 300))
            _, des = sift.detectAndCompute(image, None)
            if des is not None:
                entries.append((image, des))
                training_des.append(des)
        prepared[category] = entries

    if not training_des:
        return None
    training_des = np.vstack(training_des).astype(np.float32)
    # k-means cannot produce more clusters than there are samples
    if training_des.shape[0] < num_clusters:
        return None

    # k-means clustering of the reference descriptors
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, labels, centers = cv2.kmeans(
        training_des, num_clusters, None, criteria, 5, flags
    )

    # VLAD of the query image
    query_vlad = compute_vlad(query_image, query_des, labels, centers)
    if query_vlad is None:
        return None

    similarity_scores = {}
    for category, entries in prepared.items():
        category_vlads = [
            compute_vlad(image, des, labels, centers) for image, des in entries
        ]
        category_vlads = [vlad for vlad in category_vlads if vlad is not None]
        if not category_vlads:
            # Nothing to compare against; leave this category unranked
            continue

        distances = np.linalg.norm(
            query_vlad - np.array(category_vlads), axis=1
        )
        similarity_scores[category] = np.mean(distances)

    # Ascending distance: the smallest mean distance is the most similar
    sorted_categories = sorted(similarity_scores, key=similarity_scores.get)
    return sorted_categories[:k]

path = 'c:/data/temp/'

# Query image; the commented lines are alternative queries
query_image_path = path + 'flower6.jpg'
# query_image_path =  path + 'mouse6.jpg'
# query_image_path =  path + 'umb6.jpg'

# Reference images, loaded per category into a dict keyed by category name
categories = {
    name: [cv2.imread(path + file_name) for file_name in file_names]
    for name, file_names in (
        ('mouse', ('mouse1.jpg', 'mouse2.jpg', 'mouse3.jpg',
                   'mouse4.jpg', 'mouse5.jpg')),
        ('flower', ('flower1.jpg', 'flower2.jpg', 'flower3.jpg',
                    'flower4.jpg', 'flower5.jpg')),
        ('umbrella', ('umb1.jpg', 'umb2.jpg', 'umb3.jpg',
                      'umb4.jpg', 'umb5.jpg')),
    )
}

# Load the query image as grayscale
query_image = cv2.imread(query_image_path, cv2.IMREAD_GRAYSCALE)

# Run the similarity search; k is the number of categories to report
k = 1
top_categories = similar_images(query_image, categories, k)

# Print the categories, most similar first
print(top_categories)

['flower', 'mouse', 'umbrella']


In [5]:
import cv2
import numpy as np

# Size handed to cv2.resize in detect_similar_images
imgSize = (300, 300)

def siftDes(src):
    """Detect SIFT keypoints in a BGR image and compute their descriptors.

    The image is converted to grayscale, keypoints are detected and ordered
    by detector response (strongest first), and descriptors are then
    computed for the ordered keypoints.

    Args:
        src: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        A ``(keypoints, descriptors)`` pair: the response-sorted keypoint
        list and the descriptor output of ``SIFT.compute``.
    """
    grayscale = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
    detector = cv2.SIFT_create()

    keypoints = list(detector.detect(grayscale))
    keypoints.sort(key=lambda point: point.response, reverse=True)

    _, descriptors = detector.compute(grayscale, keypoints)
    return keypoints, descriptors

def compute_vlad(descriptors, k, centers=None):
    """Build an L2-normalized VLAD vector from a set of descriptors.

    Args:
        descriptors: 2-D array with one descriptor per row.
        k: Number of clusters.
        centers: Optional codebook of shape ``(k, descriptor_dim)``. When
            given, descriptors are assigned to their nearest center and no
            clustering is run, so vectors built against the same codebook
            are directly comparable. When omitted, k-means is run on
            ``descriptors`` themselves (the original behavior).

    Returns:
        A flat float32 array of length ``k * descriptor_dim``.

    Raises:
        ValueError: If ``descriptors`` is None or not 2-D, if there are fewer
            than ``k`` descriptors to cluster, or if ``centers`` does not
            have shape ``(k, descriptor_dim)``.
    """
    if descriptors is None:
        raise ValueError("descriptors must be a 2-D array, got None")
    # cv2.kmeans requires contiguous float32 samples
    descriptors = np.ascontiguousarray(descriptors, dtype=np.float32)
    if descriptors.ndim != 2:
        raise ValueError("descriptors must be a 2-D array")

    if centers is None:
        if descriptors.shape[0] < k:
            raise ValueError(
                "need at least k descriptors to run k-means "
                "(got %d, k=%d)" % (descriptors.shape[0], k)
            )
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.1)
        flags = cv2.KMEANS_RANDOM_CENTERS
        _, labels, centers = cv2.kmeans(descriptors, k, None, criteria, 10, flags)
        # kmeans returns labels as a column vector; flatten for indexing
        labels = labels.ravel()
    else:
        centers = np.asarray(centers, dtype=np.float32)
        if centers.shape != (k, descriptors.shape[1]):
            raise ValueError("centers must have shape (k, descriptor_dim)")
        # Nearest-center assignment via squared Euclidean distances
        desc64 = descriptors.astype(np.float64)
        centers64 = centers.astype(np.float64)
        sq_dist = (
            np.sum(desc64 ** 2, axis=1)[:, np.newaxis]
            - 2.0 * (desc64 @ centers64.T)
            + np.sum(centers64 ** 2, axis=1)[np.newaxis, :]
        )
        labels = np.argmin(sq_dist, axis=1)

    # Sum residuals per cluster; np.add.at handles repeated indices
    vlad_vector = np.zeros((k, descriptors.shape[1]), dtype=np.float32)
    np.add.at(vlad_vector, labels, descriptors - centers[labels])

    # L2-normalize the whole matrix; a (near-)zero vector becomes all zeros
    norm = float(np.linalg.norm(vlad_vector.astype(np.float64)))
    if norm > np.finfo(np.float64).eps:
        vlad_vector /= norm
    else:
        vlad_vector[:] = 0
    return vlad_vector.flatten()

def detect_similar_images(query_image_path, data_image_paths, num_results=5):
    """Return the data images ranked most similar to a query image.

    Data images are grouped into the categories 'mouse', 'flower' and
    'umbrella' by substring match on their path. One VLAD vector is built
    per category from the pooled SIFT descriptors of its images, categories
    are ranked by Euclidean distance to the query's VLAD vector, and images
    are returned category by category in that order.

    Args:
        query_image_path: Path of the query image.
        data_image_paths: Paths of the candidate images. Paths matching no
            category keyword and files that cannot be read are ignored.
        num_results: Maximum number of images to return in total.

    Returns:
        A list of at most ``num_results`` images (as loaded by
        ``cv2.imread``), best-matching category first. Empty when the query
        image yields too few descriptors.

    Raises:
        ValueError: If the query image cannot be read.
    """
    num_clusters = 3

    # Load the query image; cv2.imread signals failure by returning None
    query_image = cv2.imread(query_image_path)
    if query_image is None:
        raise ValueError("could not read query image: %s" % query_image_path)
    query_image = cv2.resize(query_image, imgSize)

    # Descriptors of the query image
    _, query_descriptors = siftDes(query_image)
    if query_descriptors is None or len(query_descriptors) < num_clusters:
        return []

    # Group data image paths by category; the first matching keyword wins
    data_categories = {
        'mouse': [],
        'flower': [],
        'umbrella': []
    }
    keyword_to_category = (
        ('mouse', 'mouse'),
        ('flower', 'flower'),
        ('umb', 'umbrella'),
    )
    for image_path in data_image_paths:
        for keyword, category in keyword_to_category:
            if keyword in image_path:
                data_categories[category].append(image_path)
                break

    # One VLAD vector per category, built from the pooled descriptors of its
    # readable images. Loaded images are kept so they are not read twice.
    vlad_vectors = {}
    loaded_images = {}
    for category, image_paths in data_categories.items():
        images = []
        descriptors = []
        for image_path in image_paths:
            image = cv2.imread(image_path)
            if image is None:
                continue
            images.append(image)
            # Resize like the query so descriptors are extracted at the same scale
            _, desc = siftDes(cv2.resize(image, imgSize))
            if desc is not None:
                descriptors.append(desc)
        loaded_images[category] = images

        if not descriptors:
            continue
        descriptors = np.concatenate(descriptors)
        # Clustering needs at least as many samples as clusters
        if descriptors.shape[0] < num_clusters:
            continue
        vlad_vectors[category] = compute_vlad(descriptors, k=num_clusters)

    # NOTE(review): every compute_vlad call here clusters its own
    # descriptors, so the query and category vectors are built against
    # different codebooks and their distances may not be meaningful;
    # consider a codebook shared by all of them.
    query_vlad = compute_vlad(query_descriptors, k=num_clusters)

    # Distance per category (lower means more similar)
    similarity_scores = {
        category: np.linalg.norm(query_vlad - vlad_vector)
        for category, vlad_vector in vlad_vectors.items()
    }
    sorted_categories = sorted(similarity_scores, key=similarity_scores.get)

    # Collect images in ranked category order, capped at num_results overall
    similar_images = []
    for category in sorted_categories:
        for image in loaded_images[category]:
            if len(similar_images) >= num_results:
                return similar_images
            similar_images.append(image)

    return similar_images

path = 'c:/data/temp/'
query_image_path = path + 'mouse (6).jpg'
data_image_paths = [
    path + file_name
    for file_name in (
        'flower1.jpg', 'flower2.jpg', 'flower3.jpg', 'flower4.jpg', 'flower5.jpg',
        'umb1.jpg', 'umb2.jpg', 'umb3.jpg', 'umb4.jpg', 'umb5.jpg',
        'mouse (1).jpg', 'mouse (2).jpg', 'mouse (3).jpg', 'mouse (4).jpg',
    )
]

# Run the similarity search
similar_images = detect_similar_images(query_image_path, data_image_paths)

# Show each result in its own window, waiting for a key press between them
for i, result in enumerate(similar_images):
    image = cv2.resize(result, (300, 300))
    cv2.imshow('Similar Image ' + str(i + 1), image)
    cv2.waitKey(0)
cv2.destroyAllWindows()