In [1]:
import numpy as np
import shutil
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
import os
import cv2
import sqlite3 as db
import array
import tqdm




In [2]:
# Connect to the feature database (or create one if it doesn't exist)
conn = db.connect('featureDB.db')
# Create a 'cursor' for executing commands
cursor = conn.cursor()

# create a table named "ImageFeatures"  if it doesnt exist
cursor.execute("CREATE TABLE IF NOT EXISTS ImageFeatures (iID TEXT, fV BLOB)")

<sqlite3.Cursor at 0x21dea293110>

In [3]:
def getImageArrayFromFile(img_path):
    img       = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    return img_array

In [4]:
def computeFeatureVector(img_path, model):
    img_array = getImageArrayFromFile(img_path)
    feature_vector = model.predict(img_array)
    return feature_vector.flatten()

In [5]:
def store_feature_vector(image_ID, feature_vector):
    # Convert th feature vector as bytes and store it into the database
    fv_bytes = feature_vector.tobytes()
    cursor.execute("INSERT INTO ImageFeatures (iID, fV)  VALUES (?, ?)",(image_ID,fv_bytes))
    conn.commit()

In [6]:
def retrieve_feature_vector(image_ID):
    #Get the row
    query = "SELECT * FROM ImageFeatures WHERE iID = '" + image_ID + "'"
    cursor.execute(query)
    result = cursor.fetchone()

    if result is not None:
        # Convert the bytes back to a float array and return it
        float_array_bytes = result[1]
        float_array = np.frombuffer(float_array_bytes, dtype='float32')
        return float_array
    else:
        return None

In [7]:
def check_fv_inDB(image_ID):  
    # Retrieve the record from the database
    query = "SELECT * FROM ImageFeatures WHERE iID = '" + image_ID + "'"
    cursor.execute(query)
    result = cursor.fetchone()
    
    #If there is no result then return false
    if result is None:
        return False
    else:
        return True

In [8]:
def updateFV_ImagesInFolder (folderPath, model):
    #For each of the image in the directory
    for img in tqdm.tqdm(os.listdir(folderPath)):
        img_path = os.path.join(folderPath, img) 
        
        #If the image was not analysed before, compute the feature vector and update in DB
        if(not check_fv_inDB(img)):
            fv = computeFeatureVector(img_path, model)
            store_feature_vector(img, fv) 

In [10]:
# This function is not used any more!
def find_similar_images(query_image_path, image_directory, model, top_n=30):
    query_vector = computeFeatureVector(query_image_path, model)

    #image_paths = [os.path.join(image_directory, img) for img in os.listdir(image_directory)]
    #image_paths = [os.path.join(image_directory, img) for img in os.listdir(image_directory)]
    #image_vectors = [compute_feature_vector(img_path, model) for img_path in image_paths]
    image_files = [img for img in os.listdir(image_directory)]
    image_vectors = [compute_feature_vector(os.path.join(image_directory, img_file), model) for img_file in image_files]
    
    similarities = cosine_similarity([query_vector], image_vectors).flatten()
    indices = np.argsort(similarities)[::-1]

    similar_images_folder = 'D:\projects\dva\simimages'

    print(f"\nTop {top_n} similar images to '{query_image_path}':")
    for i in range(top_n):
        print(f"{i + 1}. {image_paths[indices[i]]} (Similarity: {similarities[indices[i]]:.4f})")
        #Copy the file to the folder
        shutil.copy2(image_paths[indices[i]], similar_images_folder)


In [11]:
def calculate_similarityscore(image_id, fv_query):
    fv_image = retrieve_feature_vector(image_id)
    score = cosine_similarity([fv_image],[fv_query]).flatten()[0]
    return 

In [12]:
def store_similar_images(query_image_path, image_directory, similar_images_folder, model):
    #Find the feature vector of the query image
    fv_query = computeFeatureVector(query_image_path, model)
  
    #Drop the similarity table if it already exists
    cursor.execute("DROP TABLE IF EXISTS sim_table")
    
    # create the similaritytable
    create_simtable_query = '''
                            CREATE TABLE IF NOT EXISTS sim_table (
                                id TEXT PRIMARY KEY,
                                score INTEGER)
                            '''
    print("Computing similairty scores...")
    
    # Execute the SQL command to create the similarity table
    cursor.execute(create_simtable_query)
   
    #Get the list of image paths
    cursor.execute('SELECT DISTINCT iID FROM ImageFeatures')
    # Fetch the result
    imageIDs = cursor.fetchall()
    
    # Define the SQL command to insert data into the similarity table
    insert_data_simtable_query = '''
                                INSERT INTO sim_table (id, score)
                                VALUES (?, ?)
                                '''
    # Execute the SQL command to compute and insert data into similarity table
    cursor.executemany(insert_data_simtable_query, [(iID[0], calculate_similarityscore(iID[0], fv_query)) for iID in imageIDs])
    
    print("Done.")
    
    # Query the top n similar images
    query_similar_images = '''
                            SELECT id FROM sim_table
                            ORDER BY score DESC       
                            LIMIT 30
                            '''  
    # Execute the SQL command
    cursor.execute(query_similar_images)

    # Fetch the result
    top_rows = cursor.fetchall()

    # Print the file name and copy the files to the folder
    for row in top_rows:
        print(row)
        shutil.copy2(os.path.join(image_directory, row[0]), similar_images_folder)

In [14]:
if __name__ == "__main__":
    # Load pre-trained VGG16 model
    base_model = VGG16(weights='imagenet')
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

    #Path to the directory with all the images
    image_directory  = 'D:\projects\dva\data\samples\CAM_FRONT'
    # Path to the query image
    query_image_path = 'D:\projects\dva\data\samples\CAM_FRONT\\n015-2018-07-24-11-22-45+0800__CAM_FRONT__1532402936662460.jpg'
    # Folder to store similar images
    similar_images_folder = 'D:\projects\dva\simimages'
   
    #Compute feature vectors for all the images in the folder
    print("Analysing Images images in folder...")
    updateFV_ImagesInFolder(image_directory, model)
    
    # Find and store similar images
    store_similar_images(query_image_path, image_directory, similar_images_folder, model)
    
    print("Done...")

Analysing Images images in folder...


100%|██████████| 404/404 [00:00<00:00, 22627.71it/s]


Computing similairty scores...
Done.
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151603512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151604012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151604512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151605012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151605512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151606012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151606512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151607012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151607512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151608012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151608512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151609012404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151609512404.jpg',)
('n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151609912404.jpg',)
('n008-2018