In [21]:
import faiss

# Location of the serialized FAISS index, given relative to the notebook's working directory.
index_file_path = "./../dict/faiss_index_file.bin"

# Read the index from disk; `index` is the object search_tags() queries later in the notebook.
index = faiss.read_index(index_file_path)

# NOTE(review): search_tags() translates result positions into keys using the order of
# `embeddings`, so this file presumably was built from the same keyframes in the same
# order — confirm against whatever produced faiss_index_file.bin.


In [22]:
import gensim.downloader as api
# Pretrained "glove-wiki-gigaword-50" word vectors, obtained through gensim's downloader API.
# `model` is passed to get_tag_vector()/embed_tags() below to turn tags into vectors.
model = api.load("glove-wiki-gigaword-50")

In [23]:
import os
import json

keyframes_dir = './../dict/context_encoded/frame_tags_encoded'
all_keyframe_dict = dict()

# Walk every data-part directory; its identifier (L01, L02, ...) is the text after
# the last underscore of the entry name (or the whole name when there is none).
for part in sorted(os.listdir(keyframes_dir)):
    data_part = part.split('_')[-1]  # Extract data part like L01, L02
    # startswith() also copes with an empty suffix, where data_part[0] would raise.
    if not data_part.startswith('L'):
        continue

    # Build the path from the real entry name: `data_part` is only the suffix, so
    # using it here would point at a non-existent directory for names like "x_L01".
    data_part_path = os.path.join(keyframes_dir, part)
    if not os.path.isdir(data_part_path):
        continue  # A stray file whose name happens to match; nothing to list.
    video_dirs = sorted(os.listdir(data_part_path))

    # Each entry starting with 'V' is opened as one video's JSON file.
    for video_dir in video_dirs:
        if not video_dir.startswith('V'):
            continue
        # First four characters form the video id used in the merged key.
        # NOTE(review): assumes ids look like "V001"; longer ids would be truncated — confirm.
        vid_dir = video_dir[0:4]
        json_file_path = os.path.join(data_part_path, video_dir)

        # Read with an explicit encoding so the result does not depend on the platform default.
        with open(json_file_path, 'r', encoding='utf-8') as json_file:
            json_data = json.load(json_file)

        # Merge into the global dictionary under "<data part>_<video id>_<frame>".
        for frame, tags in json_data.items():
            new_key = f'{data_part}_{vid_dir}_{frame}'  # Create a new key
            all_keyframe_dict[new_key] = tags

# all_keyframe_dict now maps every merged key to that frame's tags.


In [24]:
import numpy as np

def get_tag_vector(tag, model):
    """Look up the embedding of one tag.

    Returns ``model[tag]`` when the model contains the tag; otherwise a zero
    vector of length ``model.vector_size`` so unknown tags still yield a vector.
    """
    if tag not in model:
        # Out-of-vocabulary tag: fall back to an all-zero vector.
        return np.zeros(model.vector_size)
    return model[tag]

def embed_tags(tags, model):
    """Collapse a collection of tags into one vector.

    Every tag is converted with ``get_tag_vector`` (unknown tags therefore
    contribute a zero vector) and the element-wise mean of those vectors is
    returned. An empty collection yields a zero vector of length
    ``model.vector_size``.
    """
    tag_vectors = []
    for tag in tags:
        tag_vectors.append(get_tag_vector(tag, model))

    if not tag_vectors:
        # Nothing to average.
        return np.zeros(model.vector_size)
    return np.mean(tag_vectors, axis=0)

# Map every merged keyframe key to the averaged embedding of its tags,
# preserving the iteration order of all_keyframe_dict.
embeddings = dict(
    (key, embed_tags(tags, model)) for key, tags in all_keyframe_dict.items()
)

In [25]:
def search_tags(query_tags, model, index, embeddings, top_k=1):
    """Return the keys of the entries the index reports as nearest to the query tags.

    Args:
        query_tags: Iterable of tag strings; embedded with ``embed_tags``.
        model: Word-vector model handed through to ``embed_tags``.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)`` — here the loaded FAISS index.
        embeddings: Dict whose key order is used to translate result
            positions into keys.
            NOTE(review): assumes the index was built from these embeddings
            in the same insertion order — confirm.
        top_k: Number of neighbours to request from the index.

    Returns:
        List of keys in the order returned by ``index.search``. It can be
        shorter than ``top_k`` when the index reports fewer matches.
    """
    query_vector = embed_tags(query_tags, model)
    # The index is queried with a batch; shape the query as a single float32 row.
    query_vector = np.expand_dims(query_vector, axis=0).astype('float32')

    # Distances are not used by callers, only the positions.
    _, indices = index.search(query_vector, top_k)

    # Materialise the key list once instead of rebuilding it for every hit.
    keys = list(embeddings.keys())

    results = []
    for i in indices[0]:
        # FAISS pads the result with -1 when it finds fewer than top_k neighbours;
        # indexing a list with -1 would silently return the last key instead.
        if i < 0:
            continue
        results.append(keys[i])

    return results



In [26]:
# Demo: look up the closest keyframe for a small, hand-picked set of tags.
query = {"people", "father", "son", "christmas"}
results = search_tags(query_tags=query, model=model, index=index, embeddings=embeddings)
print("Search results:", results)

Search results: ['L08_V001_008']


In [27]:
# NOTE(review): `vid` is not read by any cell visible here; the value has the same
# L##_V###_### shape as the keys of `embeddings` — confirm it is still needed.
vid = "L03_V026_117"

In [28]:
# Sanity check: show the first key of `embeddings` (dicts iterate in insertion order).
print(list(embeddings)[0])

L01_V001_005


In [29]:
# Display the index's `d` attribute (presumably its vector dimensionality — the cell
# shows 50, in line with the 50-d "glove-wiki-gigaword-50" model name; confirm).
index.d

50