In [4]:
import os
from sentence_transformers import SentenceTransformer
from PIL import Image
import numpy as np
import faiss
import gradio as gr

#### Модель

In [5]:
# Load the 'clip-ViT-B-32' model through sentence-transformers.
# This single instance is used to encode both the dataset images and the query image.
model = SentenceTransformer('clip-ViT-B-32')

#### Извлечение embedding'ов

In [6]:
# Dataset root passed to extract_embeddings(), which walks it as <data_dir>/<class>/<image file>.
data_dir = 'data/processed/'

def extract_embeddings(data_dir):
    """Encode every image found under ``data_dir`` with the global ``model``.

    The directory is expected to hold one sub-directory per class, each
    containing image files. Entries are visited in sorted order so the
    resulting row order is reproducible across runs and platforms
    (``os.listdir`` alone gives no ordering guarantee).

    Hidden entries (names starting with ``.``, e.g. ``.ipynb_checkpoints`` or
    ``.DS_Store``), stray files at the class level and nested directories at
    the image level are skipped instead of aborting the walk.

    Args:
        data_dir: Path of the dataset root.

    Returns:
        A tuple ``(embeddings, image_paths)``: a float32 NumPy array holding
        one entry per encoded image, and the list of file paths in the same
        order. When no image is found the array is empty and the list is ``[]``.
    """
    embeddings = []
    image_paths = []
    for cls in sorted(os.listdir(data_dir)):
        cls_dir = os.path.join(data_dir, cls)
        # Only real, non-hidden directories are treated as classes.
        if cls.startswith('.') or not os.path.isdir(cls_dir):
            continue
        for img_file in sorted(os.listdir(cls_dir)):
            img_path = os.path.join(cls_dir, img_file)
            if img_file.startswith('.') or not os.path.isfile(img_path):
                continue
            # convert() decodes the pixels into a new image, so the file
            # handle can be released as soon as the with-block ends.
            with Image.open(img_path) as raw:
                img = raw.convert('RGB')
            embeddings.append(model.encode(img))
            image_paths.append(img_path)
    # FAISS only accepts float32 vectors; make the dtype explicit.
    return np.asarray(embeddings, dtype=np.float32), image_paths

# Encode the whole dataset, then write both results to disk.
# image_paths.npy is read back by the app cell; embeddings.npy is not re-loaded
# anywhere in the visible notebook.
embeddings, image_paths = extract_embeddings(data_dir)
np.save("data/embeddings.npy", embeddings)
np.save("data/image_paths.npy", image_paths)

#### Создание индекса

In [7]:
# Build a flat L2 (Euclidean-distance) FAISS index over the embeddings.
# The vector size is read from the second axis of the embedding array
# (assumes one 1-D embedding per image, i.e. a 2-D array).
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the index; the app cell reads it back from this path.
faiss.write_index(index, "data/faiss_index.bin")

#### Приложение

In [None]:
# Reload the index and the path list from the files written by the earlier cells.
index = faiss.read_index("data/faiss_index.bin")
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects from the file.
# The paths are saved from a list of strings, so pickling is presumably not needed —
# confirm and drop the flag.
image_paths = np.load("data/image_paths.npy", allow_pickle=True)

def search_similar(image, top_k=10):
    """Return the indexed images closest to ``image`` in embedding space.

    Args:
        image: Query picture as a NumPy array (anything ``Image.fromarray``
            accepts), or ``None`` when nothing was uploaded.
        top_k: Maximum number of neighbours to return. Defaults to 10.

    Returns:
        A list of PIL images in the order FAISS reports them. The list is
        empty when ``image`` is ``None`` or the index holds no vectors, and
        shorter than ``top_k`` when the index holds fewer vectors.
    """
    # Gradio passes None when the form is submitted without an image.
    if image is None:
        return []

    # Never ask FAISS for more neighbours than there are indexed vectors.
    k = min(int(top_k), index.ntotal)
    if k <= 0:
        return []

    img = Image.fromarray(image).convert('RGB')
    # FAISS expects a float32 matrix of shape (n_queries, dimension).
    emb = np.asarray(model.encode(img), dtype='float32').reshape(1, -1)
    _, indices = index.search(emb, k)

    similar_images = []
    for idx in indices[0]:
        # FAISS pads missing neighbours with -1; using it as a subscript
        # would silently wrap around to the last stored path.
        if idx < 0:
            continue
        # copy() loads the pixels into an independent image so the file
        # handle is closed before the result is handed to the UI.
        with Image.open(image_paths[idx]) as found:
            similar_images.append(found.copy())
    return similar_images

# Gradio UI: one image input wired to search_similar, results rendered in a gallery.
iface = gr.Interface(
    fn=search_similar,
    inputs=gr.Image(),
    outputs=gr.Gallery(label="Топ-10 похожих изображений"),  # label reads "Top-10 similar images"
    title="Поиск похожих бабочек"  # title reads "Search for similar butterflies"
)

# Start the Gradio app.
iface.launch()