# Lindorm 多模态检索

在以文搜图的场景下，为了获得更好的召回效果，我们引入 qwen-vl 对图片进行内容识别，再将识别出的描述信息与图片向量化信息一并写入 Lindorm，利用 Lindorm 的融合检索能力提升检索效果。

本示例基于 Lindorm + qwen-vl，利用大模型实现多模态检索，主要功能如下：

图片一键入库

多模态检索

    以图搜图
    以文搜图
    增量图片去重导入 

## PicturesImport - 批量导入图片
### 数据源介绍
    数据源: https://www.kaggle.com/datasets/iamsouravbanerjee/animal-image-dataset-90-different-animals
    Zooming in on Wildlife: 5400 Animal Images Across 90 Diverse Classes
    
    下载测试数据并解压到 data 目录下（下文默认的图片目录为 data/animals）。
    
### 数据入库
  首先配置待导入的数据目录

  目录中的所有 .jpg 图片先由 qwen-vl 生成内容描述，再经 Lindorm AI 引擎进行图文向量化，最后将描述与向量一并写入 Lindorm。

In [None]:
import ipywidgets as widgets

# Text box holding the directory whose images will be imported.
# The default value is also shown as the placeholder hint.
dir_input = widgets.Text(
    description="图片目录:",
    value="data/animals",
    placeholder="data/animals",
    layout=widgets.Layout(width="auto"),
    disabled=False,
)

# Render the text box in the cell output so the path can be edited
# before the import cell is run.
display(dir_input)

In [None]:
import os
# -*- coding: utf-8 -*-

import concurrent
from tqdm import tqdm

from concurrent.futures import ThreadPoolExecutor
from src.lindorm import Lindorm
    
def handle_picture(file_path: str):
    """Describe, embed and store a single image.

    Uses the module-level ``lindorm`` client, which must be created before
    this function is called.

    Args:
        file_path: Path of the image file on local disk.

    Returns:
        The value returned by ``lindorm.write_doc_with_description`` on
        success, or ``None`` when the description step reports a non-zero
        code (the image is skipped).

    Raises:
        OSError: If the image file cannot be opened or read.
        Exception: If the image/text embedding step reports a non-zero code.
    """
    # Read the bytes up front so the file handle is released before the
    # lindorm calls below, instead of staying open for their whole duration.
    with open(file_path, 'rb') as f:
        content = f.read()

    # Step 1: generate a textual description of the picture.
    code, description = lindorm.qwen_vl_picture_withdraw(file_path)
    if code != 0:
        # On failure the second tuple element is printed as the error.
        print(f"图片描述失败 {file_path}, code {code}, error {description}")
        return None

    # Step 2: embed the image bytes together with the description.
    code, embedding = lindorm.image_text_embedding(content, description)
    if code != 0:
        print(f"图文向量化失败 {file_path}, error {embedding}")
        raise Exception(f"图文向量化失败 {file_path}, error {embedding}")

    # Step 3: store the path, description and embedding.
    return lindorm.write_doc_with_description(file_path, description, embedding)

def find_jpg_files(dir: str):
    """Recursively collect the paths of all ``.jpg`` files under ``dir``.

    The extension is matched case-insensitively, so ``photo.JPG`` is found
    as well as ``photo.jpg``.

    Args:
        dir: Root directory to walk. The name shadows the builtin but is
            kept because it is part of the existing signature.

    Returns:
        A list of file paths (each joined with the directory it was found
        in). Empty when ``dir`` does not exist or holds no matching files.
    """
    jpg_paths = []
    for root, _dirs, names in os.walk(dir):
        jpg_paths.extend(
            os.path.join(root, name)
            for name in names
            # Lower-case first so upper/mixed-case extensions are not skipped.
            if name.lower().endswith(".jpg")
        )
    return jpg_paths

def import_all_data(dir: str):
    """Import every JPG found under ``dir`` using a pool of worker threads.

    Each file is handed to ``handle_picture`` on an 8-thread pool while a
    tqdm bar tracks completed tasks. An exception raised by any task is
    re-raised here when its result is collected.

    Args:
        dir: Root directory that is searched for images.
    """
    print('导入数据目录:', dir)
    picture_paths = find_jpg_files(dir)

    with ThreadPoolExecutor(max_workers=8) as pool:
        # Remember which path each submitted task belongs to.
        pending = {}
        for path in picture_paths:
            pending[pool.submit(handle_picture, path)] = path

        # The bar advances once per finished task, in completion order.
        progress = tqdm(
            concurrent.futures.as_completed(pending),
            total=len(picture_paths),
            desc="Importing Data",
        )
        for finished in progress:
            # Collecting the result surfaces any exception from the worker.
            finished.result()
    print("向量化后的数据入库完成")


# Driver for the import cell: make sure the search index exists, then import
# every image under the directory entered in the dir_input widget.
if __name__ == '__main__':
    print("start")
    # Module-level client; handle_picture reads this global when it runs.
    lindorm = Lindorm("multimodal_search_index2")
    
    # Create the search index only when get_index() reports that none exists.
    if lindorm.get_index() is None:
        # lindorm.drop_index()
        lindorm.create_search_index()
    print("索引创建完成")
    # The directory comes from the text box rendered by the previous cell.
    import_all_data(dir_input.value)

## 交互式多模态检索 

提供以图搜图和以文本搜图两种检索方式

以图搜图: 上传一张本地图片, 返回与该图片相似的图片
以文搜图: 输入文本, 可选择纯向量检索或 RRF 融合检索, 返回与输入文本相似的图片

In [None]:
import io
import ipywidgets as widgets
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display, HTML, clear_output
from src.lindorm import Lindorm

# Client for the same index name that the import cell writes to.
lindorm = Lindorm("multimodal_search_index2")

# Free-text query box used for text-to-image search.
text_input = widgets.Text(
    description='以文搜图:',
    placeholder='输入关键字',
    value='',
    disabled=False,
)

# Upload control used for image-to-image search; one image at a time.
file_input = widgets.FileUpload(
    description='以图搜图',
    accept='image/*',
    multiple=False,
)

# One button per retrieval strategy. The click handler dispatches on the
# button's description, so these labels must match the ones it checks.
search_buttons = [
    widgets.Button(description=label, button_style='')
    for label in ("纯向量检索", "RRF融合检索")
]

# Shared output area that the event handlers print and plot into.
output = widgets.Output()

def show_hits(hits):
    """Print a summary of the search hits and plot their images in a grid.

    Args:
        hits: Sequence of hit dicts, or ``None``. For each hit, ``_id`` is
            opened as an image path, ``_score`` is printed, and the optional
            ``_source`` dict supplies a ``description`` for the summary line.

    Returns:
        None. When ``hits`` is ``None`` or empty a failure message is
        printed and nothing is plotted.
    """
    if not hits:
        print("图片搜索失败")
        return
    print('search, count', len(hits))
    for hit in hits:
        source = hit.get('_source')
        if source:
            # A missing/None description is treated as empty text rather
            # than raising TypeError on the slice.
            description = source.get("description") or ""
            summary = description[:45].replace('\n', ';') + "..."
        else:
            summary = ""
        print(hit.get('_id'), hit.get('_score'), summary)

    # Size the 3-column grid from the hits actually returned so every hit
    # gets a cell and no more rows than needed are created.
    columns = 3
    rows = (len(hits) + columns - 1) // columns
    # squeeze=False keeps `axes` a 2-D array regardless of the row count.
    _, axes = plt.subplots(rows, columns, figsize=(25, 15), squeeze=False)
    cells = axes.flatten()
    for ax, hit in zip(cells, hits):
        # The hit id is used as the path of the image to display.
        ax.imshow(Image.open(hit.get('_id')))
    # Hide the axes on every cell, including trailing cells with no image,
    # so unused slots do not show empty frames.
    for ax in cells:
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# 定义按钮点击事件处理函数
def on_button_clicked(b):
    """Run a text-to-image search for the clicked button and show the hits.

    The query is read from ``text_input``; the retrieval strategy is chosen
    from the clicked button's ``description``. All output is rendered into
    the shared ``output`` widget.

    Args:
        b: The clicked button widget.
    """
    with output:
        clear_output()  # Drop whatever the previous search rendered.
        query = text_input.value
        if not query:
            print("请先输入描述文字")
            return

        code, embedding = lindorm.text_embedding(query)
        # NOTE(review): assumes text_embedding follows the same convention as
        # the other lindorm calls in this file (non-zero code = failure, second
        # element carries the error) -- confirm against the client.
        if code != 0:
            print(f"文本向量化失败, code {code}, error {embedding}")
            return

        if b.description == "纯向量检索":
            hits = lindorm.knn_search(embedding)
        elif b.description == "RRF融合检索":
            # The query is known to be non-empty here (checked above).
            hits = lindorm.rrf_search(query, embedding)
        else:
            # Avoid falling through with `hits` unbound for an unknown label.
            print(f"未知的检索方式: {b.description}")
            return
        print("文本搜图")
        show_hits(hits)
        
# 定义图片显示函数
def show_uploaded_image(change):
    """Display the uploaded image and run an image-to-image search with it.

    Intended as the observer for the upload widget's ``value`` trait. All
    output is rendered into the shared ``output`` widget.

    Args:
        change: Trait-change dict; ``change['new']`` holds the uploaded
            files, of which only the first is used.
    """
    with output:
        clear_output()  # Drop whatever the previous search rendered.
        uploads = change['new']
        if not uploads:
            return

        # Only the first uploaded file is used.
        content = uploads[0]['content']

        # Preview the uploaded picture before searching.
        image = Image.open(io.BytesIO(content))
        plt.imshow(image)
        plt.axis('off')
        plt.show()

        code, embedding = lindorm.picture_embedding(content)
        # NOTE(review): assumes picture_embedding follows the same convention
        # as the other lindorm calls in this file (non-zero code = failure,
        # second element carries the error) -- confirm against the client.
        if code != 0:
            print(f"图片向量化失败, code {code}, error {embedding}")
            return
        hits = lindorm.knn_search(embedding)
        print("图片上传检索")
        show_hits(hits)
            
        
# Both search buttons share one click handler, which receives the clicked
# button as its argument.
for button in search_buttons:
    button.on_click(on_button_clicked)

# Run the image search whenever the upload widget's value changes.
file_input.observe(show_uploaded_image, names='value')

# Show the controls, followed by the shared output area.
display(file_input, text_input, *search_buttons, output)