## PicturesImport - 批量导入图片
### 数据源介绍
    数据源 102flowers: https://thor.robots.ox.ac.uk/flowers/102/102flowers.tgz (329M)
    下载测试数据并解压到 data 目录下（图片目录默认为 data/jpg）

### 数据入库
  首先配置待导入的数据目录

  目录中的所有 .jpg 图片会先经过 Lindorm AI 引擎进行向量化，再将得到的向量写入 Lindorm

In [None]:
import ipywidgets as widgets

# Text box used to choose the image directory to import; it is pre-filled
# with data/jpg and stays editable.
dir_input = widgets.Text(
    description="图片目录:",
    value="data/jpg",
    placeholder="data/jpg",
    layout=widgets.Layout(width="auto"),
    disabled=False,
)

# Render the text box in the cell output.
display(dir_input)

In [None]:
import os
# -*- coding: utf-8 -*-

import concurrent
from tqdm import tqdm

from concurrent.futures import ThreadPoolExecutor
from src.lindorm import Lindorm

# Module-level Lindorm client; handle_picture looks this name up each time it
# runs, so every worker thread goes through whichever instance is bound here.
lindorm = Lindorm()

def handle_picture(file_path: str):
    """Embed one local image and store the resulting vector in Lindorm.

    The raw bytes of ``file_path`` are passed to
    ``lindorm.picture_embedding``; on success the returned embedding is
    written with ``lindorm.write_doc(file_path, embedding)``.

    Args:
        file_path: Path of the image file to read.

    Returns:
        Whatever ``lindorm.write_doc`` returns.

    Raises:
        OSError: If the file cannot be opened or read.
        Exception: If ``picture_embedding`` returns a non-zero status code.
    """
    # Read everything up front and close the file immediately, so the
    # descriptor is not held open while the embedding and write calls run
    # (presumably network round-trips -- confirm against src.lindorm).
    with open(file_path, 'rb') as f:
        content = f.read()

    code, embedding = lindorm.picture_embedding(content)
    if code != 0:
        # On failure the second tuple element is reported as the error.
        print(f"图文向量化失败 {file_path}, error {embedding}")
        raise Exception(f"图文向量化失败 {file_path}, error {embedding}")
    return lindorm.write_doc(file_path, embedding)


def import_all_data(dir: str):
    """Embed and store every ``.jpg`` file located directly inside ``dir``.

    The directory is scanned non-recursively. Each matching file is handed
    to ``handle_picture`` on a pool of 8 worker threads while a tqdm bar
    tracks completed tasks.

    Args:
        dir: Directory containing the images to import.

    Raises:
        Exception: Re-raised from the first completed task that failed.
    """
    print('导入数据目录:', dir)

    # Keep regular files only, and only those whose name ends in ".jpg".
    file_paths = []
    for entry in os.listdir(dir):
        candidate = os.path.join(dir, entry)
        if os.path.isfile(candidate) and entry.endswith('.jpg'):
            file_paths.append(candidate)

    with ThreadPoolExecutor(max_workers=8) as pool:
        # Queue one task per image.
        pending = [pool.submit(handle_picture, path) for path in file_paths]

        # Advance the progress bar as tasks finish, in completion order.
        progress = tqdm(
            concurrent.futures.as_completed(pending),
            total=len(file_paths),
            desc="Importing Data",
        )
        for finished in progress:
            # result() re-raises any exception raised inside the worker.
            finished.result()
    print("向量化后的数据入库完成")


# Entry point: rebuild the search index from scratch, then import every image
# from the directory currently entered in the dir_input text box.
if __name__ == '__main__':
    print("start")
    # Rebinds the module-level `lindorm` name to a new client instance;
    # handle_picture resolves the name at call time, so it uses this one.
    # NOTE(review): a client is already created at module level -- confirm
    # whether this second instantiation is intentional.
    lindorm = Lindorm()
    
    # Drop any existing index first so the import starts from an empty one.
    if lindorm.get_index() is not None:
        print("索引已存在，删除")
        lindorm.drop_index()
    lindorm.create_search_index()
    print("索引创建完成")
    # dir_input is the text widget created in the earlier cell; .value holds
    # the directory path to import.
    import_all_data(dir_input.value)
