In [41]:
# from dotenv import load_dotenv
# load_dotenv()
import logging
import os

# Fail fast, with an actionable message, when the OpenAI credential is missing.
# Only the presence of the variable is checked; its value is never read into a
# cell result, so it cannot leak into the saved notebook output.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY is not set; export it (or enable the load_dotenv() "
        "lines above) before running this notebook"
    )

# Emit INFO-level log records from the libraries used in the cells below.
logging.basicConfig(level=logging.INFO)

## 初始化数据库

In [72]:
from pymilvus import Collection, MilvusException, connections, db, utility

# Open the default connection to the Milvus server.
# connect() registers the connection and returns nothing, so its result is not kept.
connections.connect(host="127.0.0.1", port=19530)
# Names of every database on the server, e.g. ['default', 'milvus_db', 'hgh'].
existing_databases = db.list_database()

db_name = 'milvus_db'
if db_name in existing_databases:
    # Switch to the database so the collection calls below operate on it.
    db.using_database(db_name)
    # Milvus rejects drop_database() while the database still contains
    # collections, so remove every collection first.
    for stale_collection in utility.list_collections():
        utility.drop_collection(stale_collection)
    # The database is now empty and can be dropped.
    db.drop_database(db_name)
    print(f'使用并删除数据库:{db_name}')

# (Re)create the database so the notebook always starts from a clean state.
db.create_database(db_name)
print(f'创建数据库: {db_name}')

创建数据库: milvus_db


In [60]:
# Drop every collection in the notebook's database.
# Relies on `db`, `db_name`, `utility`, `Collection` and the open connection
# from the database-initialisation cell.
# `collections` is not assigned in any visible cell, so on a fresh kernel the loop
# raised NameError; the list is now fetched from the server explicitly.
db.using_database(db_name)  # scope the deletion to this database, never `default`
collections = utility.list_collections()
for collection_name in collections:
    collection = Collection(name=collection_name)
    collection.drop()

In [1]:
from langchain_openai import OpenAIEmbeddings
from langchain_milvus import Milvus

# Embedding model handed to the Milvus vector store below.
embeddings = OpenAIEmbeddings(model='text-embedding-3-small')

# NOTE(review): this URI targets 192.168.255.10, whereas the database-initialisation
# cell connects to 127.0.0.1 — confirm both addresses reach the same Milvus server.
vector_store = Milvus(
    collection_name="langchain_milvus",
    connection_args=dict(uri='http://192.168.255.10:19530/', db_name="milvus_db"),
    embedding_function=embeddings,
    index_params=dict(
        index_type="FLAT",  # FLAT index type: an exact-search method.
        metric_type="L2",  # Compare vectors by Euclidean distance.
    ),
    # Strong consistency level: keeps data consistent across all replicas.
    consistency_level="Strong",
    # False means existing data or indexes are not deleted.
    drop_old=False,
)
vector_store
# Example query, kept disabled:
# query = "What are the novels Lila has written and what are their contents?"
# # Look up stored vectors that are similar to the query vector.
# vector_store.similarity_search(
#     query, k=1,
#     ranker_type="weighted",  # "weighted" ranking; may score similarity with some weighting scheme.
#     ranker_params={"weights": [0.6, 0.4]}  # Ranking parameters.
# )

<langchain_milvus.vectorstores.milvus.Milvus at 0x208f8e26690>

## 创建集合数据

In [None]:
from uuid import uuid4

from IPython.display import display
from langchain_core.documents import Document

# Two sample documents; each carries a `namespace` and a `source` metadata field.
document_1 = Document(
    metadata=dict(namespace='ankush', source="tweet"),
    page_content="I had chocolate chip pancakes and 111111111 eggs for breakfast this morning.",
)
document_2 = Document(
    metadata=dict(namespace='harrison', source="news"),
    page_content="The weather forecast for tomorrow 11111111111is cloudy and overcast, with a high of 62 degrees.",
)
documents = [document_1, document_2]
# One random UUID string per document (only referenced by the disabled call below).
uuids = [str(uuid4()) for _ in documents]

# Author's note: adding documents this way raises an error when they carry
# several metadata fields.
# results = vector_store.add_documents(documents=documents, ids=uuids)

# Build a store straight from the documents instead.
# NOTE(review): drop_old=True presumably discards any existing "langchain_milvus"
# collection first — confirm that is intended.
vectorstore = Milvus.from_documents(
    documents,
    embeddings,
    collection_name="langchain_milvus",
    connection_args=dict(uri='http://192.168.255.10:19530/', db_name="milvus_db"),
    partition_key_field="namespace",  # Use the "namespace" field as the partition key
    drop_old=True,
)

display(vectorstore)
print('---')
# documents

In [110]:
# Show the repr of `vectorstore` as this cell's output.
vectorstore

<langchain_milvus.vectorstores.milvus.Milvus at 0x1d628ee2ea0>

## 数据准备