-
Notifications
You must be signed in to change notification settings - Fork 82
Closed
Description
My configuration is as follows:
psycopg==3.2.3
Here is my build code:
from xinference.client import Client
from langchain_postgres import PGVector
from input.load_input import load_input
from langchain_core.documents import Document
import psycopg
import numpy as np
client = Client("http://192.0.0.181:9997")
list_models_run = client.list_models()
# model_uid = client.launch_model(model_name='local-bge-reranker-v2-m3', model_type="rerank") #运行新模型 结束时要注销掉
model_uid = list_models_run['bge-m3']['id']
embedding_client = client.get_model(model_uid)
create_table_query = """
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
content TEXT NOT NULL,
embedding FLOAT[] NOT NULL
);
"""
insert_query = """
INSERT INTO documents (content, embedding)
VALUES (%s, %s);
"""
# 连接到数据库
conn = psycopg.connect(
dbname="grag",
user="grag",
password="netted",
host="192.0.0.181"
)
cur = conn.cursor()
cur.execute("SELECT to_regclass('public.documents');")
table_exists = cur.fetchone()[0] is not None
if not table_exists:
cur.execute(create_table_query)
def text_embed(text_unit,embedding_model):
for row in text_unit.itertuples(index=True):
text_list = row.text
ids_list = []
text_embedding_list = []
metadatas_list = []
for index,text in enumerate(text_list):
text_id = f'{row.id}{index}'
# 1024 dim
text_embedding = embedding_client.create_embedding(text)['data'][0]['embedding']
cur.execute(insert_query, (text,text_embedding))
path = f'/run/back/nlp/ql/dlz01/d1'
text_units = load_input(path,'txt')
text_embed(text_units,embedding_client)
I successfully embedded the data, and you can see the following in pgAdmin:
Here is my search code:
from xinference.client import Client
# from langchain_postgres import PGVector
from input.load_input import load_input
from langchain_core.documents import Document
import psycopg
client = Client("http://192.0.0.181:9997")
list_models_run = client.list_models()
# model_uid = client.launch_model(model_name='local-bge-reranker-v2-m3', model_type="rerank") #运行新模型 结束时要注销掉
model_uid = list_models_run['bge-m3']['id']
embedding_client = client.get_model(model_uid)
def get_db_connection():
return psycopg.connect(
dbname="grag",
user="grag",
password="netted",
host="192.0.0.181"
)
# 定义一个函数来获取最相似的5个记录
def get_most_similar_contents(input_vector, limit=5):
# 将输入向量转换为PostgreSQL数组格式
#input_vector_str = '[' + ','.join(map(str, input_vector)) + ']'
# SQL查询
query = f"""
SELECT content, embedding
FROM documents
ORDER BY embedding <-> %(query_embedding)s DESC
LIMIT %(k)s;
"""
# 执行查询并获取结果
with get_db_connection() as conn:
with conn.cursor() as cur:
cur.execute(query,{'query_embedding': input_vector, 'k': 5})
results = cur.fetchall()
# 返回结果
return results
def get_first_content_and_embedding():
# SQL查询,获取第一行数据
query = """
SELECT content, embedding
FROM documents
LIMIT 1;
"""
# 执行查询并获取结果
with get_db_connection() as conn:
with conn.cursor() as cur:
cur.execute(query)
result = cur.fetchone() # 使用fetchone()获取第一行数据
# 返回结果
return result
quuery = '4.根据权利要求3所述的一种用于合金的等离子体浸没离子注入一体机,其特征在于:所述传动齿轮(12)的半圆周上设有轮齿,另半圆周为光滑面,所述卡齿盘(13)的中部开设有传动槽,所述传动槽的上下两端均设置有一排轮齿,传动齿轮(12)与两排轮齿啮合。'
input_vector = embedding_client.create_embedding(quuery)['data'][0]['embedding']
# 调用函数并打印结果
similar_contents = get_most_similar_contents(input_vector)
for content, embedding, similarity in similar_contents:
print(f"Content: {content}\nSimilarity: {similarity}\n")
# first_content_and_embedding = get_first_content_and_embedding()
# Content = first_content_and_embedding[0]
# Embedding = first_content_and_embedding[1]
Search failed with the following error message:
Why is it like this? There is clearly this operator
Does pgvertor not support double precision operation?
Metadata
Metadata
Assignees
Labels
No labels