Skip to content

Unable to search #105

@minglong-huang

Description

@minglong-huang

My configuration is as follows:
psycopg==3.2.3

Here is my build code:

from xinference.client import Client
from langchain_postgres import PGVector
from input.load_input import load_input
from langchain_core.documents import Document
import psycopg
import numpy as np

client = Client("http://192.0.0.181:9997")
list_models_run = client.list_models()
# model_uid = client.launch_model(model_name='local-bge-reranker-v2-m3', model_type="rerank") #运行新模型 结束时要注销掉
model_uid = list_models_run['bge-m3']['id']
embedding_client = client.get_model(model_uid)

create_table_query = """
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    content TEXT NOT NULL,
    embedding FLOAT[] NOT NULL
);
"""

insert_query = """
INSERT INTO documents (content, embedding)
VALUES (%s, %s);
"""
# 连接到数据库
conn = psycopg.connect(
    dbname="grag",
    user="grag",
    password="netted",
    host="192.0.0.181"
)
cur = conn.cursor()

cur.execute("SELECT to_regclass('public.documents');")
table_exists = cur.fetchone()[0] is not None
if not table_exists:
    cur.execute(create_table_query)


def text_embed(text_unit,embedding_model):

    for row in text_unit.itertuples(index=True):
        text_list = row.text
        ids_list = []
        text_embedding_list = []
        metadatas_list = []
        for index,text in enumerate(text_list):
            text_id = f'{row.id}{index}'
            # 1024 dim
            text_embedding = embedding_client.create_embedding(text)['data'][0]['embedding']
            cur.execute(insert_query, (text,text_embedding))

path = f'/run/back/nlp/ql/dlz01/d1'
text_units = load_input(path,'txt')
text_embed(text_units,embedding_client)

I successfully embedded the data, and you can see the following in pgAdmin:

1732179169363

Here is my search code:

from xinference.client import Client
# from langchain_postgres import PGVector
from input.load_input import load_input
from langchain_core.documents import Document
import psycopg

client = Client("http://192.0.0.181:9997")
list_models_run = client.list_models()
# model_uid = client.launch_model(model_name='local-bge-reranker-v2-m3', model_type="rerank") #运行新模型 结束时要注销掉
model_uid = list_models_run['bge-m3']['id']
embedding_client = client.get_model(model_uid)


def get_db_connection():
    return psycopg.connect(
        dbname="grag",
        user="grag",
        password="netted",
        host="192.0.0.181"
    )


# 定义一个函数来获取最相似的5个记录
def get_most_similar_contents(input_vector, limit=5):
    # 将输入向量转换为PostgreSQL数组格式
    #input_vector_str = '[' + ','.join(map(str, input_vector)) + ']'

    # SQL查询
    query = f"""
    SELECT content, embedding
    FROM documents
    ORDER BY embedding <-> %(query_embedding)s DESC
    LIMIT %(k)s;
    """

    # 执行查询并获取结果
    with get_db_connection() as conn:
        with conn.cursor() as cur:
            cur.execute(query,{'query_embedding': input_vector, 'k': 5})
            results = cur.fetchall()

    # 返回结果
    return results

def get_first_content_and_embedding():
    # SQL查询,获取第一行数据
    query = """
    SELECT content, embedding
    FROM documents
    LIMIT 1;
    """

    # 执行查询并获取结果
    with get_db_connection() as conn:
        with conn.cursor() as cur:
            cur.execute(query)
            result = cur.fetchone()  # 使用fetchone()获取第一行数据

    # 返回结果
    return result

quuery = '4.根据权利要求3所述的一种用于合金的等离子体浸没离子注入一体机,其特征在于:所述传动齿轮(12)的半圆周上设有轮齿,另半圆周为光滑面,所述卡齿盘(13)的中部开设有传动槽,所述传动槽的上下两端均设置有一排轮齿,传动齿轮(12)与两排轮齿啮合。'
input_vector = embedding_client.create_embedding(quuery)['data'][0]['embedding']
# 调用函数并打印结果
similar_contents = get_most_similar_contents(input_vector)
for content, embedding, similarity in similar_contents:
    print(f"Content: {content}\nSimilarity: {similarity}\n")

# first_content_and_embedding = get_first_content_and_embedding()
# Content = first_content_and_embedding[0]
# Embedding = first_content_and_embedding[1]

Search failed with the following error message:
image

Why is it like this? There is clearly this operator
Does pgvertor not support double precision operation?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions