In [4]:
import pandas as pd
import numpy as np
import ast # 用于解析看起来像 Python 字面量或元组、列表、字典、数字、字符串、布尔值和 None 的字符串
from openai import OpenAI
import os
import dotenv

# 1.读取存储向量

In [5]:
# Load the CSV that holds the embedded review results
df = pd.read_csv('data/评论的向量画结果集.csv')
# The 'embedding' column is parsed from its string form into Python objects
df['embedding_vec'] = df['embedding'].map(ast.literal_eval)
# Show the length of the first parsed embedding
print(len(df.loc[0, 'embedding_vec']))

1536


# 2.计算向量距离

In [6]:
# Cosine similarity of two vectors (semantically close texts are expected to score higher)
def cosine_distance(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Despite the name, the value is ``dot(a, b) / (|a| * |b|)``, i.e. a
    similarity: larger means more alike, and identical directions give 1.

    Args:
        a: 1-D sequence or array of numbers.
        b: 1-D sequence or array of numbers with the same length as ``a``.

    Returns:
        The cosine similarity, or ``0.0`` when either vector has zero norm
        (the ratio is undefined there and would otherwise evaluate to NaN).
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard the 0/0 case so callers that sort on the result never see NaN.
    if norm_product == 0:
        return 0.0
    return np.dot(a, b) / norm_product

# 3.相似搜索

In [8]:
# Load environment variables
dotenv.load_dotenv()
# API client; key and base URL are read from OPENAI_API_KEY / OPENAI_API_BASE
client = OpenAI(
    api_key=os.environ.get('OPENAI_API_KEY'),
    base_url=os.environ.get('OPENAI_API_BASE'),
)

# Request an embedding for the given text and return the vector
def embedding_resp(text, model='text-embedding-3-small'):
    """Return the embedding of ``text`` produced by the module-level ``client``.

    Args:
        text: Input passed as ``input`` to ``client.embeddings.create``.
        model: Embedding model name; defaults to 'text-embedding-3-small'.
            NOTE(review): query vectors are presumably only comparable with
            stored vectors produced by the same model - confirm before
            overriding.

    Returns:
        The ``embedding`` attribute of the first item in the response data;
        any further items in the response are ignored.
    """
    resp = client.embeddings.create(input=text, model=model)
    return resp.data[0].embedding
    
# Similarity search: rank the stored texts against a query string in embedding space
def search_by_word(df, work_key, n_result=3, print_flag=True):
    """Return the ``n_result`` entries of ``df`` most similar to ``work_key``.

    Args:
        df: DataFrame with an ``embedding_vec`` column (one vector per row)
            and a ``combined`` text column. A ``similarity`` column is
            written to (or overwritten on) this frame as a side effect.
        work_key: Query text; it is embedded via ``embedding_resp``.
        n_result: Number of top-ranked rows to keep.
        print_flag: When true, print each result followed by a blank line.

    Returns:
        Series of the selected ``combined`` strings, highest similarity
        first, with 'Title:' removed and '; Content:' shortened to ';'.
    """
    # Embed the query and convert it to an array once, so the per-row
    # similarity call does not re-convert the same list for every row.
    word_embedding = np.asarray(embedding_resp(work_key))
    # Score every stored vector against the query
    df['similarity'] = df.embedding_vec.apply(lambda x: cosine_distance(x, word_embedding))
    # Keep the best matches and strip the field markers; regex=False pins
    # literal replacement regardless of the installed pandas default.
    res = (
        df.sort_values('similarity', ascending=False)
        .head(n_result)
        .combined.str.replace('Title:', "", regex=False)
        .str.replace('; Content:', ';', regex=False)
    )
    # Optional console output
    if print_flag:
        for r in res:
            print(r)
            print()
    return res

In [10]:
# Run a query for the three closest matches
# NOTE(review): the recorded output for 'awful' lists favourable reviews -
# confirm the stored embeddings were created with the same model as the query.
search_by_word(df, 'awful', n_result=3)
print('搜索结束....')

Good standard English breakfast - decent brand!;This English Breakfast tea from Higgins & Burke, is good: it's basic, good tasting black tea (English Breakfast blend) and is tasty.  I wasn't blown away by the flavor, but it was good: I enjoyed the tea, it brewed up quickly in these nice tea bags (I recommend about one minute of brew time), and then added a splash of cream: it was quite nice!

Mellow;This honey made from blueberry blossoms has a milder, more mellow flavor profile than a lot of other honeys. It's really good on waffles because it doesn't overwhelm the vanilla in them.

Wild Honey;This really is unfiltered honey made from wildflowers, so if you have allergies expect a reaction, at least initially. If you are planning to visit Central Florida on vacation this is a good buy to prep your immune system against the local flora. The flavor is a little stronger than single cultivar honeys, so you don't need as much to flavor your food and drink with. I detect more than a hint of