In [14]:
# 初始化環境
from rag_to_vector_db_pinecone.rag_pipeline.file_2_text import file_2_text
from rag_to_vector_db_pinecone.rag_pipeline.text_2_chunks import text_2_chunks
from rag_to_vector_db_pinecone.rag_pipeline.chunks_embeding_2_vector_db import (
    jina_embedding,
    embeddings_2_vector_db
)

import json
from pinecone import Pinecone, ServerlessSpec
# Load settings (API keys) from env.json.
with open('env.json', 'r', encoding='utf-8') as file:
    config = json.load(file)

# Initialise the Pinecone client and open the index used for retrieval.
# This runs after the `with` block so env.json is closed before the client
# is constructed. `Pinecone` here is the class imported from the top-level
# `pinecone` package in this cell.
pc = Pinecone(api_key=config['pinecone_api_key'])
index = pc.Index("rag-vector-db-test")

In [None]:
# 使用者輸入查詢 pinecone
def query_pincone(query, config, top_k=3, chunks_path='data/chunks.json'):
    """Retrieve the stored chunks that best match ``query``.

    The query is embedded with ``jina_embedding``, the module-level ``index``
    is searched, and every returned match id is resolved to its text through
    the local chunk file.

    Args:
        query: Value handed to ``jina_embedding`` to produce the query vector.
        config: Settings object forwarded unchanged to ``jina_embedding``.
        top_k: Number of matches requested from the index (default 3).
        chunks_path: Path of a JSON file containing a list of objects that
            carry ``id`` and ``text`` keys (default ``data/chunks.json``).

    Returns:
        A list of ``{"id", "score", "text"}`` dicts in the order the index
        returned the matches. A match whose id does not appear in the chunk
        file is skipped.
    """
    query_embedding = jina_embedding(query, config)
    results = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True
    )
    matches = results['matches']

    with open(chunks_path, 'r', encoding='utf-8') as file:
        chunks = json.load(file)

    # Build the id lookup once instead of rescanning the chunk list for every
    # match. setdefault keeps the first chunk when an id occurs more than once.
    chunk_by_id = {}
    for chunk in chunks:
        chunk_by_id.setdefault(chunk['id'], chunk)

    match_chunks_data = []
    for match in matches:
        chunk = chunk_by_id.get(match['id'])
        if chunk is None:
            # No local text for this id: leave it out of the result.
            continue
        match_chunks_data.append({
            "id": match['id'],
            "score": match['score'],
            "text": chunk['text']
        })

    return match_chunks_data


def create_augmented_prompt(query, match_chunks_data):
    """Assemble the prompt text that combines the query with retrieved chunks.

    Args:
        query: The user's question, inserted into the prompt as-is.
        match_chunks_data: Iterable of dicts providing ``score`` (formatted
            to two decimals) and ``text`` for each retrieved chunk.

    Returns:
        The complete prompt string: instructions, the query, one numbered
        reference entry per chunk, and the closing answer guidance.
    """
    # One numbered entry per retrieved chunk, each terminated by a blank line.
    reference_block = "".join(
        f"參考資料 {rank} [相關度: {item['score']:.2f}]:\n{item['text']}\n\n"
        for rank, item in enumerate(match_chunks_data, start=1)
    )

    # The leading spaces inside the template are part of the emitted prompt.
    return f"""以下是使用者的查詢問題，請根據提供的參考資料回答。如果參考資料中沒有相關資訊，請明確說明。

    使用者查詢: {query}

    {reference_block}

    請根據以上參考資料提供準確、簡潔且有條理的回答。引用資料時請註明來源編號。"""

# Demo: run retrieval for a sample query and print the prompt built from it.
if __name__ == "__main__":
    query = '黴菌的生物學特性'
    # Fetch the matching chunks (id, score, text) for the sample query.
    match_chunks_data = query_pincone(query, config)
    from pprint import pprint
    # sort_dicts=False keeps each dict's keys in insertion order when printed.
    pprint(match_chunks_data, sort_dicts=False)
    print('\n\n===================prompt==============')
    # Combine the query and the retrieved chunks into the final prompt text.
    prompt = create_augmented_prompt(query, match_chunks_data)
    print(prompt)

In [21]:
# 設定呼叫大型語言模型
from langchain_openai import ChatOpenAI

# Build the shared ChatOpenAI client; the API key is read from the loaded config.
llm = ChatOpenAI(openai_api_key=config["chat_gpt_key"], model="gpt-3.5-turbo")

def query_llm(prompt):
    """Send ``prompt`` to the module-level ``llm`` and return the reply text.

    Args:
        prompt: Input passed directly to ``llm.invoke``.

    Returns:
        The ``content`` attribute of the model response. If invoking the
        model or reading the response raises, the exception is not
        propagated; the string ``"An error occurred: <exception>"`` is
        returned instead.
    """
    try:
        # Both the call and the attribute access stay inside the try so any
        # failure is reported through the returned string.
        return llm.invoke(prompt).content
    except Exception as err:
        return f"An error occurred: {err}"
    
# Demo: send one sample message to the model and print whatever comes back.
if __name__ == "__main__":
    # query_llm returns an "An error occurred: ..." string instead of raising,
    # so a failed call is printed here rather than shown as a traceback.
    result = query_llm('我想出去玩')
    print(result)

An error occurred: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************B-cA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
