保存 OpenAI API Key 為環境變量

In [1]:
from getpass import getpass
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv before looking up the key.
load_dotenv()

# Prompt for the OpenAI key only when it is missing or empty.
# Writing os.getenv(...) straight back into os.environ raises TypeError when
# the variable is unset (environment values must be str), which would make
# the interactive fallback below unreachable.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")

In [2]:
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator

# Conversation for the first demo: one system instruction plus one user question.
system_message = ChatMessage.from_system("即使某些輸入資料採用其他語言，也始終以繁體中文回應。")
user_message = ChatMessage.from_user("什麼是自然語言處理？要簡潔。")
messages = [system_message, user_message]

# Create the chat generator and send the conversation. The bare call on the
# last line lets the notebook display the returned value.
chat_generator = OpenAIChatGenerator(model="gpt-4-turbo")
chat_generator.run(messages=messages)

  from .autonotebook import tqdm as notebook_tqdm


{'replies': [ChatMessage(content='自然語言處理（NLP）是一們資訊科技領域，致力於讓計算機能夠理解、解釋、操作和生成人類語言。这项技术涉及語言學、計算機科學和人工智慧的交叉發展。', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4-turbo-2024-04-09', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 96, 'prompt_tokens': 67, 'total_tokens': 163}})]}

In [3]:
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk

# Re-create the chat generator, this time with print_streaming_chunk as the
# streaming callback.
chat_generator = OpenAIChatGenerator(model="gpt-4-turbo", streaming_callback=print_streaming_chunk)

# Send the `messages` list defined earlier in the notebook and keep the result.
response = chat_generator.run(messages=messages)

自然語言處理是人工普遍和語言學領域的一部分，它幫助計算機理解、解釋、操作和生成人類語言。

In [4]:
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

# Sample documents: one sentence per person giving their name and city.
documents = [
    Document(content=text)
    for text in (
        "我的名字是 Jean，我住在 Paris。",
        "我的名字是 Mark，我住在 Berlin。",
        "我的名字是 Giorgio，我住在 Rome。",
        "我的名字是 Marta，我住在 Madrid。",
        "我的名字是 Harry，我住在 London。",
    )
]

# In-memory document store that the indexing pipeline writes into.
document_store = InMemoryDocumentStore()

# Components of the indexing pipeline: a document embedder and a writer.
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_writer = DocumentWriter(document_store=document_store)

# Assemble the indexing pipeline.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(name="doc_embedder", instance=doc_embedder)
indexing_pipeline.add_component(name="doc_writer", instance=doc_writer)

# Route the embedder's `documents` output into the writer's `documents` input.
indexing_pipeline.connect("doc_embedder.documents", "doc_writer.documents")

# Run the pipeline on the sample documents; the result is shown as cell output.
indexing_pipeline.run({"doc_embedder": {"documents": documents}})

Batches: 100%|██████████| 1/1 [00:01<00:00,  2.00s/it]


{'doc_writer': {'documents_written': 5}}

In [5]:
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator

# Prompt template: lists the content of every retrieved document as context,
# then the question, and leaves the answer for the model to complete.
template = """
根據給定的上下文回答問題。

上下文:
{% for document in documents %}
    {{ document.content }}
{% endfor %}
問題: {{ question }}
答案:
"""

# Components of the RAG pipeline. The text embedder uses the same model name
# as the document embedder used for indexing.
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
prompt_builder = PromptBuilder(template=template)
llm = OpenAIGenerator(model="gpt-4-turbo")

# Assemble the RAG pipeline.
rag_pipe = Pipeline()
rag_pipe.add_component("embedder", text_embedder)
rag_pipe.add_component("retriever", retriever)
rag_pipe.add_component("prompt_builder", prompt_builder)
rag_pipe.add_component("llm", llm)

# Wire the components: query embedding -> retriever -> prompt builder -> LLM.
# The final connect call is the cell's last expression, so its return value
# is displayed as the cell output.
rag_pipe.connect("embedder.embedding", "retriever.query_embedding")
rag_pipe.connect("retriever", "prompt_builder.documents")
rag_pipe.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x325298a30>
🚅 Components
  - embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: OpenAIGenerator
🛤️ Connections
  - embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [6]:
# The same query text is given to the embedder (as `text`) and to the prompt
# builder (as `question`).
query = "Mark 住在哪裡？"
rag_inputs = {
    "embedder": {"text": query},
    "prompt_builder": {"question": query},
}
rag_pipe.run(rag_inputs)

Batches: 100%|██████████| 1/1 [00:01<00:00,  1.23s/it]


{'llm': {'replies': ['Mark 住在 Berlin。'],
  'meta': [{'model': 'gpt-4-turbo-2024-04-09',
    'index': 0,
    'finish_reason': 'stop',
    'usage': {'completion_tokens': 7,
     'prompt_tokens': 127,
     'total_tokens': 134}}]}}

In [7]:
def rag_pipeline_func(query: str):
    """Run ``query`` through ``rag_pipe`` and return the first reply.

    The query text is passed to the ``embedder`` component as ``text`` and to
    the ``prompt_builder`` component as ``question``.

    Args:
        query: Text to run through the RAG pipeline.

    Returns:
        A dict ``{"reply": ...}`` holding the first entry of the ``replies``
        list produced by the ``llm`` component.
    """
    pipeline_inputs = {
        "embedder": {"text": query},
        "prompt_builder": {"question": query},
    }
    llm_output = rag_pipe.run(pipeline_inputs)["llm"]
    first_reply = llm_output["replies"][0]
    return {"reply": first_reply}

In [8]:
# Hard-coded weather readings keyed by city name.
WEATHER_INFO = {
    "Berlin": {"weather": "mostly sunny", "temperature": 7, "unit": "celsius"},
    "Paris": {"weather": "mostly cloudy", "temperature": 8, "unit": "celsius"},
    "Rome": {"weather": "sunny", "temperature": 14, "unit": "celsius"},
    "Madrid": {"weather": "sunny", "temperature": 10, "unit": "celsius"},
    "London": {"weather": "cloudy", "temperature": 9, "unit": "celsius"},
}


def get_current_weather(location: str):
    """Return the weather entry for ``location``.

    Args:
        location: City name; must match a ``WEATHER_INFO`` key exactly.

    Returns:
        The ``WEATHER_INFO`` entry for ``location`` when one exists, otherwise
        a fixed fallback reading (sunny, 21.8, fahrenheit).
    """
    # Fallback data used for any location that is not in the table.
    fallback = {"weather": "sunny", "temperature": 21.8, "unit": "fahrenheit"}
    return WEATHER_INFO.get(location, fallback)

In [9]:
# Function schema describing `rag_pipeline_func`: one required string
# parameter named `query`.
rag_tool = {
    "type": "function",
    "function": {
        "name": "rag_pipeline_func",
        "description": "獲取有關人們居住地點的信息",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "搜尋中使用的查詢。從用戶的消息中推斷出這一點。它應該是一個問題或一個陳述。",
                }
            },
            "required": ["query"],
        },
    },
}

# Function schema describing `get_current_weather`: one required string
# parameter named `location`.
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "取得當前天氣",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "城市和州，例如加州舊金山",
                }
            },
            "required": ["location"],
        },
    },
}

# Tool list in the order the schemas are declared above.
tools = [rag_tool, weather_tool]

In [10]:
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk

# Conversation for the function-calling demo: a system instruction plus the
# user's question.
messages = [
    ChatMessage.from_system("不要假設將哪些值插入函數中。如果用戶要求不明確，請要求澄清。"),
    ChatMessage.from_user("你能告訴我 Mark 住在哪裡嗎？"),
]

# Chat generator with print_streaming_chunk as the streaming callback.
chat_generator = OpenAIChatGenerator(model="gpt-4-turbo", streaming_callback=print_streaming_chunk)

# Pass the tool schemas to the model through generation_kwargs.
tool_kwargs = {"tools": tools}
response = chat_generator.run(messages=messages, generation_kwargs=tool_kwargs)

# Print the response for inspection.
print(response)

{'replies': [ChatMessage(content='[{"index": 0, "id": "call_2ARyzeivzPqS0ETx3jVHt4ct", "function": {"arguments": "{\\"query\\":\\"Where does Mark live?\\"}", "name": "rag_pipeline_func"}, "type": "function"}]', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4-turbo-2024-04-09', 'index': 0, 'finish_reason': 'tool_calls', 'usage': {}})]}
