# 上週作業

## 這個部份上周剛好講過，所以跳過，自己看。

In [None]:
import os

# Move the working directory three levels up.
# NOTE(review): presumably this lands on the project root so that the `src.*`
# imports in later cells resolve — confirm against the repository layout.
# The path is relative, so re-running this cell climbs three more levels.
os.chdir("../../../")

In [None]:
import json

from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate, ChatPromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings

from src.io.path_definition import get_project_dir


# Embedding model used to compare ingredient lists by semantic similarity.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# How each selected example is rendered inside the few-shot prompt.
example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Ingredients: {input}\nOrigin: {output}",
)

# Read the training recipes. The encoding is pinned so the load does not depend
# on the platform's default locale encoding.
train_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-1', 'recipe_train.json')
with open(train_path, 'r', encoding='utf-8') as f:
    recipe_train = json.load(f)

# Only the first 1000 recipes are indexed; each becomes one
# {"input": ingredients, "output": cuisine} example.
examples = [
    {"input": " ".join(recipe['ingredients']), "output": recipe['cuisine']}
    for recipe in recipe_train[:1000]
]

example_selector = SemanticSimilarityExampleSelector.from_examples(
    # The list of examples available to select from.
    examples,
    # The embedding class used to produce embeddings which are used to measure semantic similarity.
    embeddings,
    # The VectorStore class that is used to store the embeddings and do a similarity search over.
    FAISS,
    # The number of examples to produce.
    k=5,
)

# Few-shot prompt whose examples are chosen per query by the selector above.
similar_prompt = FewShotPromptTemplate(
    # We provide an ExampleSelector instead of examples.
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="Find the recipe origin based on the ingredients",
    suffix="Ingredients: {ingredients}\nOrigin:",
    input_variables=["ingredients"],
)

In [None]:
similar_prompt

In [None]:
# Read the held-out recipes. The encoding is pinned so the load does not depend
# on the platform's default locale encoding.
test_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-1', 'recipe_test.json')
with open(test_path, 'r', encoding='utf-8') as f:
    recipe_test = json.load(f)

# Use the ingredient list of the recipe at index 99 as the query.
existing_ingredients = recipe_test[99]['ingredients']

# Render the few-shot prompt for this query to inspect the selected examples.
similar_prompt.invoke(", ".join(existing_ingredients))

In [None]:
from langchain.chat_models import ChatOpenAI

from src.initialization import credential_init

# NOTE(review): presumably credential_init() puts OPENAI_API_KEY into the
# environment, since the key is read from os.environ right below — confirm in
# src.initialization.
credential_init()

# Chat model shared by the following cells; the key lookup raises KeyError if
# OPENAI_API_KEY is not set.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-2024-05-13", temperature=0)

In [None]:
# Chain the few-shot prompt into the chat model.
pipeline_ = similar_prompt|model

In [None]:
pipeline_.invoke(", ".join(existing_ingredients))

##  飛安報告

In [None]:
from src.io.path_definition import get_project_dir

import pandas as pd

# Load the sample workbook from the Week-3 tutorial folder into a DataFrame.
df = pd.read_excel(os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-3', 'Data sample.xlsx'))

### 回家作業 1

若要飛安事故報告可以有複數分類結果，如何調整Prompt，包含parser

In [None]:
def build_standard_chat_prompt_template_v2(kwargs):
    """Assemble a ``ChatPromptTemplate`` from a dict of per-role settings.

    Args:
        kwargs: Mapping that may contain the keys ``'system'``, ``'few_shot'``
            and ``'human'``. The ``'system'`` and ``'human'`` values are
            keyword arguments for ``PromptTemplate``; the ``'few_shot'`` value
            is keyword arguments for ``FewShotChatMessagePromptTemplate``.
            Missing or falsy entries are skipped.

    Returns:
        A ``ChatPromptTemplate`` whose messages are always ordered
        system -> few-shot -> human, whatever the key order of ``kwargs``.
    """
    # Imported locally so the function does not depend on which notebook cells
    # ran first. In particular, nothing in the visible cells imports
    # FewShotChatMessagePromptTemplate, so the 'few_shot' branch would
    # otherwise fail with NameError.
    from langchain_core.prompts import (
        ChatPromptTemplate,
        FewShotChatMessagePromptTemplate,
        HumanMessagePromptTemplate,
        PromptTemplate,
        SystemMessagePromptTemplate,
    )

    # Insertion order of this dict fixes the order of the resulting messages.
    builders = {
        'system': lambda cfg: SystemMessagePromptTemplate(prompt=PromptTemplate(**cfg)),
        'few_shot': lambda cfg: FewShotChatMessagePromptTemplate(**cfg),
        'human': lambda cfg: HumanMessagePromptTemplate(prompt=PromptTemplate(**cfg)),
    }

    messages = [build(kwargs[key]) for key, build in builders.items() if kwargs.get(key)]

    return ChatPromptTemplate.from_messages(messages)

In [None]:
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema


# System prompt: role description plus the closed list of labels the model may
# choose from. The label `Organizational Process` was previously misspelled
# (`rganizational Process`), which made the model return a misspelled category.
system_template = '''You are an AI assistant assigned with a task of safty report classification based on the content.
    You are a seasoned flight safety inspector with deep and extensive knowledge of aviation safty. 
    
    You always do the best work you can. You are highly analytical and pay close attention to details. 
    
    The candidates of the output are:

    - `Organizational Influence;Resource Management`
    - `Organizational Influence;Organizational Climate`
    - `Organizational Influence;Organizational Process`
    - `Unsafe Supervisions;Inadequate Supervision`
    - `Unsafe Supervisions;Planned Inappropriate Operations`
    - `Unsafe Supervisions;Failed to Correct Problem`
    - `Unsafe Supervisions;Supervisory Violation`
    - `Precondition for Unsafe Acts;Environmental Factors;Physical Environment`
    - `Precondition for Unsafe Acts;Environmental Factors;Technological Environment`
    - `Precondition for Unsafe Acts;Condition of Operators;Adverse Mental State`
    - `Precondition for Unsafe Acts;Condition of Operators;Adverse Physiological State`
    - `Precondition for Unsafe Acts;Condition of Operators;Physical/Mental Limitations`
    - `Precondition for Unsafe Acts;Personnel Factors;Crew Resource Management`
    - `Precondition for Unsafe Acts;Personnel Factors;Personal Readiness`
    - `Unsafe Acts;Errors;Decision Errors`
    - `Unsafe Acts;Errors;Skill-Based Errors`
    - `Unsafe Acts;Errors;Perceptual Errors`
    - `Unsafe Acts;Violations;Routine`
    - `Unsafe Acts;Violations;Exceptional`

     The report can involve multiple categories.
    '''


# Human turn: the report text followed by the parser's format instructions.
human_template = """{report}; format instruction: {format_instructions}"""

# The schema type is declared as a list so that the generated format
# instructions agree with the description instead of advertising a string.
response_schemas = [
        ResponseSchema(name="categories",
                       description="The predicted categories as a python list",
                       type="List[string]")
    ]


output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()


# `input_variables` is the PromptTemplate field name; the previous key
# `input_variable` did not match it.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["report"],
                    "partial_variables": {'format_instructions': format_instructions}}}

chat_prompt_template = build_standard_chat_prompt_template_v2(input_)

# prompt -> chat model -> structured parser (returns a dict with "categories").
pipeline_ = chat_prompt_template|model|output_parser

In [None]:
# Take the 'Report 1' cell of the row at positional index 3 as the sample report.
text = df.iloc[3]['Report 1']
print(text)

In [None]:
# Classify the sample report; the chain ends in the structured output parser,
# so `output` is the parsed result rather than the raw model message.
output = pipeline_.invoke({"report": text})
print(output)

### 回家作業 2

你可以很清楚的看到一個飛安事故中，可以出現複數報告。
將 `Report 1` 和 `Report 1.2` 結合起來，產生一份新的報告。

抄也是一門技術

In [None]:
# System prompt for the two-report variant. The label `Organizational Process`
# was previously misspelled (`rganizational Process`), which made the model
# return a misspelled category.
system_template = """You are an AI assistant assigned with a task of safty report classification based on the content.
    You are a seasoned flight safety inspector with deep and extensive knowledge of aviation safty. 

    You will receieve two reports <report_1> and <report_2> and you will consolidate the content before drawing conclusion. 
    
    You always do the best work you can. You are highly analytical and pay close attention to details. 
    
    The candidates of the output are:

    - `Organizational Influence;Resource Management`
    - `Organizational Influence;Organizational Climate`
    - `Organizational Influence;Organizational Process`
    - `Unsafe Supervisions;Inadequate Supervision`
    - `Unsafe Supervisions;Planned Inappropriate Operations`
    - `Unsafe Supervisions;Failed to Correct Problem`
    - `Unsafe Supervisions;Supervisory Violation`
    - `Precondition for Unsafe Acts;Environmental Factors;Physical Environment`
    - `Precondition for Unsafe Acts;Environmental Factors;Technological Environment`
    - `Precondition for Unsafe Acts;Condition of Operators;Adverse Mental State`
    - `Precondition for Unsafe Acts;Condition of Operators;Adverse Physiological State`
    - `Precondition for Unsafe Acts;Condition of Operators;Physical/Mental Limitations`
    - `Precondition for Unsafe Acts;Personnel Factors;Crew Resource Management`
    - `Precondition for Unsafe Acts;Personnel Factors;Personal Readiness`
    - `Unsafe Acts;Errors;Decision Errors`
    - `Unsafe Acts;Errors;Skill-Based Errors`
    - `Unsafe Acts;Errors;Perceptual Errors`
    - `Unsafe Acts;Violations;Routine`
    - `Unsafe Acts;Violations;Exceptional`

     The report can involve multiple categories.
    """

# Human turn: both reports, each labelled, followed by the format instructions.
human_template = """
                 report_1:{report_1}; 
                 report_2: {report_2};
                 format instruction: {format_instructions}
                 """

# The schema type is declared as a list so that the generated format
# instructions agree with the description instead of advertising a string.
response_schemas = [
        ResponseSchema(name="categories",
                       description="The predicted categories as a python list",
                       type="List[string]")
    ]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# `input_variables` is the PromptTemplate field name, and the two variables are
# separate list items (previously one string "report_1, report_2").
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["report_1", "report_2"],
                    "partial_variables": {'format_instructions': format_instructions}}}

chat_prompt_template = build_standard_chat_prompt_template_v2(input_)

# prompt -> chat model -> structured parser (returns a dict with "categories").
pipeline_ = chat_prompt_template|model|output_parser

In [None]:
df.iloc[3]['Report 1.2']

In [None]:
# Classify the incident from both reports of the same row: `text` is the
# 'Report 1' cell selected earlier, 'Report 1.2' is read here.
output = pipeline_.invoke({"report_1": text,
                       "report_2": df.iloc[3]['Report 1.2']})

# Show only the predicted categories from the parsed result.
print(output['categories'])

### Keynote

- 若你更理解你的數據，你可以建立更精確的Prompt，更明確的表示每個數據代表的意義，來提升輸出的品質。

# Remote server

### 1. Making a POST Request (發送 POST 請求):

- requests.post(...) sends an HTTP POST request to the specified URL.
- The URL "http://localhost:5000/openai/invoke" points to a local server running on port 5000, at the endpoint /openai/invoke.
- The json parameter is used to send a JSON payload with the request. In this case, the payload is {'input': "Where is Taiwan"}.
- requests.post(...) 發送一個 HTTP POST 請求到指定的 URL。
- URL "http://localhost:5000/openai/invoke" 指向一個本地服務器，該服務器在端口 5000 上運行，並且指向 /openai/invoke 端點。
- json 參數用於隨請求發送 JSON 負載。在這個例子中，負載是 {'input': "Where is Taiwan"}。

### 2. Response Handling (響應處理):

- The server processes the request and sends back a response.
- The response is stored in the response variable, which can then be inspected or used further in the code.
- 服務器處理請求並返回響應。
- 響應存儲在 response 變量中，之後可以檢查或在代碼中進一步使用。

In [None]:
import requests

# Call the locally served chain. The timeout keeps the cell from hanging
# forever when the server is unreachable, and raise_for_status() reports an
# HTTP error here instead of as a confusing failure when the body is read.
response = requests.post(
    "http://localhost:5000/openai/invoke",
    json={'input': "Where is Taiwan"},
    timeout=60,
)
response.raise_for_status()

In [None]:
response.json()

In [None]:
response.json()['output']['content']

# Use the remote model as `Software as a service` (SaaS)

## Basic Usage

### 1. Creating an Instance of RemoteRunnable (創建 RemoteRunnable 的實例):

- This line creates an instance of RemoteRunnable and initializes it with the URL of the remote language model service. In this case, the service is running locally on http://localhost:5000/openai/.
- 這行代碼創建一個 RemoteRunnable 的實例，並用遠程語言模型服務的 URL 進行初始化。在這個例子中，服務在本地運行，URL 為 http://localhost:5000/openai/。

In [None]:
from langserve import RemoteRunnable

# Client-side handle for the chain served under /openai/ on the local server;
# it is used below like any other runnable (invoke, astream, piping).
remote_llm = RemoteRunnable("http://localhost:5000/openai/")

### 2. Asynchronous Streaming of Responses (異步流式處理回應):

- remote_llm.astream("Where is Taiwan?") sends the query "Where is Taiwan?" to the remote service and retrieves the response as a stream.
- async for msg in ... is used to handle the streaming responses asynchronously.
- print(msg.content, end="", flush=True) prints each message content received from the stream without adding a new line after each message, and flushes the output buffer to ensure the message is displayed immediately.
- remote_llm.astream("Where is Taiwan?") 將查詢 "Where is Taiwan?" 發送到遠程服務，並以流的形式檢索回應。
- async for msg in ... 用於異步處理流式回應。
- print(msg.content, end="", flush=True) 打印每個從流中接收到的消息內容，不在每個消息後添加新行，並刷新輸出緩衝區以確保消息立即顯示。

In [None]:
# Stream the answer chunk by chunk and print each chunk as it arrives.
# Top-level `async for` relies on the notebook's support for top-level await;
# in a plain script this loop has to live inside an `async def`.
async for msg in remote_llm.astream("Where is Taiwan?"):
    print(msg.content, end="", flush=True)

In [None]:
output = remote_llm.invoke("Where is Taiwan?")

In [None]:
output.content

## Make the external service a part of the chain

### 1. Comedian Chain (喜劇演員鏈)

- ChatPromptTemplate.from_messages(...) creates a prompt template where the system prompt instructs the model to either tell a joke or state a fact, and the human prompt provides the input.
- This template is then piped (|) to the remote language model (remote_llm) to generate the comedian's response.
- ChatPromptTemplate.from_messages(...) 創建一個提示模板，其中系統提示指示模型要麼講一個笑話，要麼陳述一個不搞笑的事實，並且僅輸出一個。
- 然後將此模板通過管道（|）傳遞給遠程語言模型（remote_llm），以生成喜劇演員的回應。

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# System instruction: answer with exactly one joke or one plain fact.
system_template = """
                  You are a comedian that sometimes tells funny jokes and other times you just state facts that are not funny. 
                  Please either tell a joke or state fact now but only output one.
                  """

# The human turn is just the topic supplied by the caller.
human_template = """
                 {input}
                 """


# `input_variables` is the PromptTemplate field name; the previous key
# `input_variable` did not match it.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["input"]}}

chat_prompt_template = build_standard_chat_prompt_template_v2(input_)

# Prompt piped into the remotely served model.
comedian_pipeline_ = chat_prompt_template|remote_llm


### 2. Joke Classifier Chain

- This chain is similar to the comedian chain but serves a different purpose.
- The system prompt asks the model to classify the joke as "funny" or "not funny" and repeat the first five words for reference.
- This template is also piped to the remote language model (remote_llm).
- 這個鏈與喜劇演員鏈類似，但用途不同。
- 系統提示要求模型將笑話分類為“搞笑”或“不搞笑”，並重複笑話的前五個詞以供參考。
- 此模板也通過管道傳遞給遠程語言模型（remote_llm）。

In [None]:
# System instruction: label the joke and echo its first five words.
system_template = """
                  Please determine if the joke is funny. Say `funny` if it's funny and `not funny` if not funny. 
                  Then repeat the first five words of the joke for reference...
                  """

# The human turn carries the joke to be judged.
human_template = """
                 {joke}
                 """

# `input_variables` is the PromptTemplate field name; the previous key
# `input_variable` did not match it.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["joke"]}}

chat_prompt_template = build_standard_chat_prompt_template_v2(input_)

# Prompt piped into the remotely served model.
joke_classifier_pipeline_ = chat_prompt_template|remote_llm


### 3. Combining Chains with RunnablePassthrough

- This combines the comedian chain and the joke classifier chain using RunnablePassthrough.assign.
- The comedian chain generates the output, and then this output is passed to the joke classifier chain to classify its humor.
- 這將喜劇演員鏈和笑話分類器鏈結合在一起，使用 RunnablePassthrough.assign。
- 喜劇演員鏈生成輸出，然後將此輸出傳遞給笑話分類器鏈以分類其幽默性。

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Step 1 produces {"joke": <text>}; step 2 adds "classification" next to it.
# The comedian's reply is reduced to plain text first: without the parser the
# whole message object is string-formatted into the classifier prompt, so the
# classifier would see message metadata instead of just the joke.
pipeline_ = {"joke": comedian_pipeline_ | StrOutputParser()} | RunnablePassthrough.assign(
    classification=joke_classifier_pipeline_
)

In [None]:
pipeline_.invoke({"input": "A man and a beer"})

# ChatBot

In [None]:
from IPython.display import Image

Image(url="https://python.langchain.com/v0.1/assets/images/chat_use_case-eb8a4883931d726e9f23628a0d22e315.png")

- N-Shot
- The chat history can be considered a list of question-answer pairs.
- If the chatbot doesn’t remember past chats, it’s called stateless because it doesn’t know what happened before.

## Minimal Example

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

# A single user turn with no prior context.
message = HumanMessage(
            content="Translate this sentence from English to Chinese (繁體中文): I love programming."
        )

# The model is called with a list of messages; here the list holds one turn.
model.invoke(
    [message]
)

In [None]:
model.invoke([HumanMessage(content="What did you just say?")])

The `memory` is not there, so it does not understand your question.

The following example shows how to add memory into the conversation.

In [None]:
# "Memory" supplied by hand: the earlier user turn and the assistant's reply
# are sent again together with the follow-up question.
model.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to  Chinese (繁體中文): I love programming."
        ),
        AIMessage(content="我愛程式設計."),
        HumanMessage(content="What did you just say?"),
    ]
)

## Prompt templates

### 1. Creating a ChatPromptTemplate 

- ChatPromptTemplate.from_messages(...) creates a prompt template for the chatbot.
- The first message in the template is a system message: "You are a helpful assistant. Answer all questions to the best of your ability." This message sets the context and behavior of the assistant, instructing it to be helpful and thorough in its responses.
- MessagesPlaceholder(variable_name="messages") is a placeholder for dynamic content. The variable_name="messages" specifies that this placeholder will be filled with user messages during the conversation.
- ChatPromptTemplate.from_messages(...) 創建了一個聊天機器人的提示模板。
- 模板中的第一條消息是一條系統消息：“You are a helpful assistant. Answer all questions to the best of your ability.” 此消息設置了助手的上下文和行為，指示其在回答中要提供幫助並盡力而為。
- MessagesPlaceholder(variable_name="messages") 是一個動態內容的佔位符。variable_name="messages" 指定該佔位符將在對話中插入用戶消息。

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Fixed system instruction; the template contains no variables.
system_prompt = PromptTemplate(template="""
                                        You are a helpful assistant. Answer all questions to the best of your ability.
                                        """)
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# System message first, then whatever list of messages the caller passes
# under the "messages" key.
prompt = ChatPromptTemplate.from_messages(
    [
        system_message,
        MessagesPlaceholder(variable_name="messages"),
    ]
)

### 2. Creating the Chain

- This line pipes (|) the prompt template to a language model (model).
- chain represents a sequence of operations where the prompt template is used to format user messages, and the language model processes these messages to generate responses.
- 這行代碼通過管道（|）將提示模板傳遞給語言模型（model）。
- chain 代表一系列操作，其中提示模板用於格式化用戶消息，語言模型處理這些消息以生成回應。

In [None]:
# Format the conversation with the prompt, then send it to the chat model.
pipeline_ = prompt | model

The MessagesPlaceholder above inserts the chat messages passed into the chain's input under the `messages` key directly into the prompt. We can then invoke the chain like this:

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

# The whole conversation so far is passed under "messages", the variable name
# of the MessagesPlaceholder in the prompt.
pipeline_.invoke(
    {
        "messages": [
            HumanMessage(
            content="Translate this sentence from English to  Chinese (繁體中文): I love programming."
            ),
            AIMessage(content="我愛程式設計."),
            HumanMessage(content="What did you just say?"),
            ],
    }
)

## Example of Using MessageHistory

As a shortcut for managing the chat history, we can use a MessageHistory class, which is responsible for saving and loading chat messages. There are many built-in message history integrations that persist messages to a variety of databases, but for this quickstart we'll use an in-memory demo message history called ChatMessageHistory.

### 1. Importing the ChatMessageHistory Class (導入 ChatMessageHistory 類)

- This line imports the ChatMessageHistory class from the langchain.memory module. This class is used to handle the chat messages in memory.
- 這行代碼從 langchain.memory 模塊中導入 ChatMessageHistory 類。此類用於在內存中處理聊天消息。

In [None]:
from langchain.memory import ChatMessageHistory

### 2. Creating an Instance of ChatMessageHistory (創建 ChatMessageHistory 的實例)

- This line creates an instance of ChatMessageHistory. This instance will store the chat messages in memory for this session.
- 這行代碼創建一個 ChatMessageHistory 的實例。該實例將在此會話期間將聊天消息存儲在內存中。

In [None]:
demo_chat_history = ChatMessageHistory()

### 3. Adding User and AI Messages (添加用戶和 AI 消息)

- demo_chat_history.add_user_message("hi!") adds a user message ("hi!") to the chat history.
- demo_chat_history.add_ai_message("whats up?") adds an AI response ("whats up?") to the chat history.
- demo_chat_history.add_user_message("hi!") 將用戶消息（“hi!”）添加到聊天記錄中。
- demo_chat_history.add_ai_message("whats up?") 將 AI 回應（“whats up?”）添加到聊天記錄中。

In [None]:
# Append one user turn and one AI turn, in that order.
demo_chat_history.add_user_message("hi!")

demo_chat_history.add_ai_message("whats up?")

# Display the stored messages.
demo_chat_history.messages

In [None]:
# Start over with an empty history, replacing the "hi!" demo above.
demo_chat_history = ChatMessageHistory()

# Seed it with the translation exchange used in the earlier examples.
demo_chat_history.add_user_message("Translate this sentence from English to  Chinese (繁體中文): I love programming.")

demo_chat_history.add_ai_message("我愛程式設計.")

# Display the stored messages.
demo_chat_history.messages

### 4. Retrieving the Messages (檢索消息)

- This line retrieves the list of messages stored in demo_chat_history. Each message is an object that contains information about the sender (user or AI) and the content of the message.
- 這行代碼檢索存儲在 demo_chat_history 中的消息列表。每條消息都是一個對象，包含有關發送者（用戶或 AI）和消息內容的信息。

In [None]:
{"messages": demo_chat_history.messages}

In [None]:
# Add the follow-up question to the stored history.
demo_chat_history.add_user_message(
    "What did you just say?"
)

# Send the full stored history, so the model can see the earlier exchange.
response = pipeline_.invoke({"messages": demo_chat_history.messages})

response

# **** 預計第一個小時結束 ****

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

from src.initialization import credential_init


# Re-creates the embeddings and chat model, so the second half of the notebook
# can presumably be run on its own.
# NOTE(review): presumably credential_init() puts OPENAI_API_KEY into the
# environment, since the key is read from os.environ below — confirm in
# src.initialization.
credential_init()

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# The key lookup raises KeyError if OPENAI_API_KEY is not set.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-2024-05-13", temperature=0)

## Conversational Retrievers - Step 1

- 土味情話反殺大全 (推薦上Youtube看)

In [None]:
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document

# Two-column table of example exchanges: 'input' holds a pickup line and
# 'output' the comeback paired with it. Note that this rebinds `df`, which
# earlier held the Excel data.
df = pd.DataFrame(data=[["确认过眼神，你是我爱的人。", "确认过眼神，我是你泡不到的人。"],
                         ["万水千山总是情，爱我多一点行不行。", "一寸光阴一寸金，劝你死了这条心。"],
                         ["今天吃了泡面，吃了炒面，还是想走进你的心里面。", "吃那么多面，最后还不是变成大便。"],
                         ["草莓，蓝莓，蔓越莓，今天你想我了没？", "冬瓜，西瓜，哈密瓜，你再巴巴我打得你叫妈妈。"],
                         ["众生皆苦，唯你独甜。", "尝遍众生，你为渣男代言。"],
                         ["你喜欢瑞士名表还是我帅气的外表？", "我喜欢去年买了个表。"],
                         ["我想问一条路，到哥哥心里的路。", "山路十八弯，走完脑血栓。"],
                         ["小姐姐，我心里给你留了一块地，死心塌地。", "对不起，我的心里只容得下一块地，玛莎拉蒂。"],
                         ["小姐姐你笑起来真好看啊。", "你看起来真好笑啊。"],
                         ["亲爱的你知道吗，你的笑容没有酒，我却醉得像条狗", "我的笑容没有酒，你是真的像条狗"],
                         ["宝贝儿，我在手上划了一道口子，你也划一下吧，这样我们就是两口子了", "我怕我们的血溶到一起，被你发现其实我是你爸爸"],
                         ["这世间万物都有尽头，落叶归根，而我归你", "对不起 我不收垃圾"],
                         ["请问……我想问一下路，那条通往你心里的路", "八格牙路"],
                         ["你今天怎么怪怪的？ 怪可爱的",  "你今天也怪怪的，怪恶心的"],
                         ["亲爱的，你知道我和唐僧的区别吗？ 唐僧取经我娶你", "知道你和沙僧的区别吗？ 他叫沙僧你叫沙雕"],
                         ["亲爱的，你不觉得累吗？ 你已经在我的脑海里跑了好几圈了", "傻孩子，我在找出口呢"],
                         ["莫文蔚的阴天。孙燕姿的雨天，周杰伦的晴天，都不如你和我聊天", "求求你了，能否还我一个宁静的夏天"],
                         ["如果你是方便面，那我就是白开水，今生今世，我泡定你了", "故事的最后，她变成了屎，你变成了尿，你们终究分道扬镳"],
                         ["大年三十晚上的鞭炮再响，也没有我想你那么想", "大年三十晚上的鞭炮再响，也没有你放的屁响"],
                         ["c罗可以上演帽子戏法，可我想你却没有办法", "c罗可以上演帽子戏法，我也可以给你上演绿帽子戏法"],
                         ["不要抱怨，抱我", "抱不起来，太重"],
                         ["你有没有发现我的眼睛很好看？因为我满眼都是你啊", "对不起，你眼睛在哪呢？"]], 
                  columns=['input', 'output'])

# One Document per table row: the 'input' text is what gets embedded and
# searched, and the paired 'output' text travels along as metadata.
documents = [
    Document(page_content=line, metadata={'output': comeback})
    for line, comeback in zip(df['input'], df['output'])
]

# Chinese-capable embedding model; this rebinds `embeddings`.
embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v2-base-zh")

# In-memory FAISS index over the documents.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

# Return the three most similar documents for each query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

## Build Chat Chain - Step 2

In [None]:
from operator import itemgetter

from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import chain, RunnablePassthrough


def build_standard_chat_prompt_template_v3(kwargs):
    """Assemble a ``ChatPromptTemplate`` with an optional chat-history slot.

    Args:
        kwargs: Mapping that may contain the keys ``'system'``, ``'messages'``
            and ``'human'``. The ``'system'`` and ``'human'`` values are
            keyword arguments for ``PromptTemplate`` and are skipped when
            missing or falsy. The mere presence of the ``'messages'`` key
            (its value is not read) requests a history placeholder.

    Returns:
        A ``ChatPromptTemplate`` whose messages are ordered
        system -> history placeholder -> human.
    """
    messages = []

    if kwargs.get('system'):
        system_prompt = PromptTemplate(**kwargs['system'])
        messages.append(SystemMessagePromptTemplate(prompt=system_prompt))

    # Callers mark the history slot with ``"messages": None``, so the key's
    # presence is tested instead of its truthiness; the truthiness test
    # dropped the placeholder and chat history never reached the model.
    # ``optional=True`` keeps the prompt usable when an invocation carries no
    # "messages" entry: the slot then renders as an empty history.
    if 'messages' in kwargs:
        messages.append(MessagesPlaceholder(variable_name="messages", optional=True))

    if kwargs.get('human'):
        human_prompt = PromptTemplate(**kwargs['human'])
        messages.append(HumanMessagePromptTemplate(prompt=human_prompt))

    return ChatPromptTemplate.from_messages(messages)



@chain
def chatbot_prompt_fn(data):
    """Return the chat prompt for the grumpy pickup-line persona.

    The function returns a prompt template, i.e. another runnable; LangChain
    then invokes that template with the same ``data`` dict, which is how
    ``{context}`` and ``{input}`` get their values.

    Args:
        data: The chain input. It must provide ``"context"`` and ``"input"``
            when the returned prompt is formatted.

    Returns:
        The ``ChatPromptTemplate`` built from the system and human templates.
    """
    system_template = """
                      You are a helpful AI assistant acting as if you hava rough day and you are now very grumpy. 
                      You will respond with the following style, cheesy pickup lines, 
                      shown in the context:\n\n{context}\n
                      You will reply in simplified Chinese (簡體中文).
                      """

    # The user's text is substituted as a variable instead of being used as
    # the template itself; otherwise any "{" or "}" typed by the user is
    # parsed as template syntax and either fails or gets replaced.
    human_template = "{input}"

    input_ = {"system": {"template": system_template},
              "human": {"template": human_template},
              "messages": None}

    prompt_template = build_standard_chat_prompt_template_v3(input_)

    return prompt_template

@chain
def context_parser(documents):
    """Flatten retrieved documents into one numbered question/answer text.

    Args:
        documents: Iterable of documents exposing ``page_content`` and a
            ``metadata`` mapping with an ``'output'`` entry.

    Returns:
        A string with one ``Example i`` block per document, numbered from 0;
        the empty string when there are no documents.
    """
    return "".join(
        f"Example {position}:\nQuestion: {doc.page_content}\nAnswer: {doc.metadata['output']}\n"
        for position, doc in enumerate(documents)
    )
    
# Add a "context" entry to the incoming dict: its "input" value is sent to the
# retriever and the hits are flattened to text. Existing keys are kept.
step_1 = RunnablePassthrough.assign(context=itemgetter("input") | retriever | context_parser)

In [None]:
# Empty history for the demo calls below.
demo_chat_history = ChatMessageHistory()

# Run only the retrieval step to inspect the "context" it adds.
step_1.invoke({"input": "你有个超能力 我也有个超能力",
               "messages": demo_chat_history.messages})

In [None]:
# Retrieval plus prompt construction, without the model, to inspect the
# messages that would be sent.
pipeline_ = step_1 | chatbot_prompt_fn
pipeline_.invoke({"input": "你有个超能力 我也有个超能力",
                  "messages": demo_chat_history.messages})

In [None]:
# Full chain: retrieval -> prompt -> chat model -> plain-text answer.
pipeline_ = step_1 | chatbot_prompt_fn | model | StrOutputParser()
pipeline_.invoke({"input": "你有个超能力 我也有个超能力",
                  "messages": demo_chat_history.messages})

## Final Chat Process- Step 3

https://www.wenan.wang/qibaitiaotuweiqinghua.html

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory


# Fresh, empty history for this interactive session.
chat_history = ChatMessageHistory()

while True:
    question = input("Please input your question: ")

    # Typing QUIT ends the session.
    if question == "QUIT":
        break

    # The history is passed under "messages", the key used for chat history
    # everywhere else in this notebook (it was misspelled "message" here).
    answer = pipeline_.invoke({"input": question,
                               "messages": chat_history.messages
                              })

    print(answer)

    # Record both sides of the turn so the next request can carry them.
    chat_history.add_user_message(question)
    chat_history.add_ai_message(answer)

In [None]:
chat_history

In [None]:
# Corpus of quotations used as retrieval context: each triple-quoted string
# becomes one Document in the loop that follows this list. The strings keep
# their embedded line breaks and indentation.
my_list = ["""有時候我們要檢討一下 臺大醫院怎麼會淪落到讓李源德這種人當院長這真的是帝國衰亡的前兆阿 唉..""",
           """大七要實習又要面對國考很辛苦?? 我跟你講 這就是獅子沒有辦法了解狗的悲哀阿~~ 
           阿哈哈哈哈哈哈~~~~~像我這隻獅子 哪有辦法去了解叢林裡面小狗的心情阿??""",
           """人家都說 柯文哲是朱p一手拉拔大的我跟你講 其實我真正該感謝的人是魏崢阿~~
              沒辦法阿  當初台大被魏崢打的落花流水
              我們心臟移植的result輸人家太多了嘛
              所以當時朱p就把我抓上來成立移植小組 管ICU 後來result才變好的
            
              所以阿 你要記住 對你幫助最大的 往往是你的敵人
              會把你幹掉的 卻是你的長官阿~~~
           """,
           """朱p當初還是鬥輸李源德
              沒辦法啦 光比不要臉這一項就輸人家了
            
              所以啦 其實朱p是輸在Psychology這一項 李源德Psychology太強了
            
              朱p作決策都還會關心人家的反應 李源德根本都不鳥這麼多
              所以我告訴你 要得天下 就是要像李源德這種不要臉的人才辦的到阿
              李源德這個人 就是認定他不需要朋友 所有人的關係只有長官跟下屬
              像這種人才有辦法奪天下嘛~~
            
              李源德也知道討厭他恨他想殺他的人一大堆阿
              不過他就是不在乎 你就拿他沒辦法 厲害阿~~
           """,
           """
           我們外科都被內科欺負假的 像看病人啦
           內科門診看一個病人醫師抽成抽兩成 外科門診就抽一點五成
           內科住院 醫生抽成抽兩成 外科住院就只能抽一成
           這哪們子道理 很過分阿~~
           為什麼外科沒有人反抗?
           這就是李源德厲害的地方阿
        
           那外科呢?大家坐在一起哭阿 沒有辦法阿 都被人家欺負假的
           所以外科需要的是像我這樣夠凶的流氓阿
           要是我去開會 就直接跟李源德對罵 來打架阿~~~
           """,
           """
           林中梧是我當兵的時候我前兩梯的學長
           他跟我一樣是去步兵連 結果他進去之後沒個星期就送八三么了
           精神崩潰了 那邊的兵都是刺龍刺鳳的
           像林中梧這種乖寶寶管不動他們啦 最後就受不了精神崩潰了
        
           後來我又去了 他們那些阿兵哥看到我又是台大的過來
           想說也要像林中梧一樣把我弄走
           然後才發現我很不一樣 我根本就是大流氓阿
           阿哈哈哈哈哈
        
           反正就是坐交互蹲跳伏地挺身 我做一下他做一下 等他不行了 讓他沒面子
           以後他就乖乖聽話了
        
           所以你看 我在部隊裡面管的是那些刺龍刺鳳真正的流氓耶
           連哪些我都管的死死的 你以為我會怕李源德嗎?
           我管個ICU哪有什麼困難??
           """,
           """
           你們注意看看 我們外科ICU meeting是全臺大醫院唯一要自己買晚餐的
           我從上任以來就不準屬下拿廠商的便當 
           以前我看到有住院醫師在吃廠商的便當 就被我罵的要死
           老闆都沒吃了你吃什麼??
           """,
           """
           外科ICU的興起要感謝我很多手下
           尤其是蔡壁如 以前我們剛開始的時候只有一台洗腎機
           她每天早上四點要起床上班 五點洗第一個病人 然後晚上十點才能回家
           真的很辛苦
        
           我的手下都是女生(ICU的護士 技術員等等)
           所以柯文哲的天下是女人打出來的
           阿哈哈哈哈哈
           """,
           """
           李源德也是很令人佩服 他很會打組織戰
           你看幾乎臺大醫院大大小小的組織都是內科的人馬
           什麼實驗診斷科啦 急診醫學部啦 連病例室都歸內科管~~
        
           內科簡直就像cancer一樣 到處meta嘛
           把整個臺大醫院都吃下來了
           """,
           """
           我跟你講 我這叫做狗大便哲學
           誰要是趕踩我 我一定讓他鞋子臭的要死 最後一定要換一雙鞋子
        
           當初李源德就派他那個料爬子(台語)過來找我 想要試試看我是不是好欺負
           我就跟他說 你敢來就試試看
           反正我是狗大便 你敢踩就來!!
        
           結果李源德也不敢來惹我 看到狗大便就繞道走過去了
        
           不過我告訴你 要維持大便的臭度也是不容易的
           你不夠臭人家還不怕你勒~~
           """,
           """
           女生要嫁人 不是要找喜歡的點最多的 而是應該找討厭的點最少的"

           因為喔 你喜歡他的那些點 總有一天會改變
           但是討厭的東西 往往一輩子都改不過來
           """,
           """
           我常常問別人 什麼是活著?

           因為在外科ICU 我們常常要宣判一個人腦死 然後才能捐器官作器官移植
           每次有人問我 你如何決定這個人已經死了?什麼叫做死亡?
           那我就會問他 什麼叫做活著?
        
           一個植物人躺在那裡 會吃會動會呼吸 但是整天不動
           也不知道他到底有沒有聽到你講的話
           你覺得 這樣叫做活著嗎?
        
           所以我問你好了 什麼是人生的意義?
        
           我最近想一想 有點知道答案了
           那就是阿 "追求這個問題的答案"就是這個問題的答案
        
           人生是一個process 而不是一個end result
           所以重要的是過程 懂嗎?"
           """,
           """
           你想想看 如果今天廠商招待你三十萬好了
           你覺得你要幫他賺多少錢才還的回去
           所以我都不准ICU拿廠商的招待嘛
        
           我跟其他的醫生差很多啦
           別的醫生都廠商招待出國 坐商務艙 五星級飯店 有些都還接受性招待勒
           阿哈哈哈哈哈哈
        
           你算算看 這樣出國一趟 來回二三十萬絕對跑不掉啦
           那你覺得這些醫生要幫廠商賺多少錢才行?
        
           所以我以前本來還有兼藥事委員會 後來就被拔掉了
           因為別的委員都跟廠商有利益關係阿
           動不動就幫他們擋別的藥商的申請案件 連文法錯誤也在挑 真是無聊
           我在那邊太愛搞怪 就被拔掉了..
           """]

# Wrap every quotation in a Document; no metadata is attached for this corpus.
documents = [Document(page_content=passage) for passage in my_list]

# Chinese-capable embedding model; this rebinds `embeddings`.
embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v2-base-zh")

# In-memory FAISS index over the quotations; this rebinds `vectorstore`.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

# Return the three most similar quotations for each query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

@chain
def chatbot_prompt_fn(data):
    """Return the chat prompt for the Ko Wen-Je persona.

    The function returns a prompt template, i.e. another runnable; LangChain
    then invokes that template with the same ``data`` dict, which is how
    ``{context}`` and ``{input}`` get their values.

    Args:
        data: The chain input. It must provide ``"context"`` and ``"input"``
            when the returned prompt is formatted.

    Returns:
        The ``ChatPromptTemplate`` built from the system and human templates.
    """
    system_template = """
                      You are a helpful AI assistant acting as Ko Wen-Je (柯文哲) and you will
                      mimick his thought and the way he talks shown in the context:\n\n{context}\n
                      You will reply in traditional Chinese (繁體中文).
                      """

    # The user's text is substituted as a variable instead of being used as
    # the template itself; otherwise any "{" or "}" typed by the user is
    # parsed as template syntax and either fails or gets replaced.
    human_template = "{input}"

    input_ = {"system": {"template": system_template},
              "human": {"template": human_template},
              "messages": None}

    prompt_template = build_standard_chat_prompt_template_v3(input_)

    return prompt_template

@chain
def context_parser(documents):
    """Concatenate the text of the retrieved documents, one per line.

    Args:
        documents: Iterable of documents exposing ``page_content``.

    Returns:
        Every document's ``page_content`` followed by a newline, joined in
        order; the empty string when there are no documents.
    """
    return "".join(f"{doc.page_content}\n" for doc in documents)
    
# Rebuild the retrieval step so it picks up the `retriever` and
# `context_parser` defined in this cell.
step_1 = RunnablePassthrough.assign(context=itemgetter("input") | retriever | context_parser)

# Empty history for the demo calls.
demo_chat_history = ChatMessageHistory()

# Run only the retrieval step to inspect the "context" it adds.
step_1.invoke({"input": "對原住民教育宣導，改變其宗教、獵食方式，嚴格取締傷害動物的手段與工具",
               "messages": demo_chat_history.messages})

In [None]:
# Full chain for the persona bot: retrieval -> prompt -> chat model -> text.
pipeline_ = step_1 | chatbot_prompt_fn | model | StrOutputParser()
pipeline_.invoke({"input": "對原住民教育宣導，改變其宗教、獵食方式，嚴格取締傷害動物的手段與工具",
                  "messages": demo_chat_history.messages})

### 回家作業 2: 將retriever抽換成WikipediaRetriever

基本上，你可以將這個retriever的內容抽換成任何你需要的資料，來加快寫報告的效率。記得Double Check....

## Compress the chat history to reduce the size of the prompt


https://github.com/langchain-ai/langserve/blob/main/examples/conversational_retrieval_chain/server.py

### Condensation

In [None]:
# System instruction for the condensation step: merge the conversation and the
# follow-up query into one standalone query, keeping the original language.
system_template = """
                  Combine the following conversation and a follow up ***USER QUERY***, to generate 
                  a standalone query, in its original language.
                  """

# The follow-up query as typed by the user.
human_template = """
                 ***USER QUERY***: {input}
                 """
# NOTE(review): the system text refers to "the following conversation". Check
# that build_standard_chat_prompt_template_v3 really inserts the history
# placeholder when "messages" is None; if it does not, the model is never
# shown any conversation here.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template},
          "messages": None}
    
condensed_prompt_template = build_standard_chat_prompt_template_v3(input_)

# prompt -> chat model -> plain-text standalone query.
condensed_pipeline = condensed_prompt_template | model | StrOutputParser()

# condensed_chain = {"question": itemgetter("question"),
#                    "messages": itemgetter("message")} | condensed_chat_template | model | StrOutputParser()

## Retrieval

How to implement this properly?

Let's start from a higher-level point of view.

In [None]:
from langchain_core.runnables import RunnablePassthrough

# System instruction for the answering step: sets the persona, the reply
# style (illustrated by the retrieved {context}) and the reply language.
system_template = """
                  You are a helpful AI assistant acting as if you hava rough day and you are now very grumpy. 
                  You will respond with the following style, cheesy pickup lines, 
                  shown in the context:\n\n{context}\n
                  You will reply in simplified Chinese (簡體中文).
                  """
    
# Human turn: the condensed, standalone version of the user's question.
human_template = """
                 {standalone_question}
                 """

# Config for build_standard_chat_prompt_template_v2 (defined earlier in the
# notebook): each role lists its template and the placeholder names it uses.
input_ = {"system": {"template": system_template,
                     "input_variable": ["context"]},
          "human": {"template": human_template,
                    "input_variable": ["standalone_question"]}
         }

chat_prompt_template = build_standard_chat_prompt_template_v2(input_)

# Retrieve with the condensed, history-aware question instead of the raw
# follow-up turn. A follow-up such as "and the other one?" carries no search
# signal by itself, and the answering prompt already consumes
# "standalone_question", so this chain requires that key either way.
step_1 = RunnablePassthrough.assign(
    context=itemgetter("standalone_question") | retriever | context_parser
)

# Keep every upstream key (input, messages, standalone_question, context) and
# add the model's reply as a plain string under "answer".
retrieval_pipeline = step_1 | RunnablePassthrough.assign(
    answer=chat_prompt_template | model | StrOutputParser()
)

In [None]:
# Full conversational chain: first add "standalone_question" (the output of
# condensed_pipeline) to the input dict, then hand the enriched dict to
# retrieval_pipeline, which adds "context" and "answer".
final_pipeline = RunnablePassthrough.assign(standalone_question=condensed_pipeline)|retrieval_pipeline

In [None]:
# One-off check of the full chain with a newly created history object
# (presumably empty, i.e. no prior turns).
chat_history = ChatMessageHistory()

# The result is a dict; the chat loop in the next cell reads its "answer" key.
final_pipeline.invoke({"messages": chat_history.messages,
                       "input": "你有个超能力 我也有个超能力"})

In [None]:
# Conversation memory for the interactive session below.
chat_history = ChatMessageHistory()

# Minimal interactive loop: read a question, run the full chain, print the
# result, then record both turns so the next question can be condensed
# against the conversation so far.
while True:
    question = input("Please input your question: ")

    # if you want to quit
    if question == "QUIT":
        break

    # The history must be passed under "messages", the same key used by the
    # one-off invoke in the previous cell and by the prompt-builder config.
    # The earlier key "message" meant the accumulated history was never
    # delivered under the expected name.
    output = final_pipeline.invoke({"input": question,
                                    "messages": chat_history.messages
                                   })

    print(output)

    answer = output['answer']

    # Store the turn only after a successful call, so a failed call does not
    # leave a dangling user message in the history.
    chat_history.add_user_message(question)
    chat_history.add_ai_message(answer)

## OpenAI Model Fine-tuning

剛好有人來找我家教這個，整理了一下。。。

參考:

- https://cookbook.openai.com/examples/chat_finetuning_data_prep

- Training-file format (JSONL: one JSON object per line), for example:

```json
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
```

In [None]:
import json

import pandas as pd
from sklearn.model_selection import train_test_split

from src.io.path_definition import get_project_dir

# Accumulates one chat-format training example per recipe; it is filled by a
# later cell and then written out as a JSONL file.
jsonl = []

# Read the file as UTF-8 explicitly: without an encoding argument open() uses
# the platform default, which can fail or mis-decode non-ASCII ingredient
# names on systems whose default is not UTF-8.
with open(os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-1', 'recipe_train.json'),
          'r', encoding='utf-8') as f:
    recipe_train = json.load(f)

### Create a jsonl format

In [None]:
# One row per recipe; the ingredient list is flattened into a single
# comma-separated string.
recipe_df = pd.DataFrame(
    [
        {
            "id": rec["id"],
            "cuisine": rec["cuisine"],
            "ingredients": ", ".join(rec["ingredients"]),
        }
        for rec in recipe_train
    ],
    columns=["id", "cuisine", "ingredients"],
)

# Train/test split:
# - a fixed random_state keeps the split reproducible
# - stratified sampling on the cuisine label keeps the class distribution of
#   the training and test sets consistent
train, test = train_test_split(
    recipe_df,
    test_size=0.2,
    random_state=42,
    stratify=recipe_df["cuisine"].tolist(),
)

In [None]:
# System prompt shared by every training example and reused at inference
# time. The text (including its line breaks and indentation) is part of the
# training data, so it should stay identical wherever the model is queried.
system_prompt_template = """You are a helpful AI assistant as a chef of a Michellin 3 stars restaurant. You have extensive knowledge about cuisines 
                            all over the world, and you are able to identify the origin of a cuisine based on the ingredients. You are assigned with a 
                            task of identifying the origin, as region, of cuisine based on the <ingredients>.                            
                            """

# Build one chat-format fine-tuning example per training recipe: the shared
# system prompt, the ingredient string as the user turn, and the cuisine
# label as the expected assistant reply.
for ingredients, cuisine in zip(train['ingredients'], train['cuisine']):
    content = f"ingredients: [{ingredients}]"

    jsonl.append({
        "messages": [
            {"role": "system", "content": system_prompt_template},
            {"role": "user", "content": content},
            {"role": "assistant", "content": cuisine},
        ]
    })

In [None]:
# Write the examples to disk in JSONL form: one JSON object per line.

with open('openapi_finetuning_test.jsonl', 'w') as outfile:
    outfile.writelines(json.dumps(entry) + '\n' for entry in jsonl)

In [None]:
# Use the API provided by OpenAI directly (not through LangChain)

from openai import OpenAI

# The API key is read from the OPENAI_API_KEY environment variable; a missing
# variable raises KeyError here.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# IPython/Jupyter-only syntax: a trailing "?" shows the help for the object.
# It is not valid in a plain Python script.
client.files.create?

In [None]:
# Upload the training file for fine-tuning. The context manager guarantees
# the file handle is closed once the upload call returns or raises; the
# earlier inline open() left the handle open for the rest of the session.
with open('openapi_finetuning_test.jsonl', 'rb') as training_file:
    uploaded_file = client.files.create(file=training_file,
                                        purpose='fine-tune')

# Last expression of the cell, so the notebook still displays the returned
# file object (its id is what the fine-tuning job needs as training_file).
uploaded_file

In [None]:
# IPython/Jupyter-only syntax: show the help for the fine-tuning job API.
client.fine_tuning.jobs.create?

In [None]:
# Start a fine-tuning job on the base model with explicit hyperparameters;
# `suffix` is the label attached to the job.
# NOTE(review): training_file is a hard-coded file id, presumably the id
# returned by an earlier files.create upload. Confirm it refers to the file
# written above before re-running.
# NOTE(review): learning_rate_multiplier=1e-6 is a very small scaling factor
# and may leave the model practically unchanged. Confirm this is intended.
client.fine_tuning.jobs.create(model="gpt-4o-mini-2024-07-18",
                               training_file="file-SHpXusOxxMVGYRgwIGNj6mk9",
                               hyperparameters={"batch_size":4, "learning_rate_multiplier": 1e-6, "n_epochs": 5},
                               suffix='test')

訓練完之後

In [None]:
# Rebind `model` to the fine-tuned model. The "ft:..." name is hard-coded and
# account-specific (presumably the identifier reported by the finished job),
# so replace it when reproducing the run.
# Chains built in earlier cells keep the model object they were created with.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="ft:gpt-4o-mini-2024-07-18:cosnova-account:test:ANPq9Weh", 
                   temperature=0 # a range from 0-2, the higher the value, the higher the `creativity`
                  )

In [None]:
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate

# System turn: the same system prompt text that was written into every
# training example.
system_prompt = PromptTemplate.from_template(system_prompt_template)

system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# User turn: must render exactly like the training examples, whose user
# content was f"ingredients: [{ingredients}]" with no surrounding whitespace.
# The earlier triple-quoted template added a leading newline and indentation,
# so the model was queried in a format it had not been trained on.
human_prompt = PromptTemplate(template="ingredients: [{ingredients}]",
                              input_variables=["ingredients"])

human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message,
                                                human_message])

# Prompt -> fine-tuned model; the result is the model's message object.
fine_tuned_chain = chat_prompt | model


In [None]:
# Query the fine-tuned model with the first recipe of the held-out test split.
fine_tuned_chain.invoke({"ingredients": test.iloc[0]['ingredients']})

In [None]:
# Baseline for comparison: the base model (not fine-tuned) behind the same
# chat prompt. This rebinds `model`; fine_tuned_chain keeps the fine-tuned
# model object it was built with.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-mini-2024-07-18", 
                   temperature=0 # a range from 0-2, the higher the value, the higher the `creativity`
                  )

chain = chat_prompt|model

In [None]:
# Same test recipe as the fine-tuned query, sent to the base model.
chain.invoke({"ingredients": test.iloc[0]['ingredients']})