# MLflow Part 2


mlflow server --host 127.0.0.1 --port 8080

## 紀錄內容

In [None]:
import os

os.chdir("../../../")

In [None]:
import mlflow
from textwrap import dedent
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_community.callbacks import MlflowCallbackHandler
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

from src.initialization import credential_init

def build_standard_chat_prompt_template(kwargs):
    """Assemble a chat prompt from optional ``'system'`` and ``'human'`` specs.

    Args:
        kwargs: Mapping that may contain the keys ``'system'`` and/or
            ``'human'``. Each value is a dict that is unpacked into
            ``PromptTemplate(**value)``.

    Returns:
        The object produced by ``ChatPromptTemplate.from_messages`` for the
        message templates that were requested, system message first.
    """
    # Role key -> message-template class, in the order messages are emitted.
    role_wrappers = (
        ('system', SystemMessagePromptTemplate),
        ('human', HumanMessagePromptTemplate),
    )

    return ChatPromptTemplate.from_messages([
        wrapper(prompt=PromptTemplate(**kwargs.get(role)))
        for role, wrapper in role_wrappers
        if role in kwargs
    ])

experiment = "Week-4"

credential_init()

mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Start or get an MLflow run explicitly
mlflow.set_experiment(experiment)

### MlflowCallbackHandler

追蹤並記錄語言模型的輸入和輸出

In [None]:
# Open a run named "my-llm-run"; the callback handler below is bound to its run_id.
with mlflow.start_run(run_name="my-llm-run") as run:
    run_id = run.info.run_id
    print(f"Using run_id={run_id}")

    # Attach the run_id so all logs go into this run
    mlflow_cb = MlflowCallbackHandler(
        experiment=experiment,
        run_id=run_id,
        tracking_uri="http://127.0.0.1:8080",
    )

    # The handler is registered on the chat model itself, not on the chain.
    model = ChatOpenAI(
        model_name="gpt-4o-mini",
        temperature=0,
        callbacks=[mlflow_cb]
    )

    # Single-variable prompt: only {product} is filled in per call.
    prompt = PromptTemplate(
        input_variables=["product"],
        template="What is a good name for a company that makes {product}?",
    )

    chain = LLMChain(llm=model, prompt=prompt)

    # First call logs into this run
    chain.invoke({"product": "陽電子攻城炮"})

    # Second call also logs into the SAME run_id
    chain.invoke({"product": "旋風魚雷 (Warhammer 40k, Exterminatus)"})

    # Third call, same run again
    chain.invoke({"product": "人形MS/Gundam"})
    
    # Finally flush once
    mlflow_cb.flush_tracker()

In [None]:
# `run_id` is the id captured by the previous cell's `with mlflow.start_run(...)` block.
artifact_path = "table_action_records.html"   # relative path inside artifacts

# Download to a local directory; the call returns the local path it wrote to.
local_dir = mlflow.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_path,
                                                dst_path="tutorial/LLM+Langchain/Week-4", 
                                                tracking_uri="http://127.0.0.1:8080",
                                                )

print("Downloaded to:", local_dir)

In [None]:
mlflow.artifacts.download_artifacts?

## Autolog

This mode doesn’t write JSON files at all — instead, it captures traces (spans) of your LangChain runs into MLflow’s experiment tracking + tracing UI. That means you see inputs/outputs, timings, and nested structure in the MLflow UI, not .json artifacts.

In [None]:
run_id

In [None]:
# Enable autologging — this instruments LangChain automatically
mlflow.langchain.autolog()

# Same model / prompt / chain as in the callback example, but with no callback handler attached.
model = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0
)

prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes {product}?",
)

chain = LLMChain(llm=model, prompt=prompt)

# Point the MLflow client at the local tracking server
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Select the "Week-4" experiment as the active one
mlflow.set_experiment("Week-4")

# Re-open the run whose id is still held in `run_id` and invoke the chain three times inside it.
with mlflow.start_run(run_id=run_id) as run:
    chain.invoke({"product": "光茅 (戰槌40k)"})
    chain.invoke({"product": "旋風魚雷 (Warhammer 40k, Exterminatus)"})
    chain.invoke({"product": "人形MS/Gundam"})

## 一個模型不夠，你可以用兩個

這兩個範例都只用了一個模型，還沒外加RAG之類的

使用 Reflection（先給回饋、再依回饋修改）作為例子

In [None]:
query = dedent("""
俗話說：「龍生九子，各有不同。」在廣闊浩瀚的海洋之中，就有一頭孤獨的鯨魚——五十二赫茲鯨魚。牠聲音的頻率天生便比同伴還要高，這項特別之處，也導致了牠與同伴產生了無法溝通的鴻溝。看見這則故事的我，不禁思考，在如此多元的人間，是否也有像五十二赫茲鯨魚一般，天生便與眾不同？

回首童年，我印象最為深刻的一刻，是初識字時，與文字互相理解的那一瞬、是當我第一次讀完一個句子時，它將自身的意義傳入我腦中的那一瞬。自此，我便對文字、語言抱有特殊的感情，也十分享受閱讀與朗誦。那種將自身與文字經由一點一滴積累而連接起來的感情，使我心靈感到十分富足。

而當我步入校園接觸同儕時，驀然驚覺我與別人的閱讀速度十分不同。每當我已讀完一篇文章，但同學可能只完成了一半甚至三分之二。同時，我在生字讀音方面也異常的執著，因此被同學抱怨有「文字潔癖」。面對同儕抱怨的我，也只好強忍對耳邊時而出現字錯讀音的不適，開始刻意忽略心裡對它的執念，只為想要與別人一樣，想要和朋友互相理解。

直到多年前，因緣際會之下認識了「五十二赫茲」這獨特的存在。牠的身影在我心中烙下一道深刻的痕跡。因為牠，我開始接受自己與他人的不同；也因為牠，我明白了，我對文字的執著，並不是一種負面的特質，而是上天賜予我的禮物，我開始在寫作上揮灑自如。這讓我知道，不要在一開始便用否定的眼光看待自己的特質。也許這特別之處，會使我們與五十二赫茲鯨魚一般孤獨，會使我們遭受他人的不理解與排斥，但也會讓我們與眾不同。

關於此，我想說的是，勇敢地綻放自己的特別，也讓自己成為自己和世人眼中，最閃耀的五十二赫茲鯨魚。
""")


def create_feedback_pipeline(mlflow_callback):
    """Build the "teacher" chain that reads an essay and returns feedback text.

    Args:
        mlflow_callback: Callback handler placed in the chat model's
            ``callbacks`` list.

    Returns:
        The runnable ``chat_prompt | model | StrOutputParser()``. Its prompt
        has the placeholders ``title`` and ``article``.
    """

    ## Teacher LLM
    system_template = dedent("""
    你是一個教學與寫作經驗豐富的台灣大學中文系教授，你要來負責給予作文評分與回饋。
    """)
    
    human_template = dedent("""
    Title: {title}
    
    Article:
    {article}
    """)
    
    model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                       model_name="gpt-4o-mini", temperature=0,
                       callbacks=[mlflow_callback])
    
    # The key must be spelled "input_variables" (plural): each spec dict is
    # unpacked straight into PromptTemplate(**spec).
    input_ = {"system": {"template": system_template},
              "human": {"template": human_template,
                        "input_variables": ["title", "article"],
                        }}
    
    chat_prompt_template = build_standard_chat_prompt_template(input_)
    
    feedback_pipeline = chat_prompt_template|model|StrOutputParser()

    return feedback_pipeline

In [None]:
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser


# Structured-output schema for the revision step; it is handed to PydanticOutputParser.
# NOTE(review): the only field is called `name`, yet its description asks for the
# revised article text. Downstream code reads it as `x.name`, so the name is kept.
class Output(BaseModel):
    name: str = Field(description="The revised article in traditional Chinese (繁體中文), please do not include the title.")


output_parser = PydanticOutputParser(pydantic_object=Output)
format_instructions = output_parser.get_format_instructions()


def create_revision_pipeline(mlflow_callback):
    """Build the "student" chain that rewrites an essay according to feedback.

    Args:
        mlflow_callback: Callback handler placed in the chat model's
            ``callbacks`` list.

    Returns:
        The runnable ``chat_prompt | model | output_parser``. Its prompt has
        the placeholders ``title``, ``article`` and ``feedback``;
        ``format_instructions`` is pre-filled from the module-level value.
    """
    ## Generate
    system_template = dedent("""
    你是一個在準備考試的高中生，你將根據反饋強化的作文內容。
    """)
    
    human_template = dedent("""
    Title: {title}
    
    Old Article:
    {article}
    
    Feedback:
    {feedback}

    Output format instructions: {format_instructions}
    
    Revised Article:
    """)
    
    # The key must be spelled "input_variables" (plural): each spec dict is
    # unpacked straight into PromptTemplate(**spec).
    input_ = {"system": {"template": system_template},
              "human": {"template": human_template,
                        "input_variables": ["title", "article", "feedback"],
                        "partial_variables": {'format_instructions': format_instructions}}}
    
    chat_prompt_template = build_standard_chat_prompt_template(input_)

    model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                       model_name="gpt-4o-mini", temperature=0,
                       callbacks=[mlflow_callback])
    
    revision_pipeline = chat_prompt_template|model|output_parser

    return revision_pipeline

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Name of the MLflow run. It has to be assigned here: the only other
# assignment of `run_name` sits in a later cell, so running the notebook
# top to bottom would otherwise fail with NameError.
run_name = "Reflection"

# Start or get an MLflow run explicitly
mlflow.set_experiment(experiment)
with mlflow.start_run(run_name=run_name) as run:
    run_id = run.info.run_id
    print(f"Using run_id={run_id}")

    # Attach the run_id so all logs go into this run
    mlflow_cb = MlflowCallbackHandler(
        experiment=experiment,
        run_id=run_id,
        tracking_uri="http://127.0.0.1:8080",
    )

    # Both sub-chains share one handler, so their calls are recorded together.
    feedback_pipeline = create_feedback_pipeline(mlflow_callback=mlflow_cb)
    revision_pipeline = create_revision_pipeline(mlflow_callback=mlflow_cb)
    
    # 1) add a `feedback` key computed from the input, 2) revise, 3) unwrap the `name` field.
    whole_pipeline = RunnablePassthrough.assign(feedback=feedback_pipeline)|revision_pipeline|RunnableLambda(lambda x: x.name)

    result = whole_pipeline.invoke({"article": query,
                                    "title": "關於五十二赫茲，我想說的是…"},
                                    # config={"callbacks": [mlflow_cb]} 
                                  )
    
    
# Finally flush once
mlflow_cb.flush_tracker()

結合上週的內容，將這個Pipeline打包成一個Artifact上傳到MLflow Server，然後藉由MLflow調用Pipeline

In [None]:
from textwrap import dedent

import pandas as pd
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec

from src.io.path_definition import get_project_dir

# Location of the Python file that is logged below as the model definition.
model_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 
                          "Week-4", "llmchain_mlflow_experiment_tracing.py")

# You need to know what you will put into it and what you will get out of it.
# Inputs: two named string columns; output: one unnamed string column.
input_schema = Schema([ColSpec("string", "title"),
                       ColSpec("string", "article")])
output_schema = Schema([ColSpec("string")])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)


query = dedent("""
俗話說：「龍生九子，各有不同。」在廣闊浩瀚的海洋之中，就有一頭孤獨的鯨魚——五十二赫茲鯨魚。牠聲音的頻率天生便比同伴還要高，這項特別之處，也導致了牠與同伴產生了無法溝通的鴻溝。看見這則故事的我，不禁思考，在如此多元的人間，是否也有像五十二赫茲鯨魚一般，天生便與眾不同？

回首童年，我印象最為深刻的一刻，是初識字時，與文字互相理解的那一瞬、是當我第一次讀完一個句子時，它將自身的意義傳入我腦中的那一瞬。自此，我便對文字、語言抱有特殊的感情，也十分享受閱讀與朗誦。那種將自身與文字經由一點一滴積累而連接起來的感情，使我心靈感到十分富足。

而當我步入校園接觸同儕時，驀然驚覺我與別人的閱讀速度十分不同。每當我已讀完一篇文章，但同學可能只完成了一半甚至三分之二。同時，我在生字讀音方面也異常的執著，因此被同學抱怨有「文字潔癖」。面對同儕抱怨的我，也只好強忍對耳邊時而出現字錯讀音的不適，開始刻意忽略心裡對它的執念，只為想要與別人一樣，想要和朋友互相理解。

直到多年前，因緣際會之下認識了「五十二赫茲」這獨特的存在。牠的身影在我心中烙下一道深刻的痕跡。因為牠，我開始接受自己與他人的不同；也因為牠，我明白了，我對文字的執著，並不是一種負面的特質，而是上天賜予我的禮物，我開始在寫作上揮灑自如。這讓我知道，不要在一開始便用否定的眼光看待自己的特質。也許這特別之處，會使我們與五十二赫茲鯨魚一般孤獨，會使我們遭受他人的不理解與排斥，但也會讓我們與眾不同。

關於此，我想說的是，勇敢地綻放自己的特別，也讓自己成為自己和世人眼中，最閃耀的五十二赫茲鯨魚。
""")

run_name = "Reflection"

# Log the pipeline source file and register it as a pyfunc model inside one run.
with mlflow.start_run(run_name=run_name) as run:

    # Publish experiment / run metadata through environment variables.
    # NOTE(review): presumably the model file reads these at load time — confirm against that file.
    os.environ['experiment'] = experiment
    os.environ['run_id'] = run.info.run_id
    os.environ['run_name'] = run_name
    
    # Keep a plain copy of the source file next to the model artifacts.
    mlflow.log_artifact(model_path, artifact_path="source_code")

    # One-row example; column names match the names declared in the signature.
    input_example = pd.DataFrame(data=[[query, "關於五十二赫茲，我想說的是…"]], columns=['article', 'title'])
    
    model_info = mlflow.pyfunc.log_model(
        python_model=model_path,  # Define the model as the path to the Python file
        name="langchain_model",
        input_example=input_example,
        signature=signature,
        registered_model_name="Generation_Reflection_Demo"
    )

In [None]:
# Load the registered model back from the server and run one prediction inside a new run.
with mlflow.start_run(run_name=run_name) as run:

    # Same environment-variable hand-off as in the logging cell.
    os.environ['experiment'] = experiment
    os.environ['run_id'] = run.info.run_id
    os.environ['run_name'] = run_name

    # NOTE(review): the registry version (13) is hard-coded; adjust it to a version that exists on your server.
    loaded_model = mlflow.pyfunc.load_model("models:/Generation_Reflection_Demo/13")
    
    # One-row input with the same column names as the logged input example.
    input_ = pd.DataFrame(data=[['那一聲政委吹響的哨聲，代表著我對於帝皇的忠誠，衝鋒，殺光那些異端', '關於五十二赫茲，我想說的是…']], columns=['article', 'title'])
    
    output = loaded_model.predict(input_)

# LangServe

## 1. 客戶端 (client) 呼叫後端 API

In [None]:
import requests

response = requests.post(
    "http://localhost:5000/openai/invoke",
    json={'input': "Where is Taiwan?"}
)

In [None]:
response.json()

In [None]:
response.json()['output']['content']

在Windows的CLI(command line interface)中:

curl -X POST "http://localhost:5000/openai/invoke" -H "Content-Type: application/json" -d "{""input"": ""Where is Taiwan?""}"

## 2. 結合之前MLflow的應用。從MLflow server上下載模型，然後從客戶端呼叫

In [None]:
import requests

response = requests.post(
    "http://localhost:5000/demo/invoke",
    json={'input': {"article": "那一聲政委吹響的哨聲，代表著我對於帝皇的忠誠，衝鋒，殺光那些異端.",
                    "title": "關於五十二赫茲，我想說的是…"}}
)

response.json()

## 3 RemoteRunnable

In [None]:
from langserve import RemoteRunnable

remote_llm = RemoteRunnable("http://localhost:5000/openai/")

In [None]:
# Supports astream
async for msg in remote_llm.astream("Where is Taiwan?"):
    print(msg.content, end="", flush=True)

In [None]:
import os

os.chdir("../../../")

## 如何自動化生成country_code?

# ChatBot

1. N-Shot Learning 與對話歷史

    - 歷史對話可以看成一個 Q&A pair 列表
    - 當前模型在推理時，會把「之前的對話內容」作為 prompt 的一部分，再加上使用者最新的輸入，整合後丟進模型。這其實就是一種 few-shot / N-shot 的學習方式：模型從範例中抽取語境來理解「現在該怎麼回答」。

2. Stateless vs. Stateful
    - Stateless：如果每次請求都完全獨立，沒有任何歷史對話被帶入，那就叫無狀態 (stateless)。
    - Stateful：如果系統會保存對話歷史（不論是把歷史傳回模型，還是外部記憶系統存起來），那就是有狀態的 (stateful)。
    - 所以是否「能記住」過去，取決於設計，而不是模型本身自帶的能力。

3. Tools 的角色
    - 讓 ChatBot 強大的是 tools
    - 模型本身雖然能生成語言，但 結合外部工具（例如資料庫查詢、計算器、網路搜尋、代碼執行、圖片生成）後，ChatBot 才能真正做到「會推理 + 會行動」，不再只受限於參數內的知識。

    - 可以理解成：模型是「大腦」，Tools 是「手腳」。

In [None]:
from IPython.display import Image

Image(url="https://python.langchain.com/v0.1/assets/images/chat_use_case-eb8a4883931d726e9f23628a0d22e315.png")

先學怎麼調動工具: 模型就像是一個訓練有素的阿斯塔特，工具就像是動力甲，噴射背包，爆彈槍，和鏈鋸劍。

I accept these burdens, as the Imperium bleeds.

I accept these burdens, knowing no fear.

I accept these burdens, as an Angel of the Emperor.

I sheathe my form in this second skin.

This veil of machine muscle and false nerves.

I stand firm against the alien.

The mutant.

The heretic.

I grant no mercy.

I give no ground.

With humility, I bear the Imperialis, the symbol of loyalty unbroken.

With reverence, I receive actuation, awakening the armor’s spirit.

With pride, I wear the symbol of my Chapter, and join my brothers in war.

I am Iron.

I am Wrath.

I. Am. Doom.

## 工具綁定

In [None]:
import os

# os.chdir("../../../")

from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool

from src.initialization import credential_init

credential_init()

# Define a calculator tool
# NOTE(review): the docstring below is presumably what @tool exposes to the model as the
# tool description, so treat it as runtime text rather than as a comment.
@tool
def add_numbers(a: int, b: int) -> int:
    """Adds two numbers together."""
    return a + b

# Create the LLM
llm = ChatOpenAI(model="gpt-4o-mini")

# Bind the tool to the model
llm_with_tools = llm.bind_tools([add_numbers])

# Run
resp = llm_with_tools.invoke("What is 42 + 58?")

In [None]:
resp

In [None]:
resp.additional_kwargs['tool_calls'][0]

In [None]:
resp.additional_kwargs['tool_calls'][0]['function']

In [None]:
arguments = resp.additional_kwargs['tool_calls'][0]['function']['arguments']

In [None]:
name = resp.additional_kwargs['tool_calls'][0]['function']['name']

In [None]:
eval(arguments)

In [None]:
eval(name)

In [None]:
eval(name).invoke(eval(arguments))

In [None]:
follow_up = llm_with_tools.invoke(
        f"The tool '{name}' returned: 100. Give the final answer."
    )

print(follow_up)

## OpenAI WebSearch

### 基本使用

In [None]:
from openai import OpenAI

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

response = client.chat.completions.create(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "medium"},
        messages=[{"role": "user",
                   "content": "幫我查詢`掄語`"}]
    )

print(response.choices[0].message.content)

In [None]:
response = client.chat.completions.create(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "medium",
                            "user_location": {
                                "type": "approximate",
                                "approximate": {
                                    "country": "TW",
                                }
                            }},
        messages=[{"role": "user",
                   "content": "幫我查詢HunterXHunter 最新的進度"}]
    )

print(response.choices[0].message.content)

In [None]:
response = client.chat.completions.create(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "medium",
                            "user_location": {
                                "type": "approximate",
                                "approximate": {
                                    "country": "US",
                                }
                            }},
        messages=[{"role": "user",
                   "content": "幫我查詢omegazerore在tensorArt上的內容"}]
    )

### 那個country重要嗎?

In [None]:
from typing import List, Dict, Optional

import pandas as pd
from openai import OpenAI
from langchain_core.runnables import chain

from src.initialization import credential_init

credential_init()


class WebSearchService:
    """Service class responsible for querying OpenAI's web search endpoint."""

    def __init__(self, client: "OpenAI", search_context_size: str, model: str):
        """Store the API client and the settings reused by every search.

        Args:
            client: Object exposing ``chat.completions.create(...)``.
            search_context_size: Value sent as ``search_context_size`` in
                ``web_search_options``.
            model: Model name sent with each request.
        """
        self.client = client
        self.search_context_size = search_context_size
        self.model = model

    def search(self, messages: List[Dict], country_code: Optional[str] = None) -> str:
        """Run one web-search completion and return the reply text.

        Args:
            messages: Chat messages forwarded unchanged as ``messages``.
            country_code: ISO 3166-1 alpha-2 code of the country. When it is
                ``None`` no ``user_location`` is sent at all, instead of
                sending a location whose country is null.

        Returns:
            ``response.choices[0].message.content`` of the completion.
        """
        web_search_options = {"search_context_size": self.search_context_size}

        # Only describe a location when the caller actually supplied one.
        if country_code is not None:
            web_search_options["user_location"] = {
                "type": "approximate",
                "approximate": {
                    "country": country_code,
                },
            }

        response = self.client.chat.completions.create(
            model=self.model,
            web_search_options=web_search_options,
            messages=messages
        )

        return response.choices[0].message.content


@chain
def websearch_image(kwargs):
    """Ask the web-search service for a brand's product matching a concept.

    Reads ``strategy``, ``brand`` and ``country_code`` from ``kwargs``. A
    null ``country_code`` is passed on as ``None`` and ``'UK'`` is rewritten
    to ``"GB"``. Returns whatever ``websearch_service.search`` returns.
    """
    strategy, brand = kwargs['strategy'], kwargs['brand']
    raw_country = kwargs['country_code']

    if pd.isnull(raw_country):
        country_code = None
    elif raw_country == 'UK':
        country_code = "GB"
    else:
        country_code = raw_country

    question = (
        f"What is the cosmetic or skin care product under the concept {strategy} with brand {brand}?\n\n"
        f"Please provide me the page to the product of the brand.\n"
        f"If the exact product cannot be found, please give me products with similar concept within {brand}.\n"
        f"Ideally from the official website of the brand {brand}."
    )

    return websearch_service.search(
        [{"role": "user", "content": question}],
        country_code=country_code,
    )


openai_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=openai_key)
search_context_size = 'medium'

websearch_service = WebSearchService(client=client, search_context_size=search_context_size,
                                     model="gpt-4o-search-preview")

這裡使用一個很小的巴西品牌當作例子

In [None]:
websearch_image.invoke({"strategy": "Ultra-light water",
                        "brand": "Minéraux Beauty",
                        "country_code": None})

In [None]:
websearch_image.invoke({"strategy": "Ultra-light water",
                        "brand": "Minéraux Beauty",
                        "country_code": "BR"})

有辦法根據內容物提取country的代碼嗎?

In [None]:
from textwrap import dedent
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI


def build_standard_chat_prompt_template(kwargs):
    """Turn optional ``'system'`` / ``'human'`` prompt specs into a chat prompt.

    Args:
        kwargs: Mapping whose ``'system'`` and ``'human'`` entries, when
            present, are dicts unpacked into ``PromptTemplate(**spec)``.

    Returns:
        ``ChatPromptTemplate.from_messages`` applied to the message
        templates that were built (system first, then human).
    """
    def _wrap(role, message_cls):
        # Build one message template from the spec stored under `role`.
        return message_cls(prompt=PromptTemplate(**kwargs.get(role)))

    built = []
    for role, message_cls in (('system', SystemMessagePromptTemplate),
                              ('human', HumanMessagePromptTemplate)):
        if role not in kwargs:
            continue
        built.append(_wrap(role, message_cls))

    return ChatPromptTemplate.from_messages(built)


# Structured-output schema for the web-search summary step; it is handed to PydanticOutputParser.
# The Field descriptions are runtime text (they feed the format instructions), not comments.
class Output(BaseModel):
    content: str = Field(description="Description.")
    url: str = Field(description="url of the content source.")
    country_code: str = Field(description="ISO 3166-1 alpha-2 of the country")


@chain
def websearch(kwargs):
    """Send ``kwargs['query']`` as a single user message to the web-search service.

    No country code is supplied. Returns whatever
    ``websearch_service.search`` returns.
    """
    user_turn = {"role": "user",
                 "content": kwargs['query']
                 }
    return websearch_service.search([user_turn])


output_parser = PydanticOutputParser(pydantic_object=Output)
format_instructions = output_parser.get_format_instructions()

human_template = dedent("""
Paragraph: {text}
Output format instruction: {format_instructions}
""")

input_ = {"human": {"template": human_template,
                    "input_variables": ['text'],
                    "partial_variables": {"format_instructions": format_instructions}}
          }

prompt_template = build_standard_chat_prompt_template(input_)

model = ChatOpenAI(
        model_name="gpt-4o-mini",
        temperature=0
    )
step_2 = prompt_template|model|output_parser

pipeline = RunnablePassthrough.assign(text=websearch)|RunnablePassthrough.assign(output=step_2)

In [None]:
result = pipeline.invoke({"query": "Where is Taipei 101?"})

In [None]:
# 將結果轉換成Python Dictionary類別

result['output'].model_dump()

### 建立websearch工具

In [None]:
# Wraps the `websearch` chain as a tool.
# NOTE(review): the function name is misspelled ("websearh"); it is left as is because
# the bind_tools call in the next cell refers to it by this exact name.
# NOTE(review): the docstring is presumably what the model sees as the tool description.
@tool
def websearh_tool(query: str) -> str:
    """Use this tool to find the latest information or information you are not sure"""
    return websearch.invoke({"query": query})

In [None]:
# Bind the tool to the model
llm_with_tools = llm.bind_tools([websearh_tool])

# Run
resp = llm_with_tools.invoke("台灣2024總統大選結果")

In [None]:
resp

In [None]:
import json

# Pull the first tool call out of the model response.
arguments = resp.additional_kwargs['tool_calls'][0]['function']['arguments']
name = resp.additional_kwargs['tool_calls'][0]['function']['name']

# `arguments` is a JSON string written by the model: parse it as JSON rather than eval()-ing it,
# and look the tool up by name instead of evaluating the name as an expression.
globals()[name].invoke(json.loads(arguments))

In [None]:
llm_with_tools.invoke("黃埔軍校的歷史")

In [None]:
def call_function(additional_kwargs):
    """Execute the first tool call described in ``additional_kwargs``.

    Args:
        additional_kwargs: Mapping with a ``'tool_calls'`` list. The first
            entry's ``['function']`` holds the tool ``'name'`` and its
            ``'arguments'`` as a JSON string.

    Returns:
        The value returned by ``<tool>.invoke(<parsed arguments>)``.

    Raises:
        NameError: If no module-level object is bound to the tool name.
    """
    import json

    function_call = additional_kwargs['tool_calls'][0]['function']
    name = function_call['name']

    # The arguments are JSON produced by the model. json.loads handles
    # true/false/null and, unlike eval(), cannot execute code.
    arguments = json.loads(function_call['arguments'])

    # Resolve the tool by plain name lookup instead of evaluating the name.
    try:
        selected_tool = globals()[name]
    except KeyError:
        raise NameError(f"name {name!r} is not defined") from None

    return selected_tool.invoke(arguments)


def follow_up_answer(additional_kwargs):
    """Run the requested tool and ask the model for a final answer.

    The parameter was previously misspelled ``aiadditional_kwargs`` while
    the body read ``additional_kwargs``, so every call raised NameError.

    Args:
        additional_kwargs: The ``additional_kwargs`` of a model response
            that contains at least one entry under ``'tool_calls'``.

    Returns:
        The result of ``llm_with_tools.invoke`` on a prompt that quotes the
        tool name and the tool result.
    """
    result = call_function(additional_kwargs)

    name = additional_kwargs['tool_calls'][0]['function']['name']

    follow_up = llm_with_tools.invoke(
        f"The tool '{name}' returned: {result}. Give the final answer."
    )

    return follow_up

follow_up_answer(resp.additional_kwargs)

## ChatBot 本體

### LLM 沒有記憶性

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

message = HumanMessage(
            content="Translate this sentence from English to Chinese (繁體中文): I love programming."
        )

model.invoke(
    [message]
)

In [None]:
model.invoke([HumanMessage(content="What did you just say?")])

### 外部記憶

如何將外部記憶加入?

In [None]:
model.invoke(
    [
        HumanMessage(
            content="Translate this sentence from English to  Chinese (繁體中文): I love programming."
        ),
        AIMessage(content="我愛程式設計."),
        HumanMessage(content="What did you just say?"),
    ]
)

透過 `MessagesPlaceholder` 接收外部記憶

In [None]:
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate

system_prompt = PromptTemplate(template=("You are a helpful assistant. Answer all questions to the best of your "
                                         "ability."))
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

prompt = ChatPromptTemplate.from_messages(
    [
        system_message,
        MessagesPlaceholder(variable_name="messages"),
    ]
)

### 建立邏輯鍊條

In [None]:
pipeline_ = prompt | model

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

pipeline_.invoke(
    {
        "messages": [
            HumanMessage(
            content="Translate this sentence from English to Chinese (繁體中文): I love programming."
            ),
            AIMessage(content="我愛程式設計."),
            HumanMessage(content="What did you just say?"),
            ],
    }
)

## 將對話記錄存入ChatMessageHistory裡

### 導入並創建 ChatMessageHistory。

In [None]:
from langchain.memory import ChatMessageHistory

demo_chat_history = ChatMessageHistory()

### 添加用戶和 AI 消息

In [None]:
demo_chat_history.add_user_message("Translate this sentence from English to Chinese (繁體中文): I love programming.")

demo_chat_history.add_ai_message("我愛程式設計.")

demo_chat_history.messages

In [None]:
demo_chat_history.add_user_message(
    "What did you just say?"
)

response = pipeline_.invoke({"messages": demo_chat_history.messages})

response

### 最小範例

In [None]:
from langchain_core.output_parsers import StrOutputParser

chat_history = ChatMessageHistory()
pipeline_ = prompt|model|StrOutputParser()

while True:
    question = input("What do you want to ask: ")
    if question == "QUIT":
        break
    chat_history.add_user_message(question)
    response = pipeline_.invoke({"messages": chat_history.messages})

    print(response)
    chat_history.add_ai_message(response)
    

# **** 預計第一個小時結束 ****

In [None]:
import os

os.chdir("../../../")

### 檢索消息

加入變數的敘述: 使用Pydantic schema

In [None]:
from textwrap import dedent

from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import ConfigurableField
from langchain_core.tools import tool
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field
from langchain.memory import ChatMessageHistory
# A multi-line `from ... import` list must be wrapped in parentheses; the
# previous form (trailing comma, names continued on the next line) was a SyntaxError.
from langchain_core.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

from src.io.path_definition import get_project_dir
from src.initialization import credential_init


credential_init()

# Load the Tang-poetry vector store
filename = os.path.join(get_project_dir(), "tutorial", "LLM+Langchain", "Week-2", "poem_faiss_index")

embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")

vectorstore = FAISS.load_local(
    filename, embeddings, allow_dangerous_deserialization=True
)

# `search_type` was misspelled `seearch_type`. `search_kwargs` is exposed as a
# configurable field so callers can set e.g. `k` per invocation.
retriever = vectorstore.as_retriever(search_type='similarity').configurable_fields(
    search_kwargs=ConfigurableField(id="search_kwargs")
)

# # 建立Tool
# @tool
# def poem_retriever(query: str, k: int):
#     """
#     使用這個工具來搜尋唐詩; Use this tool to search for Tang poems.
#     The default number of retrieved poem is 1.
#     """

#     output = retriever.invoke(query, config={"configurable": {"search_kwargs": {"k": k}}})
    
#     return output


# Argument schema passed as `args_schema` when the poem retriever tool is built.
# The Field descriptions are runtime text, not comments.
class PoemRetrieverArgs(BaseModel):
    query: str = Field(description="The keyword or phrase to search for Tang poems. 用來搜尋唐詩的關鍵字或是句子")
    # Defaults to 1 when the caller does not supply it.
    k: int = Field(1, description="The number of poems to retrieve.")


def _poem_retriever(query: str, k: int):
    output = retriever.invoke(query, config={"configurable": {"search_kwargs": {"k": k}}})
    return output


poem_retriever = StructuredTool.from_function(
    func=_poem_retriever,
    args_schema=PoemRetrieverArgs,
    description="使用這個工具來搜尋唐詩; Use this tool to search for Tang poems.",
)

In [None]:
system_prompt = PromptTemplate(template=dedent("""You are a helpful assistant. 
Answer all questions to the best of your ability.
"""))

system_message = SystemMessagePromptTemplate(prompt=system_prompt)

prompt = ChatPromptTemplate.from_messages(
    [
        system_message,
        MessagesPlaceholder(variable_name="messages"),
    ]
)

model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-2024-05-13", temperature=0)

model_with_tools = model.bind_tools([poem_retriever])

chatbot_pipeline = prompt | model_with_tools

In [None]:
chat_history = ChatMessageHistory()

question = "幫我找3首關於對於人生感嘆的唐詩"

chat_history.add_user_message(question)

output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
output

In [None]:
def call_function(additional_kwargs):
    """Call the function named by the first tool call, with its arguments as keywords.

    Args:
        additional_kwargs: Mapping with a ``'tool_calls'`` list. The first
            entry's ``['function']`` holds the ``'name'`` to call and its
            ``'arguments'`` as a JSON object string.

    Returns:
        Whatever the resolved callable returns for ``callable(**arguments)``.

    Raises:
        NameError: If no module-level object is bound to the requested name.
    """
    import json

    function_call = additional_kwargs['tool_calls'][0]['function']
    name = function_call['name']

    # The arguments are JSON produced by the model. json.loads handles
    # true/false/null and, unlike eval(), cannot execute code.
    arguments = json.loads(function_call['arguments'])

    # Resolve the callable by plain name lookup instead of evaluating the name.
    try:
        target = globals()[name]
    except KeyError:
        raise NameError(f"name {name!r} is not defined") from None

    return target(**arguments)


def follow_up_answer(human_message, ai_message, additional_kwargs):
    """Run the requested tool, then ask ``model`` for a final answer.

    Args:
        human_message: The user's question, quoted in the follow-up prompt.
        ai_message: The model's text reply; it is quoted in the prompt
            unless it is the empty string.
        additional_kwargs: Tool-call payload handed to ``call_function``.

    Returns:
        The result of ``model.invoke`` on the follow-up prompt.
    """
    tool_output = call_function(additional_kwargs)
    tool_name = additional_kwargs['tool_calls'][0]['function']['name']

    if ai_message != "":
        final_prompt = dedent(f"""
        human question: {human_message}
        ai response: {ai_message}
        The tool {tool_name} is applied and the tool returns {tool_output}.
        Give the final answer:
        """)
    else:
        final_prompt = dedent(f"""
        human question: {human_message}
        The tool {tool_name} is applied and the tool returns {tool_output}.
        Give the final answer:
        """)

    return model.invoke(final_prompt)

follow_up_answer(human_message=question, ai_message=output.content, additional_kwargs=output.additional_kwargs)

In [None]:
chat_history = ChatMessageHistory()

# Interactive loop: type QUIT to stop. Every turn is appended to `chat_history`
# and the whole history is sent to the model again on the next turn.
while True:
    question = input("What do you want to ask: ")
    if question == "QUIT":
        break
    chat_history.add_user_message(question)
    output = chatbot_pipeline.invoke({"messages": chat_history.messages})

    if output.content == "":
        # Empty text is treated as "the model asked for a tool". follow_up_answer
        # takes three arguments; the previous one-argument call raised TypeError.
        response = follow_up_answer(
            human_message=question,
            ai_message=output.content,
            additional_kwargs=output.additional_kwargs,
        ).content
    else:
        response = output.content

    print("***********************")
    print(response)
    print("***********************")
    
    chat_history.add_ai_message(response)

### 使用代碼解決數學問題工具

1. 代碼產生的邏輯鍊條

In [None]:
from langchain_core.runnables import chain
from langchain_core.output_parsers import StrOutputParser


def build_standard_chat_prompt_template(kwargs):
    """Create a chat prompt from optional ``'system'`` and ``'human'`` entries.

    Args:
        kwargs: Mapping in which ``'system'`` and ``'human'``, if present,
            map to dicts that are unpacked into ``PromptTemplate(**spec)``.

    Returns:
        The value of ``ChatPromptTemplate.from_messages`` for the resulting
        list of message templates (system before human).
    """
    # Evaluated lazily, so a role that is absent is never looked up.
    message_templates = (
        template_cls(prompt=PromptTemplate(**kwargs.get(role)))
        for role, template_cls in [
            ('system', SystemMessagePromptTemplate),
            ('human', HumanMessagePromptTemplate),
        ]
        if role in kwargs
    )

    return ChatPromptTemplate.from_messages(list(message_templates))


@chain
def code_execution(code):
    """Extract the generated Python snippet from ``code`` and execute it.

    Args:
        code: Text returned by the code-generation step. A fenced block that
            starts with ``python`` is used when present; otherwise the whole
            text is treated as code.

    Returns:
        A dict of the names the executed snippet defined at its top level.

    Raises:
        Any exception raised by the executed snippet propagates unchanged.
    """
    # Local import: `re` is not imported in the visible part of this notebook.
    import re

    fenced = re.findall(r"python\n(.*?)\n```", code, re.DOTALL)
    # The system prompt asks for "only the Python code", so the model may
    # answer without a fence; fall back to the raw text instead of failing
    # with IndexError.
    python_code = (fenced[0] if fenced else code).strip()

    # NOTE(security): this runs model-generated code with the full privileges
    # of this process. Only use it in a sandboxed or throw-away environment.
    #
    # One dict is used as the namespace. With separate globals and locals the
    # snippet runs as if inside a class body, so functions it defines cannot
    # see its own top-level imports or variables.
    namespace = {}
    exec(python_code, namespace)

    # exec() inserts __builtins__ into the globals dict; it is not a result.
    namespace.pop("__builtins__", None)

    return namespace


system_template = (
    "You are a highly skilled Python developer. Your task is to generate Python code strictly based on the user's instructions.\n"
    "Leverage statistical and mathematical libraries such as `statsmodels`, `scipy`, and `numpy` where appropriate to solve the problem.\n"
    "Your response must contain only the Python code — no explanations, comments, or additional text.\n\n"
)

human_template = dedent("""{query}\n\n
                            Always copy the final answer to a variable `answer`
                            Code:
                        """)


# Prompt specs for the code generator. The key must be spelled "input_variables"
# (plural): each spec dict is unpacked straight into PromptTemplate(**spec).
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["query"]}}

chat_prompt = build_standard_chat_prompt_template(input_)

# Step 1: the model writes the code as plain text.
code_generation = chat_prompt|model|StrOutputParser()

# Step 2: the generated text is handed to the executor.
code_pipeline = code_generation|code_execution

In [None]:
class CodeArgs(BaseModel):
    query: str = Field(description="User request; 用戶需求")


def _calculator(query: str) -> dict:
    """Generate Python code for ``query``, execute it, and return the result.

    Args:
        query: The user's request in natural language.

    Returns:
        Whatever ``code_pipeline`` produces for the request, i.e. the
        variables defined by the generated code.
    """
    # Pass the prompt variable by name, as the direct `code_generation.invoke`
    # call in this notebook does, instead of relying on a bare string being
    # mapped onto the prompt's single input variable.
    return code_pipeline.invoke({"query": query})


# Wrap `_calculator` as a structured tool; `CodeArgs` describes the arguments
# and `description` is the text given for the tool.
mathematic_tool = StructuredTool.from_function(
    func=_calculator,
    args_schema=CodeArgs,
    description="Use this tool to solve mathematic related problem; 使用這個工具解決數學問題",
)

In [None]:
# Bind the math tool to the chat model.
model_with_tools = model.bind_tools([mathematic_tool])

# NOTE(review): `prompt` comes from an earlier cell. The pipeline is invoked
# below with a `messages` key, so confirm it is the chat prompt and not the
# `{product}` template defined at the top of the notebook.
chatbot_pipeline = prompt | model_with_tools

北一女段考考題

https://drive.google.com/file/d/1csHdgvc5WtbJZ4n39eozogVVPIkPWABf/view

3. 在坐標平面上，下列有關圓的敘述哪些正確？

(A) 滿足方程式 $x^2 + y^2 + 2x - 10y + 30 = 0$ 之點 $(x, y)$ 的圖形是一個圓

(B) 過三點 $A(1, -3)$、$B(2, 6)$、$C(4, 24)$ 的圓恰有一個

(C) 直線 $3x - 4y + 7 = 0$ 與圓 $(x - 2)^2 + (y + 3)^2 = 5$ 恰有一交點

(D) 圓 $(x - 2)^2 + (y + 3)^2 = 5$ 上恰有二點與直線 $3x - 4y - 13 = 0$ 的距離等於 2

(E) $P(a, b)$ 為圓 $(x - 2)^2 + (y + 3)^2 = 4$ 上的點，則使 $\sqrt{a^2 + b^2}$ 為整數的點共有 8 個

In [None]:
# Statement (A): start from an empty history so the model sees only this question.
# NOTE(review): the minus sign in the text below is U+2212, not ASCII '-';
# confirm the generated code copes with it.
chat_history = ChatMessageHistory()

question = dedent("""
以下敘述是否正確:

滿足方程式 x^2 + y^2 + 2x −10y + 30 = 0 之點(x, y)的圖形是一個圓
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
output

In [None]:
# Statement (B): fresh history, single user question.
chat_history = ChatMessageHistory()

question = dedent("""
以下敘述是否正確:

過三點 A( 1, − 3), B( 2, 6 ), C( 4, 24 )的圓恰有一個
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
follow_up_answer(human_message=question, ai_message=output.content, additional_kwargs=output.additional_kwargs)

In [None]:
# Statement (C): fresh history, single user question.
chat_history = ChatMessageHistory()

question = dedent("""
以下敘述是否正確:

直線 3x −4y + 7 = 0 與圓 (x − 2)^2 + (y + 3)^2 = 5 恰有一交點
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
output

In [None]:
print(follow_up_answer(human_message=question, ai_message=output.content, additional_kwargs=output.additional_kwargs).content)

In [None]:
# Statement (D): fresh history, single user question.
chat_history = ChatMessageHistory()

question = dedent("""
以下敘述是否正確:

圓(x − 2)^2 + (y + 3)^2 = 5 上恰有二點與直線 3x −4y −13= 0 的距離等於 2
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
output

換個方法問

In [None]:
# Rephrased form of statement (D): ask directly for the distance between the
# point (2, -3), the centre of the circle in (D), and the line.
chat_history = ChatMessageHistory()

question = dedent("""
空間中點(x, y) = (2, -3) 與直線 3x −4y −13= 0 的距離是多少?
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

In [None]:
output

In [None]:
print(follow_up_answer(human_message=question, ai_message=output.content, additional_kwargs=output.additional_kwargs).content)

In [None]:
# Statement (E): the square root is spelled as `(a^2 + b^2)^0.5` in plain text.
chat_history = ChatMessageHistory()

question = dedent("""
以下敘述是否正確:

P(a, b) 為 圓 (x − 2)^2 + ( y + 3)^2 = 4 上的點,則使 (a^2 + b^2)^0.5 為整數的點共有 8 個
""")

chat_history.add_user_message(question)

# The pipeline is fed the history's message list under the `messages` key.
output = chatbot_pipeline.invoke({"messages": chat_history.messages})

# Last expression of the cell: the notebook displays the raw model output.
output

In [None]:
print(follow_up_answer(human_message=question, ai_message=output.content, additional_kwargs=output.additional_kwargs).content)

Which part went wrong?

In [None]:
# Debugging aid: run only the generation half of the pipeline (no execution)
# and print the code the model wrote for statement (E).
code = code_generation.invoke({"query": "P(a, b) 為 圓 (x − 2)^2 + ( y + 3)^2 = 4 上的點,則使 (a^2 + b^2)^0.5 為整數的點共有 8 個是否正確"})

print(code)

### 有辦法加入一些基本的機器學習來進行分析嗎?

我還不知道，應該是會蠻有趣的

到這裡你應該可以認識到，寫ChatBot本體並不困難，但一個ChatBot好不好用是由他所綑綁的工具決定。



## 加入Callback 進行追蹤ChatBot

## OpenAI Model Fine-tuning

剛好有人來找我家教這個，整理了一下。。。

參考:

- https://cookbook.openai.com/examples/chat_finetuning_data_prep

- 訓練資料格式範例（JSONL，每行一筆對話）：

```json
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
```

In [None]:
import json

import pandas as pd
from sklearn.model_selection import train_test_split

from src.io.path_definition import get_project_dir

# Collects the chat-format training examples that are written out as JSONL
# further down.
jsonl = []

# JSON text is UTF-8; state it explicitly so the read does not depend on the
# platform's default encoding (ingredient names may contain non-ASCII letters).
with open(os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-1', 'recipe_train.json'),
          'r', encoding='utf-8') as f:
    recipe_train = json.load(f)

### Create a jsonl format

In [None]:
# One row per recipe; the ingredient list is flattened into a single
# comma-separated string.
recipe_df = pd.DataFrame(
    [
        {
            'id': rec['id'],
            'cuisine': rec['cuisine'],
            'ingredients': ", ".join(rec['ingredients']),
        }
        for rec in recipe_train
    ],
    columns=['id', 'cuisine', 'ingredients'],
)

# Train-test split:
# - a fixed random_state keeps the split reproducible
# - stratified sampling on the cuisine label keeps the class distribution
#   consistent between the training and test sets
train, test = train_test_split(
    recipe_df,
    test_size=0.2,
    random_state=42,
    stratify=recipe_df['cuisine'].tolist(),
)

In [None]:
# NOTE: this text is used verbatim in every training example below and again
# when the inference prompt is built from `system_prompt_template`, so its
# spelling and embedded whitespace are deliberately left untouched.
system_prompt_template = """You are a helpful AI assistant as a chef of a Michellin 3 stars restaurant. You have extensive knowledge about cuisines 
                            all over the world, and you are able to identify the origin of a cuisine based on the ingredients. You are assigned with a 
                            task of identifying the origin, as region, of cuisine based on the <ingredients>.                            
                            """

# Build the list in one expression instead of appending to the existing one, so
# re-running this cell does not add a second copy of every training example.
# Each example is one chat: system prompt, the ingredients as the user turn,
# and the cuisine label as the assistant turn.
jsonl = [
    {"messages": [{"role": "system", "content": system_prompt_template},
                  {"role": "user", "content": f"ingredients: [{ingredients}]"},
                  {"role": "assistant", "content": cuisine}]}
    for ingredients, cuisine in zip(train['ingredients'], train['cuisine'])
]

In [None]:
# Write the file: one JSON document per line (JSONL).

with open('openapi_finetuning_test.jsonl', 'w') as outfile:
    outfile.writelines(json.dumps(entry) + '\n' for entry in jsonl)

In [None]:
# Use the API provided by OpenAI directly

from openai import OpenAI

# The key is read from the environment.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# IPython `?` help syntax (not plain Python): shows help for the upload call.
client.files.create?

In [None]:
# Upload the JSONL file. The context manager closes the file handle once the
# upload call returns; before, the handle from `open(...)` was never closed.
with open('openapi_finetuning_test.jsonl', 'rb') as training_file:
    uploaded_file = client.files.create(file=training_file,
                                        purpose='fine-tune')

# Last expression of the cell, so the notebook still displays the API response.
uploaded_file

In [None]:
client.fine_tuning.jobs.create?

In [None]:
# Start the fine-tuning job on the base model.
# NOTE(review): `training_file` is a hard-coded file id, presumably the id
# returned by the upload cell in an earlier session. Replace it with the id of
# your own upload.
# NOTE(review): a `learning_rate_multiplier` of 1e-6 is very small; confirm it
# is intentional.
client.fine_tuning.jobs.create(model="gpt-4o-mini-2024-07-18",
                               training_file="file-SHpXusOxxMVGYRgwIGNj6mk9",
                               hyperparameters={"batch_size":4, "learning_rate_multiplier": 1e-6, "n_epochs": 5},
                               suffix='test')

訓練完之後

In [None]:
# Chat model pointing at a fine-tuned model id (the `ft:` prefix).
# NOTE(review): the id is account-specific; substitute the one reported for
# your own job. This rebinds the notebook-wide name `model`.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="ft:gpt-4o-mini-2024-07-18:cosnova-account:test:ANPq9Weh", 
                   temperature=0 # a range from 0-2, the higher the value, the higher the `creativity`
                  )

In [None]:
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate

# Same system text as in the training examples.
system_prompt = PromptTemplate.from_template(system_prompt_template)

system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# The user turn now has exactly the shape used when the training examples were
# built ("ingredients: [...]"). Before, the template wrapped that text in a
# leading newline and indentation that never appeared in the training data.
human_prompt = PromptTemplate(template="ingredients: [{ingredients}]",
                              input_variables=["ingredients"]
                              )

human_message = HumanMessagePromptTemplate(prompt=human_prompt)

chat_prompt = ChatPromptTemplate.from_messages([system_message,
                                                human_message
                                                ])

# prompt -> model (at this point `model` is the fine-tuned one)
fine_tuned_chain = chat_prompt | model


In [None]:
fine_tuned_chain.invoke({"ingredients": test.iloc[0]['ingredients']})

In [None]:
# Baseline for comparison: the base model behind the same prompt.
# NOTE(review): this rebinds `model`, and the assignment to `chain` below
# replaces the `chain` decorator imported from `langchain_core.runnables`
# earlier in the notebook. Re-run that import before redefining any
# `@chain` function.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-mini-2024-07-18", 
                   temperature=0 # a range from 0-2, the higher the value, the higher the `creativity`
                  )

chain = chat_prompt|model

In [None]:
chain.invoke({"ingredients": test.iloc[0]['ingredients']})