# 作業詳解

In [None]:
import os
import json

In [None]:
os.chdir("../../../")

In [None]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

from src.initialization import credential_init
from src.io.path_definition import get_project_dir

credential_init()

# Chat model shared by every chain in this notebook; temperature=0 keeps the
# sampling randomness to a minimum. os.environ[...] raises KeyError when the
# key is absent (presumably credential_init() sets it -- verify).
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-mini", temperature=0)

# Training recipes. The encoding is stated explicitly so the JSON file is not
# decoded with whatever the platform's default codec happens to be.
recipe_train_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain',
                                 'Week-1', 'recipe_train.json')
with open(recipe_train_path, 'r', encoding='utf-8') as f:
    recipe_train = json.load(f)

In [None]:
recipe_train[100]

In [None]:
from langchain_community.retrievers import BM25Retriever
from langchain.docstore.document import Document

# One Document per training recipe: the comma-joined ingredient list is the
# searchable text, while the cuisine and recipe id travel along as metadata.
documents = [
    Document(
        page_content=", ".join(entry['ingredients']),
        metadata={"cuisine": entry['cuisine'], "id": entry['id']},
    )
    for entry in recipe_train
]

# Keyword-based (BM25) retriever over the recipes, asked for 10 hits per query.
bm25_retriever = BM25Retriever.from_documents(
    documents,
    k=10,
    bm25_params={"k1": 2.5},
)

In [None]:
# I created this last week after the session
def build_standard_chat_prompt_template(kwargs):
    """Assemble a two-message (system + human) chat prompt template.

    Args:
        kwargs: Mapping with the keys ``'system'`` and ``'human'``. Each value
            is a dict of keyword arguments that is unpacked into
            ``PromptTemplate(...)``.

    Returns:
        The ``ChatPromptTemplate`` made of the system message followed by the
        human message.

    Raises:
        KeyError: If ``'system'`` or ``'human'`` is missing from ``kwargs``.
    """
    # Look both specs up first so a missing key fails before anything is built.
    system_spec, human_spec = kwargs['system'], kwargs['human']

    system_message = SystemMessagePromptTemplate(
        prompt=PromptTemplate(**system_spec))
    human_message = HumanMessagePromptTemplate(
        prompt=PromptTemplate(**human_spec))

    return ChatPromptTemplate.from_messages([system_message, human_message])

In [None]:
# Fields the model is asked to return. The parser derives the format
# instructions injected into the prompt and later parses the reply.
response_schemas = [
    ResponseSchema(name="used ingredients",
                   description="The actual ingredients used in cooking"),
    ResponseSchema(name="extra ingredients",
                   description="extra ingredients that have to be prepared "),
    ResponseSchema(name="result",
                   description="The dish and cooking recipe in detail"),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

format_instructions = output_parser.get_format_instructions()


# System message: the chef persona and the task description.
system_template = """
                  You are an AI assistant as the best chef in the world. 
                  You have a great taste and cooking skills like Gordon Ramsay. 
                  You should be able to come up with a dish based on `suggested ingredient`, 
                  and tell us what extra ingredients has to be prepared by comparing 
                  the ingredients actually used in the cooking and the `existing ingredient`.

                  The `suggested ingredients` are the ingredients suggested 
                  by some recipe. You have the freedom to add or remove 
                  ingredients to achieve the goal, but try to be as faithful 
                  to the `suggested ingredient` as possible. 
                  """

# Human message: the two runtime inputs plus the pre-filled format instructions.
human_template = """
                 existing ingredients:[{existing_ingredients}];
                 suggested ingredients: [{suggested_ingredients}]\n; 
                 format instruction: {format_instructions}
                 """


# The keyword is `input_variables` (plural); the previous `input_variable`
# spelling is not a PromptTemplate field. `format_instructions` is supplied
# up front as a partial variable, so callers only pass the two ingredient
# strings.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["existing_ingredients", "suggested_ingredients"],
                    "partial_variables": {"format_instructions": format_instructions}}}

my_chat_prompt_template = build_standard_chat_prompt_template(input_)

In [None]:
# Because I am lazy so I use data from another dataset.

# Test recipes stand in for "what is in the fridge". The encoding is stated
# explicitly so the JSON file is not decoded with the platform default codec.
recipe_test_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain',
                                'Week-1', 'recipe_test.json')
with open(recipe_test_path, 'r', encoding='utf-8') as f:
    recipe_test = json.load(f)

# What you have in your fridge, as one comma-separated string
existing_ingredients = ", ".join(recipe_test[0]['ingredients'])

print(f"existing_ingredients: {existing_ingredients}")

# Look up the training recipes whose ingredient lists best match that string.
output = bm25_retriever.invoke(existing_ingredients)

In [None]:
output

In [None]:
output[0]

In [None]:
suggested_ingredients = output[0].page_content

### Validation

Let us learn some Python: check which of the suggested ingredients we already have.

In [None]:
existing_ingredients_as_list = recipe_test[0]['ingredients']
suggested_ingredients_as_list = suggested_ingredients.split(", ")

In [None]:
# Report, per suggested ingredient, whether the exact same string is already
# among the existing ingredients.
for suggested_ingredient in suggested_ingredients_as_list:
    on_hand = suggested_ingredient in existing_ingredients_as_list
    verdict = "yes" if on_hand else "no"
    print(f"- {suggested_ingredient}: {verdict}")

In [None]:
prompt = my_chat_prompt_template.invoke(
    {"existing_ingredients": existing_ingredients, 
     "suggested_ingredients": suggested_ingredients})

In [None]:
prompt

In [None]:
output = model.invoke(prompt)
print(output.content)

In [None]:
final_output = output_parser.parse(output.content)
print(final_output)

In [None]:
final_output.keys()

In [None]:
print(final_output['used ingredients'])

In [None]:
print(final_output['extra ingredients'])

In [None]:
print(final_output['result'])

In [None]:
translated_result = model.invoke(f"Translate the content into traditional Chinese (繁體中文): {final_output['result']}")

In [None]:
print(translated_result.content)

# Semantic based retrieval

Semantic-based retrieval is a method of finding information that focuses on understanding the meaning behind the words you use. Instead of just matching exact words, it looks for the context and concepts in your query. Here's a simple way to understand it:

- 1. Meaning Over Words: Imagine you want to find information about "healthy eating". Traditional search might look for documents with the exact phrase "healthy eating". Semantic-based retrieval, however, understands that terms like "nutritious diet" or "balanced diet" are related and will include those in the results.

- 2. Context Awareness: This method takes into account the context in which words are used. For example, if you search for "apple", a traditional search might give you results about the fruit and the tech company. Semantic-based retrieval uses context to determine whether you’re likely asking about a fruit or a tech product.

- 3. Natural Language Understanding: It works more like how humans understand language. When you ask a question, it tries to grasp the intent behind your query and finds relevant information accordingly.

- 4. Better Results: By focusing on the meaning and context, semantic-based retrieval can provide more accurate and relevant results. This means you spend less time sifting through unrelated information.


語義檢索是一種尋找信息的方法，它重點在於理解你使用的詞語背後的意思。與其僅僅匹配精確的詞語，它會尋找你查詢中的上下文和概念。以下是一種簡單的理解方式：

- 1. 重點在於意思：想像一下你想找關於“健康飲食”的信息。傳統搜索可能會尋找包含“健康飲食”這個精確詞語的文檔。而語義檢索則會理解“營養均衡的飲食”或“均衡飲食”等相關詞語，並將它們包含在結果中。

- 2. 上下文感知：這種方法會考慮詞語使用的上下文。例如，如果你搜索“蘋果”，傳統搜索可能會給你關於水果和科技公司的結果。語義檢索則會使用上下文來判斷你更可能是在詢問水果還是科技產品。

- 3. 自然語言理解：它更像人類理解語言的方式。當你提出問題時，它會嘗試理解你查詢背後的意圖，並相應地找到相關信息。

- 4. 更好的結果：通過重點關注意思和上下文，語義檢索可以提供更準確和相關的結果。這意味著你可以減少篩選無關信息的時間。

In [None]:
'egg' == 'large egg'

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# https://platform.openai.com/docs/guides/embeddings/what-are-embeddings

# A list of embedding models you can choose 
# https://www.sbert.net/docs/sentence_transformer/pretrained_models.html

### 1. Creating Embeddings (創建嵌入):

- HuggingFaceEmbeddings is used to create embeddings (vector representations) for text data.
- The model all-MiniLM-L6-v2 from HuggingFace is specified to generate these embeddings. This model converts text into numerical vectors that capture the semantic meaning of the text.

- 使用 HuggingFaceEmbeddings 創建文本數據的嵌入（向量表示）。
- 指定 HuggingFace 的模型 all-MiniLM-L6-v2 來生成這些嵌入。此模型將文本轉換為數字向量，這些向量捕捉文本的語義。

In [None]:
HuggingFaceEmbeddings?

In [None]:
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

### 2. Loading a vectorstore:

A PDF file has been processed and its content saved as a vector store. I will show you how to do this in Week-5.

In [None]:
# Embedding model handed to the loaded index.
# NOTE(review): presumably this must be the same model the saved index was built with -- confirm.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Directory that holds the previously saved FAISS index.
path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-2', 'BertV2 Index')

# SECURITY: allow_dangerous_deserialization=True opts in to loading pickled data.
# Only load index files that come from a trusted source.
vectorstore = FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)

### 3. Creating a Retriever (創建檢索器):

- The as_retriever method is called on the vectorstore object to create a retriever.
- This retriever is configured to use "similarity" as the search type, meaning it will find documents that are similar to a given query based on their vector embeddings.

- 在 vectorstore 對象上調用 as_retriever 方法來創建一個檢索器。
- 這個檢索器配置為使用“相似性”作為搜索類型，這意味著它將根據文檔的向量嵌入找到與給定查詢相似的文檔。

### 4. Setting Search Parameters (設置搜索參數):

- The search_kwargs argument is used to pass additional parameters to the search function.
- In this case, {'k': 5} is specified, which means the retriever will return the top 5 most similar documents for each query.

- 使用 search_kwargs 參數來傳遞額外的搜索功能參數。
- 在這裡，指定了 {'k': 5}，這意味著檢索器將返回每個查詢最相似的前 5 個文檔。

In [None]:
retriever = vectorstore.as_retriever(search_type="similarity", 
                                     search_kwargs={"k": 5})

In [None]:
retriever.invoke("What kind of attention does BERT use?")

In [None]:
retriever.invoke("Is it difficult to fine-tune BERT?")

## Runtime Configuration

What we learned last week: Runtime Configuration.

In [None]:
from langchain_core.runnables import ConfigurableField

query = "Is it difficult to fine-tune BERT?"

# Expose `search_kwargs` under the id "hello_search" so each invoke() call can
# supply its own search settings through `config`.
search_kwargs_field = ConfigurableField(id="hello_search")
similarity_retriever = vectorstore.as_retriever(search_type="similarity")
retriever = similarity_retriever.configurable_fields(search_kwargs=search_kwargs_field)

In [None]:
retriever.invoke(query, config={"configurable": {"hello_search": {"k": 7}}})

In [None]:
retriever.invoke(query, config={"configurable": {"hello_search": {"k": 3}}})

## Three search types:

### 1. similarity (default)

- This search type finds documents that are most similar to your query. It looks at the meaning of the words you used and matches documents that have similar meanings. Think of it like finding articles or documents that closely relate to the topic you're interested in.

- 這種搜索類型找到與你的查詢最相似的文檔。它會看你使用詞語的意思，並匹配具有相似意思的文檔。可以把它想像成找到與你感興趣的主題密切相關的文章或文檔。

### 2. MMR, Maximum Marginal Relevance (MMR, 最大邊際相關性):

- This method balances finding documents that are similar to your query while also ensuring that the results are diverse. It's like asking for a variety of opinions on a topic so you don't get too much of the same thing. It helps avoid redundancy in the search results.

- 這種方法在找到與你的查詢相似的文檔的同時，也確保結果是多樣的。這就像是在一個主題上尋求多種意見，避免得到過多相同的東西。它有助於避免搜索結果的冗餘。

### 3. similarity_score_threshold (相似性分數閾值):

- This search type sets a minimum similarity score that documents must meet to be considered relevant. Only documents that are very close to your query in terms of meaning will be included. It ensures that the results are highly relevant and filters out less related information.

- 這種搜索類型設置一個最小相似性分數，只有達到這個分數的文檔才會被認為是相關的。只有那些在意思上與你的查詢非常接近的文檔才會被包含進來。它確保結果高度相關，並過濾掉不太相關的信息。

In [None]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "https://miro.medium.com/v2/resize:fit:720/format:webp/1*c0c19i2tPSWZaHwQ7cVMrg.png")

In [None]:
"""
cosine similarity

https://api.python.langchain.com/en/latest/_modules/langchain_core/vectorstores.html

elif search_type == "similarity_score_threshold":
    docs_and_similarities = self.similarity_search_with_relevance_scores(
        query, **kwargs
    )
    return [doc for doc, _ in docs_and_similarities]

in subclass.
Return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.
"""

# Threshold search: ask for up to 5 documents with a score threshold of 0.5.
threshold_search_kwargs = {"score_threshold": 0.5, "k": 5}
retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
                                     search_kwargs=threshold_search_kwargs)

In [None]:
retriever.invoke(query)

### How to get the scores of the documents?

In [None]:
vectorstore.similarity_search_with_score(query)

In [None]:
vectorstore._similarity_search_with_relevance_scores(query)

### How to leverage the metadata?

In [None]:
# retriever = vectorstore.as_retriever(search_type='mmr', search_kwargs={'k': 8, 'fetch_k': 50, 'lambda_mult': 0.1,
#                                                                        "filter": {"cuisine": "korean"}})

### Multiple Condition Filtering

## CNN dataset

In [None]:
import pandas as pd


filename = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-2', 'CNN_Articels_clean.csv')

df_cnn = pd.read_csv(filename, index_col=0)

In [None]:
df_cnn.head(5)

In [None]:
# The time format is a string. It will be shown how to transform this object properly later

df_cnn.iloc[0]

In [None]:
df_cnn.groupby(["Category", "Section"]).agg(n=('Category', 'count'))

### We create a subset of CNN news

In [None]:
category = df_cnn['Category']
section = df_cnn['Section']

# Four (Category, Section) slices of the CNN data set ...
df_cnn_filtered_1 = df_cnn[(category == 'business') & (section == 'business')]
df_cnn_filtered_2 = df_cnn[(category == 'entertainment') & (section == 'entertainment')]
df_cnn_filtered_3 = df_cnn[(category == 'news') & section.isin(['africa', 'australia', 'us'])]
df_cnn_filtered_4 = df_cnn[(category == 'sport') & section.isin(['motorsport', 'tennis'])]

# ... stacked, in that order, into the subset used below.
df_cnn_filtered = pd.concat([
    df_cnn_filtered_1,
    df_cnn_filtered_2,
    df_cnn_filtered_3,
    df_cnn_filtered_4,
])

In [None]:
# Extract the year and month from `Date published`

def _year_and_month(row):
    """Return the first two dash-separated parts of the date in `Date published`."""
    date_part = row['Date published'].split(" ")[0]
    return date_part.split("-")[:2]


df_cnn_filtered[['year', 'month']] = df_cnn_filtered.apply(
    _year_and_month, axis=1, result_type='expand')

In [None]:
df_cnn_filtered.iloc[0]

1. Create a set of `Document` objects to store the data: `page_content` is the part used for the similarity calculation.
2. We use the attributes `Category`, `Section`, `Year`, `ID` as the metadata.
3. Store the information as high-dimensional vectors in a vector store, using an embedding model (all-MiniLM-L6-v2).

In [None]:
# One Document per filtered CNN article: the article text is what gets
# embedded, while category, section, year and the row index (as a string) are
# kept as metadata for filtering.
documents = []

for idx, row in df_cnn_filtered.iterrows():
    document = Document(page_content=row['Article text'],
                        metadata={"Category": row['Category'],
                                  "Section": row['Section'],
                                  "Year": row['year'],
                                  "ID": f"{idx}"})
    documents.append(document)

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Use the embedding model created on the line above; the original passed the
# `embedding` variable left over from an earlier cell, leaving `embeddings` unused.
cnn_vectorstore = FAISS.from_documents(documents, embedding=embeddings)

In [None]:
# Retriever over the CNN vector store using the "mmr" search type.
cnn_retriever = cnn_vectorstore.as_retriever(search_type="mmr")
# Same retriever, with `search_kwargs` exposed under the id "hello_search" so
# the search settings can be supplied per invoke() call.
cnn_retriever_configurable = cnn_retriever.configurable_fields(search_kwargs=ConfigurableField(id="hello_search"))

In [None]:
## BUG https://github.com/langchain-ai/langchain/discussions/26806
# NOTE(review): presumably the linked discussion concerns MMR search combined
# with a metadata filter -- confirm against the discussion.

# Query "Russian", restricted by metadata to sport / motorsport articles; the
# search settings are injected at call time through the "hello_search" field.
T = cnn_retriever_configurable.invoke("Russian", 
                                      config={"configurable": 
                                              {"hello_search": 
                                               {"k": 6, 'fetch_k': 100, 'lambda_mult': 0.1,
                                                "filter": {"Category": "sport",
                                                           "Section": "motorsport"}}}})

In [None]:
for document in T:
    print(document.metadata)

In [None]:
# Same query, with the metadata filter additionally pinned to Year "2020" and
# no explicit fetch_k / lambda_mult.
motorsport_2020_search = {
    "k": 6,
    "filter": {"Category": "sport",
               "Section": "motorsport",
               "Year": "2020"},
}
T = cnn_retriever_configurable.invoke(
    "Russian",
    config={"configurable": {"hello_search": motorsport_2020_search}},
)

In [None]:
for document in T:
    print(document.metadata)

In [None]:
# Year-2020 motorsport filter again, this time with fetch_k=120 and
# lambda_mult=0.1 supplied explicitly.
motorsport_2020_wide_search = {
    "k": 6,
    'fetch_k': 120,
    'lambda_mult': 0.1,
    "filter": {"Category": "sport",
               "Section": "motorsport",
               "Year": "2020"},
}
T = cnn_retriever_configurable.invoke(
    "Russian",
    config={"configurable": {"hello_search": motorsport_2020_wide_search}},
)

# Show the metadata of every returned document.
for document in T:
    print(document.metadata)

# **** 預計第一個小時結束 ****

# LangChain Expression Language (LCEL)

From an `Operator` to a `Foreman`:

Suppose you have finished building an LLM workflow and want to hand it over to an intern who does not know much about LangChain. How do you improve the chances that the workflow runs without mistakes?

### 食譜 - LCEL

In [None]:
# Fields the model is asked to return. The parser derives the format
# instructions injected into the prompt and later parses the reply.
response_schemas = [
    ResponseSchema(name="used ingredients",
                   description="The actual ingredients used in cooking"),
    ResponseSchema(name="extra ingredients",
                   description="extra ingredients that have to be prepared "),
    ResponseSchema(name="result",
                   description="The dish and cooking recipe in detail"),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

format_instructions = output_parser.get_format_instructions()


# System message: the chef persona and the task description.
system_template = """
                  You are an AI assistant as the best chef in the world. 
                  You have a great taste and cooking skills like Gordon Ramsay. You should be able to come up with a dish based on `suggested ingredient`, and tell us what extra ingredients 
                  has to be prepared by comparing the ingredients actually 
                  used in the cooking and the `existing ingredient`

                  The `suggested ingredients` are the ingredients suggested 
                  by some recipe. You have the freedom to add or remove 
                  ingredients to achieve the goal, but try to be as faithful 
                  to the `suggested ingredient` as possible. 
                  """

# Human message: the two runtime inputs plus the pre-filled format instructions.
human_template = """
                 existing ingredients:[{existing_ingredients}];
                 suggested ingredients: [{suggested_ingredients}]\n; 
                 format instruction: {format_instructions}
                 """

# The keyword is `input_variables` (plural); the previous `input_variable`
# spelling is not a PromptTemplate field. `format_instructions` is supplied
# up front as a partial variable.
input_ = {"system": {"template": system_template},
          "human": {"template": human_template,
                    "input_variables": ["existing_ingredients",
                                        "suggested_ingredients"],
                    "partial_variables": {"format_instructions":
                                          format_instructions}}}

cuisine_prompt_template = build_standard_chat_prompt_template(input_)

### Previously

1. my_chat_prompt_template = build_standard_chat_prompt_template(input_)
2. prompt = my_chat_prompt_template.invoke(
    {"existing_ingredients": existing_ingredients, 
     "suggested_ingredients": suggested_ingredients})
3. output = model.invoke(prompt)
4. final_output = output_parser.parse(output.content)

### LCEL

In [None]:
pipeline_ = cuisine_prompt_template|model|output_parser

In [None]:
# Both values are already comma-separated strings (built in the earlier
# cells), so they are passed as-is. Calling ", ".join() on a string would
# insert ", " between every single character.
pipeline_.invoke({"existing_ingredients": existing_ingredients,
                  "suggested_ingredients": suggested_ingredients})

#### How do we attach the translation to the process above?

- 1. Build the translation process 

In [None]:
# System message for the translator persona.
system_template = """
                  You are a helpful AI assistant with native speaker 
                  fluency in both English and traditional Chinese (繁體中文). 
                  You will translate the given content.
                  """

# The human message is just the text to translate. The keyword is
# `input_variables` (plural); the previous `input_variable` spelling is not a
# PromptTemplate field.
input_ = {"system": {"template": system_template},
          "human": {"template": "{query}",
                    "input_variables": ["query"]}}

translation_prompt_template = build_standard_chat_prompt_template(input_)

# prompt -> model; no output parser, so the chain yields the raw model message.
translation_chain = translation_prompt_template|model

- 2. Connect the recipe chain with the translation chain

In [None]:
# Stage 1: recipe prompt -> model. No parser here, so this stage yields the
# raw model message.
recipe_chain = cuisine_prompt_template|model

# Stage 2: the dict runs recipe_chain and stores its result under "query", the
# variable the translation prompt expects; the translated reply is then handed
# to output_parser.
# NOTE(review): "query" receives the whole message object rather than its
# text, and output_parser still expects the original English field names in
# the translated reply -- confirm both behave as intended.
pipeline = {"query": recipe_chain}|translation_chain|output_parser

In [None]:
# Both values are already comma-separated strings (built in the earlier
# cells), so they are passed as-is. Calling ", ".join() on a string would
# insert ", " between every single character.
pipeline.invoke({"existing_ingredients": existing_ingredients,
                 "suggested_ingredients": suggested_ingredients})

#### What happens?

I know it looks mysterious, but it is very simple:

In [None]:
Image(filename= "tutorial/LLM+Langchain/Week-2/LCEL_1.png")

## Minimal Example

### 1. Creating a Prompt Template (創建提示模板):

- ChatPromptTemplate.from_template is used to create a prompt template. This template is a string that includes a placeholder {topic}.
- The template specifies the instruction: "tell me a short joke about {topic}".
- 使用 ChatPromptTemplate.from_template 創建一個提示模板。這個模板是一個包含佔位符 {topic} 的字符串。
- 模板指定了指令：“tell me a short joke about {topic}”（給我講一個關於{topic}的簡短笑話）。

In [None]:
## Official diagram flow

Image(filename= "tutorial/LLM+Langchain/Week-2/lcel pipeline.png")

In [None]:
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
# prompt = ChatPromptTemplate(template="tell me a short joke about {topic}")

In [None]:
prompt

### 2. Setting Up the Chain (設置鏈條):

- `pipeline_ = prompt | model` sets up a chain in which the prompt is connected to the model. This means that the model will process the prompt to generate a response.
- The `|` operator is used to combine the prompt and the model into a single chain.
- `pipeline_ = prompt | model` 設置了一個鏈條，其中提示連接到模型。這意味著模型將處理該提示來生成回應。
- `|` 運算符用於將提示和模型組合成一個鏈條。

In [None]:
# from the PromptTemplate to the ChatModel

pipeline_ = prompt | model

### 3. Getting the Joke (獲取笑話):

- The result of `pipeline_.invoke({"topic": "ice cream"})` is stored in the variable `joke`.
- This variable now contains the generated joke about ice cream.
- `pipeline_.invoke({"topic": "ice cream"})` 的結果存儲在變量 `joke` 中。
- 這個變量現在包含生成的關於冰淇淋的笑話。

In [None]:
# input -> prompt template -> model

joke = pipeline_.invoke({"topic": "ice cream"})

In [None]:
joke

In [None]:
print(joke.content)

### 1. Importing StrOutputParser (導入 StrOutputParser):

- The code imports StrOutputParser from the langchain_core.output_parsers module. This class is used to parse the output of the model into a string format.
- 代碼從 langchain_core.output_parsers 模塊導入 StrOutputParser。這個類用於將模型的輸出解析為字符串格式。

### 2. Creating an Output Parser:

- An instance of StrOutputParser is created and assigned to the variable output_parser.
- This parser will be used to process the raw output from the model and convert it into a readable string format.
- 創建一個 StrOutputParser 的實例，並將其賦值給變量 output_parser。
- 這個解析器將用於處理來自模型的原始輸出，並將其轉換為可讀的字符串格式。

In [None]:
from langchain_core.output_parsers import StrOutputParser

# NOTE: this rebinds `output_parser`, which earlier cells bound to the
# StructuredOutputParser of the recipe example. From here on the name refers
# to the plain string parser.
output_parser = StrOutputParser()

# Three-stage chain: the parser is appended after the model.
pipeline_ = prompt | model | output_parser

# input -> prompt template -> model -> output parser

pipeline_.invoke({"topic": "ice cream"})

## 範例操作

### Coercion

Do not ask me why this word is used....

In [None]:
Image(filename= "tutorial/LLM+Langchain/Week-2/LCEL_2.png")

In [None]:
# Joke generator: prompt -> model -> parser.
joke_chain = prompt | model | output_parser

template = """
           is this a funny joke? {joke}
           """

# A single human message that asks about the joke filled into {joke}.
human_message = HumanMessagePromptTemplate(
    prompt=PromptTemplate(template=template, input_variables=['joke']))
analysis_prompt = ChatPromptTemplate(messages=[human_message])
analysis_chain = analysis_prompt | model

# The dict runs joke_chain and feeds its result to the analysis prompt under
# the key "joke"; the final reply goes through output_parser.
composed_chain = (
    {"joke": joke_chain}
    | analysis_chain
    | output_parser
)

In [None]:
print(composed_chain.invoke({"topic": "ice cream"}))

1. joke_chain 先執行，並將其結果放進 'joke' 這個 key 裡
2. {"joke": content} 被送進 analysis_prompt 中，等價於 analysis_prompt.invoke({"joke": content})
3. model 接收 analysis_prompt 產生的結果
4. output_parser 處理結果

## Parallelize steps

In [None]:
from langchain_core.runnables import RunnableParallel

joke_prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
poem_prompt = ChatPromptTemplate.from_template("write a 2-line poem about {topic}")

# Two independent prompt -> model chains that share the same {topic} input.
joke_chain = joke_prompt | model
poem_chain = poem_prompt | model

# Both chains receive the same input; the results come back keyed by "joke"
# and "poem".
map_chain = RunnableParallel(joke=joke_chain, poem=poem_chain)
map_chain.invoke({"topic": "bear"})

- Computation time

In [None]:
%%timeit

joke_chain.invoke({"topic": "bear"})

In [None]:
%%timeit

poem_chain.invoke({"topic": "bear"})

In [None]:
%%timeit

map_chain.invoke({"topic": "bear"})

RunnableParallel is also useful for running independent processes in parallel, since each Runnable in the map is executed in parallel. For example, the timings above show that joke_chain, poem_chain and map_chain all have about the same runtime, even though map_chain executes both of the other two.



## Run custom function

In [None]:
from operator import itemgetter

from langchain_core.runnables import RunnableLambda


def length_function(text):
    """Return ``len(text)``."""
    size = len(text)
    return size


def _multiple_length_function(text1, text2):
    """Return the product of the lengths of ``text1`` and ``text2``."""
    first_size, second_size = len(text1), len(text2)
    return first_size * second_size


def multiple_length_function(_dict):
    """Multiply the lengths of ``_dict["text1"]`` and ``_dict["text2"]``.

    Raises:
        KeyError: If either key is missing from ``_dict``.
    """
    first, second = _dict["text1"], _dict["text2"]
    return _multiple_length_function(first, second)

prompt = ChatPromptTemplate.from_template("what is {a} + {b}")

# NOTE: this cell is expected to fail -- it is the "before" half of the demo.
# itemgetter(...) and the plain Python functions are piped with `|` without
# being wrapped in a runnable; the next cell shows the fix with RunnableLambda.
pipeline_ = (
    {
        "a": itemgetter("foo") | length_function,
        "b": {"text1": itemgetter("foo"), "text2": itemgetter("bar")}
        | multiple_length_function,
    }
    | prompt
    | model
)

- Oops, how to solve this error message? 

In [None]:
# wrap the function with RunnableLambda

# "a": length of the value stored under "foo".
foo_length = itemgetter("foo") | RunnableLambda(length_function)

# "b": product of the lengths of the values stored under "foo" and "bar".
length_product = (
    {"text1": itemgetter("foo"), "text2": itemgetter("bar")}
    | RunnableLambda(multiple_length_function)
)

pipeline_ = {"a": foo_length, "b": length_product} | prompt | model



In [None]:
pipeline_.invoke({"foo": "bar", "bar": "gah"})

How does it work?

- 'bar' -> foo, foo ('bar') -> length_function => a = 3
- 'bar' -> foo & 'gah' -> bar, foo ('bar') -> 'text1' & bar ('gah') -> 'text2', {'text1': 'bar', 'text2': 'gah'} -> multiple_length_function => b = 9
- {'a':3, 'b': 9} -> prompt -> 'what is 3 + 9'

#### Decorator

- A very cool feature.
- This was a new discovery at the beginning of December, so it is not used in subsequent tutorials. However, feel free to adapt the code and experience its magic.
- Understanding programming remains key to building successful AI applications.

In [None]:
from operator import itemgetter
from langchain_core.runnables import chain, RunnableParallel

prompt = ChatPromptTemplate.from_template("what is {a} + {b}")


@chain
def length_function(text):
    """Return ``len(text)``; @chain makes it usable directly in a `|` pipeline."""
    size = len(text)
    return size


def _multiple_length_function(text1, text2):
    """Return the product of the lengths of ``text1`` and ``text2``."""
    first_size, second_size = len(text1), len(text2)
    return first_size * second_size


@chain
def multiple_length_function(_dict):
    """Multiply the lengths of ``_dict["text1"]`` and ``_dict["text2"]``."""
    first, second = _dict["text1"], _dict["text2"]
    return _multiple_length_function(first, second)


# Same computation as the RunnableLambda version, without the explicit wrapping.
pipeline_ = (
    RunnableParallel(
        a=itemgetter("foo") | length_function,
        b={"text1": itemgetter("foo"), "text2": itemgetter("bar")}
        | multiple_length_function,
    )
    | prompt
    | model
)

pipeline_.invoke({"foo": "bar", "bar": "gah"})

## Passing data through

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Two branches fed with the same input:
#   passed   -> RunnablePassthrough(), i.e. the input handed on as-is
#   modified -> the value under "num" plus one
runnable = RunnableParallel(
    passed=RunnablePassthrough(),
    modified=lambda x: x["num"] + 1,
)

# Expected result for this input: {'passed': {'num': 1}, 'modified': 2}
runnable.invoke({"num": 1})

In [None]:
# Same as the previous cell, except the pass-through branch is stored under
# the key "passed_2": the keyword names chosen here become the output keys.
runnable = RunnableParallel(
    passed_2=RunnablePassthrough(),
    modified=lambda x: x["num"] + 1,
)

runnable.invoke({"num": 1})

## RAG + LCEL

In [None]:
@chain
def chatbot_prompt_fn(data):
    """Build the question-answering chat prompt used by the RAG pipeline.

    Args:
        data: The pipeline input. It is not read here: the function returns a
            prompt template rather than a formatted prompt. (When a chained
            function returns a runnable, LangChain is documented to invoke
            that runnable with the same input -- see the RunnableLambda docs.)

    Returns:
        A ``ChatPromptTemplate`` with a system message and a human message
        that expects the variables ``question`` and ``context``.
    """
    system_template = """
                      You are a helpful assistant. Answer all questions to the best of your ability.
                      """

    human_template = """
                     question: {question}

                     Please answer the question based on the context:
                     {context}
                     """

    # The keyword is `input_variables` (plural); the previous `input_variable`
    # spelling is not a PromptTemplate field.
    input_ = {"system": {"template": system_template},
              "human": {"template": human_template,
                        "input_variables": ["question", "context"]}}

    prompt_template = build_standard_chat_prompt_template(input_)

    return prompt_template


embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Reload the saved BERT-paper index and wrap it in a top-5 similarity retriever.
path = os.path.join(
    get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-2', 'BertV2 Index')
vectorstore = FAISS.load_local(
    path, embeddings, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever(
    search_type="similarity", search_kwargs={"k": 5})

# RAG chain: add "context" (the documents retrieved for the question) to the
# input, then prompt -> model -> plain string.
fetch_context = itemgetter('question') | retriever
pipeline_ = (
    RunnablePassthrough.assign(context=fetch_context)
    | chatbot_prompt_fn
    | model
    | StrOutputParser()
)

In [None]:
pipeline_.invoke({"question": "Is it difficult to fine-tune BERT?"})

## Translation Template

In [None]:
from langchain_core.runnables import chain

@chain
def translation_function(text):
    """Build the chat prompt that translates content into traditional Chinese.

    Args:
        text: The pipeline input. It is not read here: the function returns a
            prompt template rather than a formatted prompt. (When a chained
            function returns a runnable, LangChain is documented to invoke
            that runnable with the same input -- see the RunnableLambda docs.)

    Returns:
        A ``ChatPromptTemplate`` with a translator system message and a human
        message that expects the variable ``query``.
    """

    system_template = """
                      You are a helpful AI assistant with native speaker 
                      fluency in both English and traditional Chinese 
                      (繁體中文). You will translate the given content into traditional Chinese 
                      (繁體中文).
                      """

    human_template = """
                     {query}
                     """

    # The keyword is `input_variables` (plural); the previous `input_variable`
    # spelling is not a PromptTemplate field.
    input_ = {"system": {"template": system_template},
              "human": {"template": human_template,
                        "input_variables": ["query"]}}

    prompt_template = build_standard_chat_prompt_template(input_)

    return prompt_template

In [None]:
# Step 1: the English RAG answer (retrieve context -> prompt -> model -> string).
step_1 = (
    RunnablePassthrough.assign(context=itemgetter('question') | retriever)
    | chatbot_prompt_fn
    | model
    | StrOutputParser()
)

In [None]:
# Step 2: feed the step-1 answer to the translation prompt under "query",
# then model -> plain string.
pipeline_ = (
    {"query": step_1}
    | translation_function
    | model
    | StrOutputParser()
)

In [None]:
pipeline_.invoke({"question": "Is it difficult to fine-tune BERT?"})