# 作業詳解

In [1]:
import os
import json

In [2]:
# Switch to the project root so that the `src` package imported further down resolves.
# The move is guarded: a bare relative chdir is not idempotent, so re-running this cell
# (or starting the kernel from the project root) would climb three more directories
# and break every later import and path lookup.
# Assumes the project root is the directory that contains `src` - TODO confirm.
if not os.path.isdir("src"):
    os.chdir("../../../")

In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

from src.initialization import credential_init
from src.io.path_definition import get_project_dir

# Run the project's credential setup before any client is built.
# NOTE(review): presumably this populates os.environ (OPENAI_API_KEY is read right below) - confirm in src.initialization.
credential_init()

# Chat model used for every LLM call in this notebook; temperature=0 requests the least random sampling.
model = ChatOpenAI(openai_api_key=os.environ['OPENAI_API_KEY'],
                   model_name="gpt-4o-mini", temperature=0)

# Training recipes. The encoding is pinned to UTF-8 because open() otherwise falls back to
# the platform's locale encoding, which is not UTF-8 on every system and can fail on or
# mis-decode non-ASCII ingredient names.
recipe_train_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain',
                                 'Week-1', 'recipe_train.json')
with open(recipe_train_path, 'r', encoding='utf-8') as f:
    recipe_train = json.load(f)

  warn_deprecated(


In [4]:
from langchain_community.retrievers import BM25Retriever
from langchain.docstore.document import Document

# Turn every training recipe into a Document: the comma-joined ingredient list is the
# searchable text, while cuisine and id travel along as metadata.
documents = []
for recipe in recipe_train:
    ingredient_text = ", ".join(recipe['ingredients'])
    recipe_metadata = {"cuisine": recipe['cuisine'], "id": recipe['id']}
    document = Document(page_content=ingredient_text, metadata=recipe_metadata)
    documents.append(document)

# Keyword-based (BM25) retriever over all recipes, configured with k=10.
# NOTE(review): k1 is forwarded to the underlying BM25 implementation via bm25_params;
# presumably the term-frequency saturation parameter - confirm.
bm25_settings = {"k1": 2.5}
bm25_retriever = BM25Retriever.from_documents(documents, k=10, bm25_params=bm25_settings)

In [5]:
# (name, description) pairs for the three keys the model is asked to return.
response_fields = [
    ("used ingredients", "The actual ingredients used in cooking"),
    ("extra ingredients", "extra ingredients that have to be prepared "),
    ("result", "The dish and cooking recipe in detail"),
]
response_schemas = [ResponseSchema(name=field_name, description=field_description)
                    for field_name, field_description in response_fields]

# The parser supplies the format instructions that get injected into the prompt,
# and is used again later to parse the model's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# System prompt: sets the chef persona and explains how the two ingredient lists are to be used.
SYSTEM_TEMPLATE = (
    "You are an AI assistant as the best chef in the world. You have a great taste and\n"
    "cooking skills like Gordon Ramsay. You should be able to come up with a dish based on `suggested ingredient`, and tell us what extra ingredients \n"
    "has to be prepared by comparing the ingredients actually used in the cooking and the `existing ingredient`\n"
    "\n"
    "The `suggested ingredients` are the ingredients suggested by some recipe. You have the freedom to add or remove ingredients to achieve the goal, \n"
    "but try to be as faithful to the `suggested ingredient` as possible. \n"
)
system_prompt = PromptTemplate.from_template(SYSTEM_TEMPLATE)
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# Human prompt: carries the two ingredient lists supplied at invoke time; the format
# instructions are bound up front as a partial variable, so callers never pass them.
HUMAN_TEMPLATE = (
    "existing ingredients:[{existing_ingredients}]; "
    "suggested ingredients: [{suggested_ingredients}]\n; "
    "format instruction: {format_instructions}"
)
human_prompt = PromptTemplate(
    template=HUMAN_TEMPLATE,
    input_variables=["existing_ingredients", "suggested_ingredients"],
    partial_variables={"format_instructions": format_instructions},
)
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

# Final chat prompt: system message first, then the human message.
chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

In [6]:
# Test recipes. The encoding is pinned to UTF-8 because open() otherwise uses the
# platform's locale encoding, which can fail on or mis-decode non-ASCII ingredient names.
recipe_test_path = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain',
                                'Week-1', 'recipe_test.json')
with open(recipe_test_path, 'r', encoding='utf-8') as f:
    recipe_test = json.load(f)

# The first test recipe's ingredients, comma-joined, serve as the query text.
content = ", ".join(recipe_test[0]['ingredients'])

# BM25 lookup of the query against the training recipes.
output = bm25_retriever.invoke(content)

In [7]:
existing_ingredients = content
print(existing_ingredients)

olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions


In [8]:
output[0]

Document(page_content='Italian parsley leaves, toasted pine nuts, olive oil, fresh oregano, fresh leav spinach, salt, fresh basil leaves, grated parmesan cheese, garlic cloves', metadata={'cuisine': 'italian', 'id': 7983})

In [9]:
suggested_ingredients = output[0].page_content

In [10]:
# Fill the two remaining placeholders of the chat prompt (the format instructions are already bound).
prompt_inputs = {
    "existing_ingredients": existing_ingredients,
    "suggested_ingredients": suggested_ingredients,
}
prompt = chat_prompt.invoke(prompt_inputs)

In [11]:
prompt

ChatPromptValue(messages=[SystemMessage(content='You are an AI assistant as the best chef in the world. You have a great taste and\ncooking skills like Gordon Ramsay. You should be able to come up with a dish based on `suggested ingredient`, and tell us what extra ingredients \nhas to be prepared by comparing the ingredients actually used in the cooking and the `existing ingredient`\n\nThe `suggested ingredients` are the ingredients suggested by some recipe. You have the freedom to add or remove ingredients to achieve the goal, \nbut try to be as faithful to the `suggested ingredient` as possible. \n'), HumanMessage(content='existing ingredients:[olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions]; suggested ingredients: [Italian parsley leaves, toasted pine nuts, olive oil, fresh oregano, fresh leav spinach, salt, fresh basil leaves, grated parmesan 

In [12]:
# Send the filled-in chat prompt to the model and show the raw reply text.
# NOTE: this rebinds `output` - up to this point it held the list of Documents returned
# by the BM25 retriever; from here on it is the model's reply.
output = model.invoke(prompt)
print(output.content)

```json
{
	"used ingredients": "olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions, garlic cloves, fresh oregano, Italian parsley leaves",
	"extra ingredients": "fresh oregano, Italian parsley leaves, garlic cloves",
	"result": "### Spinach and Ricotta Fusilli with Toasted Pine Nuts\n\n#### Ingredients:\n- 2 cups fusilli pasta\n- 2 tablespoons olive oil\n- 2 cloves garlic, minced\n- 4 cups baby spinach, chopped\n- 1 cup part-skim ricotta cheese\n- 1/2 cup grated parmesan cheese\n- 1/4 cup toasted pine nuts\n- 1/4 cup golden raisins\n- 1/4 cup balsamic vinegar\n- 1/4 cup scallions, chopped\n- 1/4 cup fresh basil leaves, chopped\n- 1/4 cup Italian parsley leaves, chopped\n- 1 teaspoon fresh oregano, chopped\n- Kosher salt and pepper to taste\n\n#### Instructions:\n1. **Cook the Pasta:** In a large pot of salted boiling water, cook the fusilli according 

In [13]:
# Parse the model's fenced-JSON reply into a dict keyed by the response schema names.
final_output = output_parser.parse(output.content)
print(final_output)

{'used ingredients': 'olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions, garlic cloves, fresh oregano, Italian parsley leaves', 'extra ingredients': 'fresh oregano, Italian parsley leaves, garlic cloves', 'result': '### Spinach and Ricotta Fusilli with Toasted Pine Nuts\n\n#### Ingredients:\n- 2 cups fusilli pasta\n- 2 tablespoons olive oil\n- 2 cloves garlic, minced\n- 4 cups baby spinach, chopped\n- 1 cup part-skim ricotta cheese\n- 1/2 cup grated parmesan cheese\n- 1/4 cup toasted pine nuts\n- 1/4 cup golden raisins\n- 1/4 cup balsamic vinegar\n- 1/4 cup scallions, chopped\n- 1/4 cup fresh basil leaves, chopped\n- 1/4 cup Italian parsley leaves, chopped\n- 1 teaspoon fresh oregano, chopped\n- Kosher salt and pepper to taste\n\n#### Instructions:\n1. **Cook the Pasta:** In a large pot of salted boiling water, cook the fusilli according to package i

In [14]:
final_output.keys()

dict_keys(['used ingredients', 'extra ingredients', 'result'])

In [15]:
final_output['used ingredients']

'olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions, garlic cloves, fresh oregano, Italian parsley leaves'

In [16]:
suggested_ingredients

'Italian parsley leaves, toasted pine nuts, olive oil, fresh oregano, fresh leav spinach, salt, fresh basil leaves, grated parmesan cheese, garlic cloves'

In [17]:
final_output['extra ingredients']

'fresh oregano, Italian parsley leaves, garlic cloves'

In [18]:
final_output['result']

'### Spinach and Ricotta Fusilli with Toasted Pine Nuts\n\n#### Ingredients:\n- 2 cups fusilli pasta\n- 2 tablespoons olive oil\n- 2 cloves garlic, minced\n- 4 cups baby spinach, chopped\n- 1 cup part-skim ricotta cheese\n- 1/2 cup grated parmesan cheese\n- 1/4 cup toasted pine nuts\n- 1/4 cup golden raisins\n- 1/4 cup balsamic vinegar\n- 1/4 cup scallions, chopped\n- 1/4 cup fresh basil leaves, chopped\n- 1/4 cup Italian parsley leaves, chopped\n- 1 teaspoon fresh oregano, chopped\n- Kosher salt and pepper to taste\n\n#### Instructions:\n1. **Cook the Pasta:** In a large pot of salted boiling water, cook the fusilli according to package instructions until al dente. Drain and set aside.\n\n2. **Sauté the Garlic:** In a large skillet, heat the olive oil over medium heat. Add the minced garlic and sauté for about 1 minute until fragrant.\n\n3. **Add Spinach:** Add the chopped baby spinach to the skillet and cook until wilted, about 2-3 minutes.\n\n4. **Combine Ingredients:** Stir in the 

In [19]:
print(final_output['result'])

### Spinach and Ricotta Fusilli with Toasted Pine Nuts

#### Ingredients:
- 2 cups fusilli pasta
- 2 tablespoons olive oil
- 2 cloves garlic, minced
- 4 cups baby spinach, chopped
- 1 cup part-skim ricotta cheese
- 1/2 cup grated parmesan cheese
- 1/4 cup toasted pine nuts
- 1/4 cup golden raisins
- 1/4 cup balsamic vinegar
- 1/4 cup scallions, chopped
- 1/4 cup fresh basil leaves, chopped
- 1/4 cup Italian parsley leaves, chopped
- 1 teaspoon fresh oregano, chopped
- Kosher salt and pepper to taste

#### Instructions:
1. **Cook the Pasta:** In a large pot of salted boiling water, cook the fusilli according to package instructions until al dente. Drain and set aside.

2. **Sauté the Garlic:** In a large skillet, heat the olive oil over medium heat. Add the minced garlic and sauté for about 1 minute until fragrant.

3. **Add Spinach:** Add the chopped baby spinach to the skillet and cook until wilted, about 2-3 minutes.

4. **Combine Ingredients:** Stir in the ricotta cheese, grated par

In [20]:
# Second LLM call: pass the generated recipe text as a plain string prompt asking for a
# Traditional Chinese translation.
translated_result = model.invoke(f"Translate the content into traditional Chinese (繁體中文): {final_output['result']}")

In [21]:
print(translated_result.content)

### 菠菜和瑞可達起司螺旋麵配烤松子

#### 材料：
- 2杯螺旋麵
- 2湯匙橄欖油
- 2瓣大蒜，切碎
- 4杯嬰兒菠菜，切碎
- 1杯部分脫脂瑞可達起司
- 1/2杯磨碎的帕爾馬起司
- 1/4杯烤松子
- 1/4杯金葡萄乾
- 1/4杯香醋
- 1/4杯青蔥，切碎
- 1/4杯新鮮羅勒葉，切碎
- 1/4杯義大利香菜葉，切碎
- 1茶匙新鮮牛至，切碎
- 猶太鹽和黑胡椒，依個人口味調整

#### 做法：
1. **煮麵條：** 在一大鍋加鹽的滾水中，根據包裝說明煮螺旋麵至剛好熟透（al dente）。瀝乾並放置一旁。

2. **炒大蒜：** 在一個大平底鍋中，用中火加熱橄欖油。加入切碎的大蒜，炒約1分鐘，直到散發香味。

3. **加入菠菜：** 將切碎的嬰兒菠菜加入平底鍋中，煮至萎縮，約2-3分鐘。

4. **混合材料：** 加入瑞可達起司、磨碎的帕爾馬起司、烤松子、金葡萄乾和煮熟的螺旋麵。充分攪拌混合。

5. **調味：** 淋上香醋，並根據個人口味加入猶太鹽和黑胡椒。加入切碎的新鮮羅勒、義大利香菜和牛至，攪拌至所有材料充分融合。

6. **上菜：** 將菜餚盛盤，若需要可用額外的帕爾馬起司和新鮮香草裝飾。享受美味的菠菜和瑞可達起司螺旋麵！


# Semantic based retrieval

Semantic-based retrieval is a method of finding information that focuses on understanding the meaning behind the words you use. Instead of just matching exact words, it looks for the context and concepts in your query. Here's a simple way to understand it:

- 1. Meaning Over Words: Imagine you want to find information about "healthy eating". Traditional search might look for documents with the exact phrase "healthy eating". Semantic-based retrieval, however, understands that terms like "nutritious diet" or "balanced diet" are related and will include those in the results.

- 2. Context Awareness: This method takes into account the context in which words are used. For example, if you search for "apple", a traditional search might give you results about the fruit and the tech company. Semantic-based retrieval uses context to determine whether you’re likely asking about a fruit or a tech product.

- 3. Natural Language Understanding: It works more like how humans understand language. When you ask a question, it tries to grasp the intent behind your query and finds relevant information accordingly.

- 4. Better Results: By focusing on the meaning and context, semantic-based retrieval can provide more accurate and relevant results. This means you spend less time sifting through unrelated information.


語義檢索是一種尋找信息的方法，它重點在於理解你使用的詞語背後的意思。與其僅僅匹配精確的詞語，它會尋找你查詢中的上下文和概念。以下是一種簡單的理解方式：

- 1. 重點在於意思：想像一下你想找關於“健康飲食”的信息。傳統搜索可能會尋找包含“健康飲食”這個精確詞語的文檔。而語義檢索則會理解“營養均衡的飲食”或“均衡飲食”等相關詞語，並將它們包含在結果中。

- 2. 上下文感知：這種方法會考慮詞語使用的上下文。例如，如果你搜索“蘋果”，傳統搜索可能會給你關於水果和科技公司的結果。語義檢索則會使用上下文來判斷你更可能是在詢問水果還是科技產品。

- 3. 自然語言理解：它更像人類理解語言的方式。當你提出問題時，它會嘗試理解你查詢背後的意圖，並相應地找到相關信息。

- 4. 更好的結果：通過重點關注意思和上下文，語義檢索可以提供更準確和相關的結果。這意味著你可以減少篩選無關信息的時間。

In [22]:
'egg' == 'large egg'

False

In [23]:
# One-time setup (left commented out): the HuggingFace embedding model used below
# presumably needs the sentence-transformers package - uncomment if it is not installed.
# %pip install sentence-transformers

In [24]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


# https://platform.openai.com/docs/guides/embeddings/what-are-embeddings

# A list of embedding models you can choose 
# https://www.sbert.net/docs/sentence_transformer/pretrained_models.html

### 1. Creating Embeddings (創建嵌入):

- HuggingFaceEmbeddings is used to create embeddings (vector representations) for text data.
- The model all-MiniLM-L6-v2 from HuggingFace is specified to generate these embeddings. This model converts text into numerical vectors that capture the semantic meaning of the text.

- 使用 HuggingFaceEmbeddings 創建文本數據的嵌入（向量表示）。
- 指定 HuggingFace 的模型 all-MiniLM-L6-v2 來生成這些嵌入。此模型將文本轉換為數字向量，這些向量捕捉文本的語義。

In [25]:
HuggingFaceEmbeddings?

[1;31mInit signature:[0m
[0mHuggingFaceEmbeddings[0m[1;33m([0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mclient[0m[1;33m:[0m [0mAny[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmodel_name[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;34m'sentence-transformers/all-mpnet-base-v2'[0m[1;33m,[0m[1;33m
[0m    [0mcache_folder[0m[1;33m:[0m [0mOptional[0m[1;33m[[0m[0mstr[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmodel_kwargs[0m[1;33m:[0m [0mDict[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mAny[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mencode_kwargs[0m[1;33m:[0m [0mDict[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mAny[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmulti_process[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mshow_progress[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mFalse[0m

In [26]:
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  warn_deprecated(
  from .autonotebook import tqdm as notebook_tqdm


### 2. Initializing Vector Store (初始化向量存儲):

- FAISS.from_documents is used to create a vector store from a subset of documents.
- The first 500 documents from the documents list are selected for this operation.
- The embedding parameter is set to the previously created embeddings (HuggingFaceEmbeddings).

- 使用 FAISS.from_documents 從一部分文檔創建一個向量存儲。
- 選擇 documents 列表中的前 500 個文檔來進行此操作。
- embedding 參數設置為先前創建的嵌入（HuggingFaceEmbeddings）。

In [27]:
vectorstore = FAISS.from_documents(documents[:500], embedding=embedding)

### 3. Creating a Retriever (創建檢索器):

- The as_retriever method is called on the vectorstore object to create a retriever.
- This retriever is configured to use "similarity" as the search type, meaning it will find documents that are similar to a given query based on their vector embeddings.

- 在 vectorstore 對象上調用 as_retriever 方法來創建一個檢索器。
- 這個檢索器配置為使用“相似性”作為搜索類型，這意味著它將根據文檔的向量嵌入找到與給定查詢相似的文檔。

### 4. Setting Search Parameters (設置搜索參數):

- The search_kwargs argument is used to pass additional parameters to the search function.
- In this case, {'k': 5} is specified, which means the retriever will return the top 5 most similar documents for each query.

- 使用 search_kwargs 參數來傳遞額外的搜索功能參數。
- 在這裡，指定了 {'k': 5}，這意味著檢索器將返回每個查詢最相似的前 5 個文檔。

In [28]:
retriever = vectorstore.as_retriever(search_type="similarity",
                                     search_kwargs={'k': 5})

In [29]:
query = ", ".join(recipe_test[0]['ingredients'])

In [30]:
query

'olive oil, balsamic vinegar, toasted pine nuts, kosher salt, golden raisins, part-skim ricotta cheese, grated parmesan cheese, baby spinach, fresh basil leaves, pepper, fusilli, scallions'

In [31]:
retriever.invoke(query)

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
 Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019}),
 Document(page_content='fresh basil, chicken breasts, pepper, purple onion, mozzarella cheese, balsamic vinegar, tomatoes, olive oil, salt', metadata={'cuisine': 'italian', 'id': 39500

## Runtime Configuration

What we learned last week: Runtime Configuration. Although I do not use this in my own work, it is worth seeing what this functionality makes possible. Maybe in the future there will be use cases where I need it :).

In [32]:
from langchain_core.runnables import ConfigurableField

# Expose `search_kwargs` as a runtime-configurable field: callers can then pass a different
# value per call through config={"configurable": {"hello_search": {...}}}.
search_kwargs_field = ConfigurableField(id="hello_search")
retriever = vectorstore.as_retriever(search_type="similarity").configurable_fields(
    search_kwargs=search_kwargs_field
)

In [33]:
retriever.invoke(query, config={"configurable": {"hello_search": {"k": 7}}})

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
 Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019}),
 Document(page_content='fresh basil, chicken breasts, pepper, purple onion, mozzarella cheese, balsamic vinegar, tomatoes, olive oil, salt', metadata={'cuisine': 'italian', 'id': 39500

In [34]:
retriever.invoke(query, config={"configurable": {"hello_search": {"k": 3}}})

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
 Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019})]

## Three search types:

### 1. similarity (default)

- This search type finds documents that are most similar to your query. It looks at the meaning of the words you used and matches documents that have similar meanings. Think of it like finding articles or documents that closely relate to the topic you're interested in.

- 這種搜索類型找到與你的查詢最相似的文檔。它會看你使用詞語的意思，並匹配具有相似意思的文檔。可以把它想像成找到與你感興趣的主題密切相關的文章或文檔。

### 2. mmr: Maximal Marginal Relevance (MMR, 最大邊際相關性):

- This method balances finding documents that are similar to your query while also ensuring that the results are diverse. It's like asking for a variety of opinions on a topic so you don't get too much of the same thing. It helps avoid redundancy in the search results.

- 這種方法在找到與你的查詢相似的文檔的同時，也確保結果是多樣的。這就像是在一個主題上尋求多種意見，避免得到過多相同的東西。它有助於避免搜索結果的冗餘。

### 3. similarity_score_threshold (相似性分數閾值):

- This search type sets a minimum similarity score that documents must meet to be considered relevant. Only documents that are very close to your query in terms of meaning will be included. It ensures that the results are highly relevant and filters out less related information.

- 這種搜索類型設置一個最小相似性分數，只有達到這個分數的文檔才會被認為是相關的。只有那些在意思上與你的查詢非常接近的文檔才會被包含進來。它確保結果高度相關，並過濾掉不太相關的信息。

In [35]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "https://miro.medium.com/v2/resize:fit:720/format:webp/1*c0c19i2tPSWZaHwQ7cVMrg.png")

In [36]:
"""
cosine similarity

https://api.python.langchain.com/en/latest/_modules/langchain_core/vectorstores.html

elif search_type == "similarity_score_threshold":
    docs_and_similarities = self.similarity_search_with_relevance_scores(
        query, **kwargs
    )
    return [doc for doc, _ in docs_and_similarities]

in subclass.
Return docs and relevance scores in the range [0, 1].

0 is dissimilar, 1 is most similar.
"""

retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5, "k": 3}
)

In [37]:
query = ", ".join(recipe_test[0]['ingredients'])

In [38]:
retriever.invoke(query)

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
 Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019})]

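### How to get the scores of the documents?

Note that the two calls below report different scales: `similarity_search_with_score` returns the raw distance (lower means closer), while the relevance-score variant returns a value in [0, 1] (higher means more similar). The `score_threshold` used by the retriever is compared against the latter.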

In [39]:
vectorstore.similarity_search_with_score(query)

[(Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
  0.2848874),
 (Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
  0.32091278),
 (Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019}),
  0.32712448),
 (Document(page_content='fresh basil, chicken breasts, pepper, purple onion, mozzarella cheese, balsamic vinegar, tomatoes, olive oil, sa

In [40]:
# Same lookup, but with scores normalised to [0, 1] (higher = more similar).
# Uses the public method instead of the underscore-prefixed internal one, which is not
# part of the supported API; with no score_threshold passed, the public method should
# return the same (document, score) pairs - verify against the LangChain API reference.
vectorstore.similarity_search_with_relevance_scores(query)

[(Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
  0.798554185287482),
 (Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
  0.7730803982398308),
 (Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019}),
  0.768688064422268),
 (Document(page_content='fresh basil, chicken breasts, pepper, purple onion, mozzarella cheese, balsamic vinegar, 

In [41]:
# vectorstore._select_relevance_score_fn?

In [42]:
vectorstore.similarity_search_with_score?

[1;31mSignature:[0m
[0mvectorstore[0m[1;33m.[0m[0msimilarity_search_with_score[0m[1;33m([0m[1;33m
[0m    [0mquery[0m[1;33m:[0m [1;34m'str'[0m[1;33m,[0m[1;33m
[0m    [0mk[0m[1;33m:[0m [1;34m'int'[0m [1;33m=[0m [1;36m4[0m[1;33m,[0m[1;33m
[0m    [0mfilter[0m[1;33m:[0m [1;34m'Optional[Union[Callable, Dict[str, Any]]]'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfetch_k[0m[1;33m:[0m [1;34m'int'[0m [1;33m=[0m [1;36m20[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mkwargs[0m[1;33m:[0m [1;34m'Any'[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [1;34m'List[Tuple[Document, float]]'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Return docs most similar to query.

Args:
    query: Text to look up documents similar to.
    k: Number of Documents to return. Defaults to 4.
    filter (Optional[Dict[str, str]]): Filter by metadata.
        Defaults to None. If a callable, it must take as input the
        metadata 

### How to leverage the metadata?

In [43]:
# Debug: the metadata filter runs into a problem.
# Retriever with a score threshold only and no metadata filter.

retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5}
)

In [44]:
retriever.invoke(query)

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='fresh ginger, vegetable oil, rice vinegar, large eggs, crushed red pepper flakes, scallions, reduced sodium soy sauce, all purpose unbleached flour, dark brown sugar, kosher salt, sesame oil, garlic', metadata={'cuisine': 'korean', 'id': 18437}),
 Document(page_content='picholine olives, parmigiano reggiano cheese, cavatelli, fresh basil, whole grain dijon mustard, extra-virgin olive oil, prosciutto, red wine vinegar, flat leaf parsley, sugar, ground black pepper, salt', metadata={'cuisine': 'italian', 'id': 25019}),
 Document(page_content='fresh basil, chicken breasts, pepper, purple onion, mozzarella cheese, balsamic vinegar, tomatoes, olive oil, salt', metadata={'cuisine': 'italian', 'id': 39500

In [45]:
# Same relevance threshold as before, now restricted to documents whose metadata
# has cuisine == "mexican".
mexican_threshold_kwargs = {"score_threshold": 0.5, "filter": {"cuisine": "mexican"}}
retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
                                     search_kwargs=mexican_threshold_kwargs)

In [46]:
retriever.invoke(query)

[Document(page_content='avocado, chicken breasts, yellow onion, masa, kosher salt, achiote paste, liquid, white vinegar, lettuce leaves, purple onion, plum tomatoes, black beans, vegetable oil, sauce', metadata={'cuisine': 'mexican', 'id': 35072}),
 Document(page_content='kosher salt, chile de arbol, turkey breast, mint leaves, garlic cloves, masa, tomatoes, vegetable oil, low salt chicken broth, water, achiote paste, onions', metadata={'cuisine': 'mexican', 'id': 26778}),
 Document(page_content='kosher salt, lean ground beef, onions, chili powder, garlic, salsa verde, cilantro, sugar, lime wedges, corn tortillas', metadata={'cuisine': 'mexican', 'id': 27521})]

In [47]:
# Query the vector store directly for up to five recipes tagged as Mexican.
# NOTE(review): the signature printed by `similarity_search_with_score?` shows fetch_k=20;
# presumably the filter is applied only to those fetched candidates, which would explain
# why fewer than k documents can come back - confirm against the FAISS vector store docs.
cuisine_filter = {"cuisine": 'mexican'}
enhanced_docs = vectorstore.similarity_search(query, k=5, filter=cuisine_filter)

In [48]:
enhanced_docs

[Document(page_content='avocado, chicken breasts, yellow onion, masa, kosher salt, achiote paste, liquid, white vinegar, lettuce leaves, purple onion, plum tomatoes, black beans, vegetable oil, sauce', metadata={'cuisine': 'mexican', 'id': 35072}),
 Document(page_content='kosher salt, chile de arbol, turkey breast, mint leaves, garlic cloves, masa, tomatoes, vegetable oil, low salt chicken broth, water, achiote paste, onions', metadata={'cuisine': 'mexican', 'id': 26778}),
 Document(page_content='kosher salt, lean ground beef, onions, chili powder, garlic, salsa verde, cilantro, sugar, lime wedges, corn tortillas', metadata={'cuisine': 'mexican', 'id': 27521})]

In [49]:
vectorstore.similarity_search_with_score(query, k=5, 
                                         filter={'cuisine': 'mexican'})

[(Document(page_content='avocado, chicken breasts, yellow onion, masa, kosher salt, achiote paste, liquid, white vinegar, lettuce leaves, purple onion, plum tomatoes, black beans, vegetable oil, sauce', metadata={'cuisine': 'mexican', 'id': 35072}),
  0.36982745),
 (Document(page_content='kosher salt, chile de arbol, turkey breast, mint leaves, garlic cloves, masa, tomatoes, vegetable oil, low salt chicken broth, water, achiote paste, onions', metadata={'cuisine': 'mexican', 'id': 26778}),
  0.38994306),
 (Document(page_content='kosher salt, lean ground beef, onions, chili powder, garlic, salsa verde, cilantro, sugar, lime wedges, corn tortillas', metadata={'cuisine': 'mexican', 'id': 27521}),
  0.39436603)]

In [50]:
# MMR retriever: 8 results (k) picked from 50 fetched candidates (fetch_k).
# NOTE(review): lambda_mult=0.1 presumably weights the selection strongly towards
# diversity over pure similarity - confirm against the LangChain MMR documentation.
mmr_search_kwargs = {'k': 8, 'fetch_k': 50, 'lambda_mult': 0.1}
retriever = vectorstore.as_retriever(search_type='mmr', search_kwargs=mmr_search_kwargs)

In [51]:
retriever.invoke(query)

[Document(page_content='white onion, balsamic vinegar, shredded mozzarella cheese, unsalted butter, soppressata, prebaked pizza crusts, sweet potatoes, freshly ground pepper, kosher salt, extra-virgin olive oil, oregano', metadata={'cuisine': 'italian', 'id': 12421}),
 Document(page_content='celery ribs, baby spinach, chickpeas, kosher salt, orzo, carrots, parmigiano reggiano cheese, dry bread crumbs, homemade chicken stock, ground pork, freshly ground pepper', metadata={'cuisine': 'italian', 'id': 46525}),
 Document(page_content='fresh basil, purple onion, feta cheese, balsamic vinaigrette, tomatoes, kalamata, rotini, green bell pepper, freshly ground pepper', metadata={'cuisine': 'greek', 'id': 29557}),
 Document(page_content='sugar, unsalted butter, lemon, pure vanilla extract, blood orange, orange marmalade, plain whole-milk yogurt, eggs, almonds, pistachio nuts, kosher salt, flour, lemon juice', metadata={'cuisine': 'italian', 'id': 6401}),
 Document(page_content='pepper, dry whit

### Multiple Condition Filtering

## CNN dataset

In [52]:
import pandas as pd


# Location of the CNN articles CSV inside the project tree.
# NOTE(review): the file name is spelled "Articels"; presumably that is the dataset's own spelling, so do not "fix" it.
filename = os.path.join(get_project_dir(), 'tutorial', 'LLM+Langchain', 'Week-2', 'CNN_Articels_clean.csv')

# index_col=0: the first CSV column becomes the DataFrame index.
df_cnn = pd.read_csv(filename, index_col=0)

In [53]:
df_cnn.head(5)

Unnamed: 0_level_0,Author,Date published,Category,Section,Url,Headline,Description,Keywords,Second headline,Article text
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,"Jacopo Prisco, CNN",2021-07-15 02:46:59,news,world,https://www.cnn.com/2021/07/14/world/tusimple-...,"There's a shortage of truckers, but TuSimple t...",The e-commerce boom has exacerbated a global t...,"world, There's a shortage of truckers, but TuS...","There's a shortage of truckers, but TuSimple t...","(CNN)Right now, there's a shortage of truck d..."
2,"Stephanie Bailey, CNN",2021-05-12 07:52:09,news,world,https://www.cnn.com/2021/05/12/world/ironhand-...,Bioservo's robotic 'Ironhand' could protect fa...,Working in a factory can mean doing the same t...,"world, Bioservo's robotic 'Ironhand' could pro...",A robotic 'Ironhand' could protect factory wor...,(CNN)Working in a factory or warehouse can me...
3,"Words by Stephanie Bailey, video by Zahra Jamshed",2021-06-16 02:51:30,news,asia,https://www.cnn.com/2021/06/15/asia/swarm-robo...,This swarm of robots gets smarter the more it ...,"In a Hong Kong warehouse, a swarm of autonomou...","asia, This swarm of robots gets smarter the mo...",This swarm of robots gets smarter the more it ...,"(CNN)In a Hong Kong warehouse, a swarm of aut..."
4,"Paul R. La Monica, CNN Business",2022-03-15 09:57:36,business,investing,https://www.cnn.com/2022/03/15/investing/brics...,Russia is no longer an option for investors. T...,"For many years, the world's most popular emerg...","investing, Russia is no longer an option for i...",Russia is no longer an option for investors. T...,"New York (CNN Business)For many years, the wor..."
7,Reuters,2022-03-15 11:27:02,business,business,https://www.cnn.com/2022/03/15/business/russia...,Russian energy investment ban part of new EU s...,The European Union formally approved on Tuesda...,"business, Russian energy investment ban part o...",EU bans investment in Russian energy in new sa...,The European Union formally approved on Tuesda...


In [54]:
# 'Date published' is stored as a plain string, not as a datetime object.
# A later cell splits it into separate year and month values.

df_cnn.iloc[0]['Date published']

'2021-07-15 02:46:59'

In [55]:
df_cnn.iloc[0]['Headline']

"There's a shortage of truckers, but TuSimple thinks it has a solution: no driver needed - CNN"

In [56]:
df_cnn.iloc[0]['Description']

'The e-commerce boom has exacerbated a global truck driver shortage, but could autonomous trucks help fix the problem?'

In [57]:
# Number of articles per (Category, Section) pair, shown as a one-column table named "n".
df_cnn.groupby(["Category", "Section"]).size().to_frame("n")

Unnamed: 0_level_0,Unnamed: 1_level_0,n
Category,Section,Unnamed: 2_level_1
business,business,23
business,business-food,1
business,business-money,2
business,cars,1
business,economy,8
business,energy,3
business,homes,6
business,investing,9
business,media,2
business,perspectives,25


In [58]:
# Four slices of the corpus, each selected by Category plus one or more Sections.
df_cnn_filtered_1 = df_cnn.query("Category == 'business' and Section == 'business'")
df_cnn_filtered_2 = df_cnn.query("Category == 'entertainment' and Section == 'entertainment'")
df_cnn_filtered_3 = df_cnn.query("Category == 'news' and Section in ['africa', 'australia', 'us']")
df_cnn_filtered_4 = df_cnn.query("Category == 'sport' and Section in ['motorsport']")

# Stack the slices (in this order) into the working subset used for the vector store.
df_cnn_filtered = pd.concat(
    [df_cnn_filtered_1, df_cnn_filtered_2, df_cnn_filtered_3, df_cnn_filtered_4]
)

In [59]:
# Derive 'year' and 'month' (kept as strings, e.g. '2022' and '03') from 'Date published',
# whose values look like "YYYY-MM-DD hh:mm:ss".
# Vectorised .str operations replace the former row-wise apply(axis=1): same result, no Python-level
# loop over rows, and a missing date yields NaN instead of raising AttributeError.
df_cnn_filtered[['year', 'month']] = (
    df_cnn_filtered['Date published']
    .str.split(" ").str[0]            # date part: "YYYY-MM-DD"
    .str.split("-", expand=True)      # one column per component: year, month, day
    .iloc[:, :2]                      # keep year and month only
)

In [60]:
df_cnn_filtered.iloc[0]

Author                                                       Reuters
Date published                                   2022-03-15 11:27:02
Category                                                    business
Section                                                     business
Url                https://www.cnn.com/2022/03/15/business/russia...
Headline           Russian energy investment ban part of new EU s...
Description        The European Union formally approved on Tuesda...
Keywords           business, Russian energy investment ban part o...
Second headline    EU bans investment in Russian energy in new sa...
Article text       The European Union formally approved on Tuesda...
year                                                            2022
month                                                             03
Name: 7, dtype: object

In [61]:
# One Document per article: the article body is the searchable text, while category, section,
# publication year and the DataFrame index (as a string) travel along as metadata for filtering.
documents = [
    Document(
        page_content=row['Article text'],
        metadata={
            "Category": row['Category'],
            "Section": row['Section'],
            "Year": row['year'],
            "ID": f"{idx}",
        },
    )
    for idx, row in df_cnn_filtered.iterrows()
]

# Embed all documents and index them in a FAISS store.
cnn_vectorstore = FAISS.from_documents(documents, embedding=embedding)

In [62]:
# Plain similarity retriever over the CNN vector store.
cnn_retriever = cnn_vectorstore.as_retriever(search_type="similarity")
# Make `search_kwargs` adjustable per call. The id "hello_search" is the key that must be used
# under config={"configurable": {...}} when invoking the retriever.
cnn_retriever_configurable = cnn_retriever.configurable_fields(search_kwargs=ConfigurableField(id="hello_search"))

In [63]:
cnn_vectorstore.as_retriever?

[1;31mSignature:[0m [0mcnn_vectorstore[0m[1;33m.[0m[0mas_retriever[0m[1;33m([0m[1;33m**[0m[0mkwargs[0m[1;33m:[0m [1;34m'Any'[0m[1;33m)[0m [1;33m->[0m [1;34m'VectorStoreRetriever'[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Return VectorStoreRetriever initialized from this VectorStore.

Args:
    search_type (Optional[str]): Defines the type of search that
        the Retriever should perform.
        Can be "similarity" (default), "mmr", or
        "similarity_score_threshold".
    search_kwargs (Optional[Dict]): Keyword arguments to pass to the
        search function. Can include things like:
            k: Amount of documents to return (Default: 4)
            score_threshold: Minimum relevance threshold
                for similarity_score_threshold
            fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
            lambda_mult: Diversity of results returned by MMR;
                1 for minimum diversity and 0 for maximum. (De

In [64]:
len(cnn_retriever_configurable.invoke("Russian", config={"configurable": {"hello_search": {"k": 3}}}))

3

In [65]:
len(cnn_retriever_configurable.invoke("Russian", config={"configurable": {"hello_search": {"k": 6}}}))

6

In [66]:
# Look at the 40 nearest hits for "Russian" and print rank + metadata of the sport articles only.
wide_search = {"configurable": {"hello_search": {"k": 40}}}
for idx, document in enumerate(cnn_retriever_configurable.invoke("Russian", config=wide_search)):
    metadata = document.metadata
    if metadata['Category'] != 'sport':
        continue
    print(idx, metadata)

0 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2022', 'ID': '130'}
9 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '1878'}
11 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '4251'}
16 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3515'}
18 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3803'}
27 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '2029'}
29 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3699'}
37 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3502'}
38 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '4492'}
39 {'Category': 'sport', 'Section': 'motorsport', 'Year': '2020', 'ID': '689'}


In [67]:
## BUG https://github.com/langchain-ai/langchain/discussions/26806
# NOTE(review): the linked discussion presumably concerns metadata filtering with the FAISS retriever - confirm.
# The search below asks for k=6 results restricted to Category == "sport" and raises fetch_k to 50.
# `T` holds the returned documents; the next cell prints their metadata.

T = cnn_retriever_configurable.invoke("Russian", config={"configurable": {"hello_search": {"k": 6, 'fetch_k': 50,
                                                                                           "filter": {"Category": "sport"}}}})

In [68]:
for document in T:
    print(document.metadata)

{'Category': 'sport', 'Section': 'motorsport', 'Year': '2022', 'ID': '130'}
{'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '1878'}
{'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '4251'}
{'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3515'}
{'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '3803'}
{'Category': 'sport', 'Section': 'motorsport', 'Year': '2021', 'ID': '2029'}


#### Some template/or reference we need.

I do not memorize everything. I always keep a template and I remember where to find it.

In [None]:
# retriever.invoke(query, config={"configurable": {"hello_search": {"k": 7}}})

# What if we have more than one condition?

# template

# filter = {'$and': [{'brand': {'$eq': brand}}, {'category': {'$eq': category}}]}
# search_kwargs = {"filter": filter}

# greater than: '$gt'
# less than: '$lt'

# retriever = vectorstore.as_retriever(
#     search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5,
#                                                              "filter": {"cuisine": "mexican"}})

# **** 預計第一個小時結束 ****

# LangChain Expression Language (LCEL)

From an `Operator` to a `Foreman`:

Assume that you have finished an LLM workflow and want to hand it over to an intern who does not know much about LangChain. How do you improve the chance that the workflow will run without mistakes?  

In [None]:
# We borrow the translation from the previous class.

def translation_function(text):
    """
    Translate the given content with the chat model.

    The system prompt presents the model as fluent in English and Traditional Chinese and tells it
    to translate the given content; `text` is sent unchanged as the human message.
    NOTE(review): the prompt never names a target language, although the original (Chinese)
    docstring stated that the text is translated into Traditional Chinese - confirm intent.

    Args:
        text: the content to translate.

    Returns:
        The `content` attribute of the model's reply.
    """
    system_message = SystemMessagePromptTemplate(
        prompt=PromptTemplate.from_template(
            "You are a helpful AI assistant with native speaker fluency in both English and "
            "traditional Chinese (繁體中文). \n"
            "    You will translate the given content."
        )
    )
    human_message = HumanMessagePromptTemplate(
        prompt=PromptTemplate(template='{query}', input_variables=["query"])
    )
    translation_prompt_template = ChatPromptTemplate.from_messages([system_message, human_message])

    # Fill the template first, then send the resulting messages to the module-level `model`.
    reply = model.invoke(translation_prompt_template.invoke({"query": text}))
    return reply.content

### 食譜 - LCEL

In [None]:
# The three fields the model must return; StructuredOutputParser turns them into
# formatting instructions for the prompt and later parses the reply into a dict.
response_schemas = [
        ResponseSchema(name="used ingredients", description="The actual ingredients used in cooking"),
        ResponseSchema(name="extra ingredients", description="extra ingredients that have to be prepared "),
        ResponseSchema(name="result", description="The dish and cooking recipe in detail")
    ]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

format_instructions = output_parser.get_format_instructions()

# Define the system prompt template (the chef persona and the task description)

system_prompt = PromptTemplate.from_template("""You are an AI assistant as the best chef in the world. You have a great taste and
cooking skills like Gordon Ramsay. You should be able to come up with dish based on `suggested ingredient`, and tell us what extra ingredients to be prepared by 
comparing the ingredients actually used in the cooking and the `existing ingredient`

The `suggested ingredients` are the ingredients suggested by some recipe. You have the freedom to add or remove ingredients to achieve the goal, but try to be as 
faithful to the `suggested ingredient` as possible. 
""")

system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# Define the human prompt template: the caller supplies the two ingredient lists,
# while the format instructions are pre-filled as a partial variable.
human_prompt = PromptTemplate(template='existing ingredients:[{existing_ingredients}]; '
                                       'suggested ingredients: [{suggested_ingredients}]\n; '
                                       'format instruction: {format_instructions}',
                              input_variables=["existing_ingredients", "suggested_ingredients"],
                              partial_variables={"format_instructions": format_instructions}
                              )

human_message = HumanMessagePromptTemplate(prompt=human_prompt)

# Final chat prompt: system message followed by the human message.
chat_prompt = ChatPromptTemplate.from_messages([system_message,
                                                human_message
                                                ])

In [None]:
chain = chat_prompt|model|output_parser

In [None]:
chain.invoke({"existing_ingredients": ", ".join(existing_ingredients), "suggested_ingredients": ", ".join(suggested_ingredients)})

#### How do we attach the translation to the process above?

- 1. Build the translation process 

In [None]:
# System message: the translator persona (prompt text kept exactly, including its whitespace).
system_prompt = PromptTemplate.from_template(
    "You are a helpful AI assistant with native speaker fluency in both English and "
    "traditional Chinese (繁體中文). \n"
    "    You will translate the given content."
)
system_message = SystemMessagePromptTemplate(prompt=system_prompt)

# Human message: the text to translate, passed through as-is under the "query" variable.
human_prompt = PromptTemplate.from_template('{query}')
human_message = HumanMessagePromptTemplate(prompt=human_prompt)

translation_prompt_template = ChatPromptTemplate.from_messages([system_message, human_message])

# prompt -> model; the chain expects {"query": ...} as input.
translation_chain = translation_prompt_template | model

- 2. Connect the recipe chain with the translation chain

In [None]:
from langchain_core.output_parsers import StrOutputParser

# The recipe step must hand plain text to the translator. Without StrOutputParser the chain
# returns a message object, and its whole string form (content plus metadata) would be formatted
# into the "{query}" slot of the translation prompt.
recipe_chain = chat_prompt | model | StrOutputParser()

# The dict is coerced into a runnable that runs recipe_chain and stores its text under "query",
# the variable translation_chain expects. `output_parser` then parses the translated reply.
pipeline = {"query": recipe_chain} | translation_chain | output_parser

In [None]:
pipeline.invoke({"existing_ingredients": ", ".join(existing_ingredients), "suggested_ingredients": ", ".join(suggested_ingredients)})

#### What happens?

I know it looks mysterious, but it is very simple:

In [None]:
Image(filename= "tutorial/LLM+Langchain/Week-2/LCEL_1.png")

## Minimal Example

### 1. Creating a Prompt Template (創建提示模板):

- ChatPromptTemplate.from_template is used to create a prompt template. This template is a string that includes a placeholder {topic}.
- The template specifies the instruction: "tell me a short joke about {topic}".
- 使用 ChatPromptTemplate.from_template 創建一個提示模板。這個模板是一個包含佔位符 {topic} 的字符串。
- 模板指定了指令：“tell me a short joke about {topic}”（給我講一個關於{topic}的簡短笑話）。

In [None]:
## Official diagram flow

Image(filename= "tutorial/LLM+Langchain/Week-2/lcel pipeline.png")

In [None]:
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")

In [None]:
prompt

### 2. Setting Up the Chain (設置鏈條):

- chain = prompt | model sets up a chain where the prompt is connected to a model. This means that the model will process the prompt to generate a response.
- The | operator is used to combine the prompt and the model into a single chain.
- chain = prompt | model 設置了一個鏈條，其中提示連接到模型。這意味著模型將處理該提示來生成回應。
- | 運算符用於將提示和模型組合成一個鏈條。

In [None]:
# from the PromptTemplate to the ChatModel

chain = prompt | model

### 3. Getting the Joke (獲取笑話):

- The result of chain.invoke({"topic": "ice cream"}) is stored in the variable joke.
- This variable now contains the generated joke about ice cream.
- chain.invoke({"topic": "ice cream"}) 的結果存儲在變量 joke 中。
- 這個變量現在包含生成的關於冰淇淋的笑話。

In [None]:
# input -> prompt template -> model

joke = chain.invoke({"topic": "ice cream"})

In [None]:
joke

In [None]:
joke.content

In [None]:
print(joke.content)

### 1. Importing StrOutputParser (導入 StrOutputParser):

- The code imports StrOutputParser from the langchain_core.output_parsers module. This class is used to parse the output of the model into a string format.
- 代碼從 langchain_core.output_parsers 模塊導入 StrOutputParser。這個類用於將模型的輸出解析為字符串格式。

### 2. Creating an Output Parser:

- An instance of StrOutputParser is created and assigned to the variable output_parser.
- This parser will be used to process the raw output from the model and convert it into a readable string format.
- 創建一個 StrOutputParser 的實例，並將其賦值給變量 output_parser。
- 這個解析器將用於處理來自模型的原始輸出，並將其轉換為可讀的字符串格式。

In [None]:
from langchain_core.output_parsers import StrOutputParser

# NOTE: this rebinds `output_parser`, which was a StructuredOutputParser earlier in the notebook.
# From here on `output_parser` is a StrOutputParser.
output_parser = StrOutputParser()

chain = prompt | model | output_parser

# input -> prompt template -> model -> output parser

chain.invoke({"topic": "ice cream"})

## 範例操作

### Coercion

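Do not ask me why this word is used... In LCEL, "coercion" means that plain Python objects placed next to `|` (a dict or a function) are automatically converted into Runnables: a dict becomes a `RunnableParallel`, a function becomes a `RunnableLambda`.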

In [None]:
Image(filename= "tutorial/LLM+Langchain/Week-2/LCEL_2.png")

In [None]:
# First chain: prompt -> model -> output_parser
# (assumes `output_parser` is still the StrOutputParser created above, so the joke is a plain string).
joke_chain = prompt | model | output_parser

# Second prompt: asks the model to judge the joke it receives in the "joke" variable.
analysis_prompt = ChatPromptTemplate.from_template("is this a funny joke? {joke}")

analysis_chain = analysis_prompt | model

# The dict on the left of `|` is coerced into a runnable: it runs joke_chain and stores the result
# under "joke", which is then fed to analysis_chain; output_parser processes the final reply.
composed_chain = {"joke": joke_chain} | analysis_chain | output_parser

In [None]:
composed_chain.invoke({"topic": "ice cream"})

In [None]:
print(composed_chain.invoke({"topic": "ice cream"}))

1. joke_chain 執行結果，將結果放進 'joke' 這個 key 裡
2. {"joke": content} 被送進analysis_prompt 中，等價於 analysis_prompt.invoke({"joke": content})
3. model 接收 analysis_prompt 產生的結果
4. output_parser 處理結果

## Parallelize steps

In [None]:
from langchain_core.runnables import RunnableParallel

# Two independent chains that both take {"topic": ...}.
# NOTE: `joke_chain` is rebound here to a chain without an output parser.
joke_chain = ChatPromptTemplate.from_template("tell me a joke about {topic}") | model
poem_chain = ChatPromptTemplate.from_template("write a 2-line poem about {topic}") | model

# Each keyword becomes a branch; every branch receives the same input and the results
# are collected in a dict keyed by branch name ("joke", "poem").
map_chain = RunnableParallel(joke=joke_chain, poem=poem_chain)

map_chain.invoke({"topic": "bear"})

In [None]:
type(joke_chain)

In [None]:
%%timeit

joke_chain.invoke({"topic": "bear"})

In [None]:
%%timeit

poem_chain.invoke({"topic": "bear"})

In [None]:
%%timeit

map_chain.invoke({"topic": "bear"})

RunnableParallel is also useful for running independent processes in parallel, since each Runnable in the map is executed in parallel. For example, we can see that our earlier joke_chain, poem_chain and map_chain all have about the same runtime, even though map_chain executes both of the other two.



## Run custom function

In [None]:
from operator import itemgetter

from langchain_core.runnables import RunnableLambda



def length_function(text):
    """Return the length of `text` as reported by `len`."""
    size = len(text)
    return size


def _multiple_length_function(text1, text2):
    return len(text1) * len(text2)


def multiple_length_function(_dict):
    return _multiple_length_function(_dict["text1"], _dict["text2"])

# chain = (
#     {
#         "a": itemgetter("foo") | RunnableLambda(length_function),
#         "b": {"text1": itemgetter("foo"), "text2": itemgetter("bar")}
#         | RunnableLambda(multiple_length_function),
#     }
#     | prompt
#     | model
# )

prompt = ChatPromptTemplate.from_template("what is {a} + {b}")

# This first attempt fails on purpose: `itemgetter(...)` and a plain function are both ordinary
# Python objects, and neither knows the `|` operator, so building the chain raises TypeError.
# The error is caught and displayed so that "Restart & Run All" can continue past this cell.
try:
    chain = (
        {
            "a": itemgetter("foo") | length_function,
            "b": {"text1": itemgetter("foo"), "text2": itemgetter("bar")}
            | multiple_length_function,
        }
        | prompt
        | model
    )
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

Oops, how to solve this error message?

In [None]:
# Working version: wrapping each plain function in RunnableLambda turns it into a Runnable,
# which supports `|` with the itemgetter / dict on its left.
chain = (
    {
        # "a": take input["foo"], then compute its length
        "a": itemgetter("foo") | RunnableLambda(length_function),
        # "b": build {"text1": input["foo"], "text2": input["bar"]}, then multiply the two lengths
        "b": {"text1": itemgetter("foo"), "text2": itemgetter("bar")}
        | RunnableLambda(multiple_length_function),
    }
    | prompt
    | model
)


In [None]:
chain.invoke({"foo": "bar", "bar": "gah"})

How does it work?

- 'bar' -> 'foo', 'foo' ('bar') -> length_function => a = 3
- 'bar' -> 'foo' & 'gah' -> 'bar', 'foo' ('bar') -> 'text1' & 'bar' ('gah') -> 'text2', {'text1': 'bar', 'text2': 'gah'} -> multiple_length_function => b = 9
- {'a':3, 'b': 9} -> prompt -> 'what is 3 + 9'

#### Decorator

- Something very cool
- This was a new discovery at the beginning of December, so it is not used in the subsequent tutorials. But feel free to adapt the code to experience the magic.
- Knowledge of programming is still the key to a successful AI application:

  you can only get a frog from a frog - Joerg Schmalian

In [None]:
from operator import itemgetter
from langchain_core.runnables import RunnableParallel
# Import the decorator under an alias: this cell also assigns a pipeline to the name `chain`,
# which would otherwise overwrite the decorator and break any `@chain` used after that line.
from langchain_core.runnables import chain as chain_decorator

prompt = ChatPromptTemplate.from_template("what is {a} + {b}")

@chain_decorator
def length_function(text):
    """Return the length of `text`; the decorator turns this function into a Runnable."""
    return len(text)

def _multiple_length_function(text1, text2):
    """Return the product of the lengths of `text1` and `text2`."""
    return len(text1) * len(text2)

@chain_decorator
def multiple_length_function(_dict):
    """Read "text1" and "text2" from `_dict` and multiply their lengths (decorated as a Runnable)."""
    return _multiple_length_function(_dict["text1"], _dict["text2"])

# Same pipeline as in the RunnableLambda version, but the decorated functions can be piped directly.
chain = RunnableParallel(
    a=itemgetter("foo") | length_function,
    b={"text1": itemgetter("foo"), "text2": itemgetter("bar")} | multiple_length_function,
) | prompt | model

chain.invoke({"foo": "bar", "bar": "gah"})

## Passing data through

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Both branches receive the same input dict.
runnable = RunnableParallel(
    # "passed": RunnablePassthrough forwards the input unchanged
    passed=RunnablePassthrough(),
    # "modified": a plain lambda that reads input["num"] and adds 1
    modified=lambda x: x["num"] + 1,
)

runnable.invoke({"num": 1})

In [None]:
runnable = RunnableParallel(
    passed_2=RunnablePassthrough(),
    modified=lambda x: x["num"] + 1,
)

runnable.invoke({"num": 1})

### Retrieval Example: Step by Step

### 1. Creating a Template (創建模板):

- A template is created that instructs the model to answer a question based only on a provided context. The template looks like this:
- 創建一個模板，指示模型僅基於提供的上下文來回答問題。模板如下

In [None]:
# context: something that will be generated with the question
# question

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

### 2. Generating a Prompt (生成提示):

- The ChatPromptTemplate.from_template(template) command uses the template to create a prompt that can later be filled with specific context and a question.
- 使用 ChatPromptTemplate.from_template(template) 命令來創建一個提示，之後可以用特定的上下文和問題來填充。

In [None]:
prompt = ChatPromptTemplate.from_template(template)

### 3. Formulating a Query (制定查詢):

- A query is created by joining the ingredients from the 6th recipe in recipe_test with commas. This query is used to retrieve relevant information.
- 通過將 recipe_test 中第六個食譜的成分用逗號連接來創建查詢。此查詢用於檢索相關信息。

In [None]:
query = ", ".join(recipe_test[5]['ingredients'])

### 4. Retrieving Context (檢索上下文):

- The retriever.invoke(query) command uses the query to find the most relevant documents or information. This retrieved information is stored in the context variable.
- 使用 retriever.invoke(query) 命令，通過查詢找到最相關的文檔或信息。這些檢索到的信息存儲在 context 變量中。

In [None]:
context = retriever.invoke(query)

In [None]:
context

### 5. Filling the Prompt (填充提示):

- The prompt is filled with the retrieved context and the question using prompt.invoke({"context": context, "question": question}). This creates an input prompt for the model.
- 使用 prompt.invoke({"context": context, "question": question}) 將提示填充檢索到的上下文和問題。這創建了模型的輸入提示。

In [None]:
question = "Show me all the ingredients."

In [None]:
prompt_as_input = prompt.invoke({"context": context, "question": question})

In [None]:
prompt_as_input

### 6. Getting the Model's Response (獲取模型的回應):

- The model is invoked with the filled prompt using model.invoke(prompt_as_input). The model processes the prompt and generates an output.
- 使用 model.invoke(prompt_as_input) 調用模型。模型處理提示並生成輸出。

In [None]:
output = model.invoke(prompt_as_input)

In [None]:
print(output.content)

### 7. Parsing the Output (解析輸出):

- The output from the model is parsed using output_parser.parse(output.content). This ensures the output is in a readable format.
- 使用 output_parser.parse(output.content) 解析模型的輸出。這確保輸出是可讀的格式。

In [None]:
print(output_parser.parse(output.content))

In [None]:
from langchain_core.runnables import RunnablePassthrough

# The whole step-by-step walkthrough above as one chain:
# RunnablePassthrough.assign keeps the incoming keys ("query", "question") and adds "context",
# obtained by sending the "query" value to the retriever; the resulting dict fills the prompt,
# which goes to the model and finally through output_parser.
chain = RunnablePassthrough.assign(context=itemgetter("query")|retriever) | prompt | model | output_parser

chain.invoke({"query": query, "question": question})

## Translation Template

In [None]:
# System role for the translator.
# NOTE(review): "linquistic" in the prompt text looks like a typo of "linguistic"; the prompt is kept verbatim here.
system_prompt = PromptTemplate.from_template(
    "You are an AI assistant with a linquistic PhD degree and translation expert. \n"
    "If you are not able to identify the language used by the given text, answer 'I do not know'.\n"
)

# Single-field response schema -> parser -> format instructions that are injected into the prompt
response_schemas = [ResponseSchema(name="translate", description="the translated result")]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Human turn: the product name plus the (pre-filled) format instructions
prompt = PromptTemplate(
    template="Translate the product name to English: \n\n product: {product}\n{format_instructions}",
    input_variables=['product'],
    partial_variables={"format_instructions": format_instructions},
)
human_message = HumanMessagePromptTemplate(prompt=prompt)

# Chat prompt = system text followed by the human message
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt.template),
    human_message,
])

# prompt -> model -> structured parser
chain = chat_prompt | model | output_parser

In [None]:
chain.invoke({"product": 'Сыворотка Ревиталифт Филлер для лица и шеи с 1,5% чистой гиалуроновой кислотой'})

In [None]:
chain.invoke({"product": 'Felt Liner Noir Infaillible Grip Precision'})

In [None]:
# System role for the translator (prompt text unchanged).
system_prompt = PromptTemplate.from_template(
    "You are an AI assistant with native Chinese proficiency and translation expert. \n"
    "If you are not able to identify the language used by the given text, answer 'I do not know'.\n"
)

# Define the response schema for translation
response_schemas = [
    ResponseSchema(name="translate", description="the translated result")]

# Create an output parser based on the response schemas
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Get format instructions from the output parser
format_instructions = output_parser.get_format_instructions()

# Define a prompt template for message translation.
# The template variable is named `message` (it was `product`, a leftover from the product-name cell);
# the rendered prompt text is identical.
prompt = PromptTemplate(template="Translate the message to English: \n\n "
                                 "message: {message}\n{format_instructions}",
                        input_variables=['message'],
                        partial_variables={"format_instructions": format_instructions})

# Create a human message prompt template
human_message = HumanMessagePromptTemplate(prompt=prompt)

# Create a chat prompt template from system prompt and human message
chat_prompt = ChatPromptTemplate.from_messages([("system", system_prompt.template),
                                                human_message])

# Construct the processing chain. No output parser is attached here, so the raw model message is returned.
chain = chat_prompt | model

chain.invoke({"message": "老師這最後做出來可以怎麼運用?就是找不同國家的語言來學習嗎? 能不能用中文"})

## 回家作業

1. 根據食譜 - LCEL, 配合LCEL, 完成從給 材料 -> 中文食譜
2. 根據 retrieval example -> 要求將食材分類 (肉，香料，奶製品，等等)