In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL = "llama3.2"

In [2]:
# from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings

embedding = OllamaEmbeddings(model="llama3.2")

In [3]:
from langchain_ollama import OllamaLLM

# if MODEL.startswith("gpt"):
#     model = ChatOpenAI(api_key=OPENAI_API_KEY, model=model)
model = OllamaLLM(model=MODEL)
model.invoke("Tell me a joke!")

"Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nI hope that made you smile! Do you want to hear another one?"

In [4]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser
chain.invoke("Tell me a bit about Retrieval Augmented Generation (RAG) in less than 100 words.")

'Retrieval-Augmented Generation (RAG) is an artificial intelligence framework for improving language generation models by leveraging retrieval mechanisms. It combines the strengths of large language models and retrievers to generate coherent text. RAG uses a retriever model to search for relevant documents or knowledge in a database, which are then augmented with a generator model to produce final outputs. This approach has shown promising results in tasks such as question answering, text summarization, and content retrieval, and has the potential to improve performance in various natural language processing applications.'

#### Interacting with TXT

In [5]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [15]:
# Load facts.txt and split it into chunks. Embedding and indexing are done by later cells.
raw_documents = TextLoader('facts.txt').load()
# NOTE(review): chunk_size=20 is not what actually bounds the chunks -- this cell's output warns
# "Created a chunk of size 697, which is longer than the specified 20" and shows whole paragraphs
# as chunks. Presumably the splitter only cuts on its separator (blank lines?) -- confirm, then
# choose a realistic chunk_size or a recursive splitter.
text_splitter = CharacterTextSplitter(chunk_size=20, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
# Display the resulting chunks.
documents

Created a chunk of size 697, which is longer than the specified 20


[Document(metadata={'source': 'facts.txt'}, page_content="Fufufufafafu is widely known in forums, linking the possibility of referring the identity of Gibranu Reke, a vice president in Wakanda Republic.\nHis activity in KasKus throughout 2013 to 2018 attracted internet users due for his satire jokes and funny comments on Gemoy Joget's decisions. \nSome of his comments consisting lots of funny but politically incorrect statements and completely different with his current vice president persona.\nAbout Gemoy Joget, he's currently in charge of running President positions in Wakanda Republic. Having lost in previous four elections, he succeeded in his fifth trial,\ngaining the most votes above two other candidates, Anissu Bassuuedan and Ganujar Prawowo."),
 Document(metadata={'source': 'facts.txt'}, page_content='Here\'re some comments:\n1) Saya lagi membayangkan joget gemoy mendaki semeru trus pas nyampe di puncak, dia mengibarkan bendera merah putih lalu dia berteriak, "titiekkkkk kembal

In [16]:
from langchain.prompts import PromptTemplate

template = """
    Please answer the question based on the context below. If it's deemed too complex
    or not relevant, please reply "I afraid my capability has yet to satisfy to the topic you're asking"

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

In [17]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vector_store = DocArrayInMemorySearch.from_documents(
    documents, 
    embedding=embedding
    )

In [18]:
retriever = vector_store.as_retriever()

retriever.invoke("Wakanda republic")

[Document(metadata={'source': 'facts.txt'}, page_content="Fufufufafafu is widely known in forums, linking the possibility of referring the identity of Gibranu Reke, a vice president in Wakanda Republic.\nHis activity in KasKus throughout 2013 to 2018 attracted internet users due for his satire jokes and funny comments on Gemoy Joget's decisions. \nSome of his comments consisting lots of funny but politically incorrect statements and completely different with his current vice president persona.\nAbout Gemoy Joget, he's currently in charge of running President positions in Wakanda Republic. Having lost in previous four elections, he succeeded in his fifth trial,\ngaining the most votes above two other candidates, Anissu Bassuuedan and Ganujar Prawowo."),
 Document(metadata={'source': 'facts.txt'}, page_content='Here\'re some comments:\n1) Saya lagi membayangkan joget gemoy mendaki semeru trus pas nyampe di puncak, dia mengibarkan bendera merah putih lalu dia berteriak, "titiekkkkk kembal

In [19]:
from operator import itemgetter

chain = (
    {
    "context": itemgetter("question") | retriever, "question": itemgetter("question")
    }
    | prompt
    | model
    | parser
)

chain.invoke({"question": "Who is Gibranu Reke?"})

'Gibranu Reke is a vice president in Wakanda Republic, according to the context provided.'

In [20]:
chain.invoke({"question": "What are some funny comments that Fufufufafafu posted online?"})

'Fufufufafafu posted the following funny comments online:\n\n1. "titiekkkkk kembalilah ke pelukanku"\n2. "Ayo bayar get" (meaning \'Pay now\' in Indonesian)\n3. Alumni 212 mana suaranya (This one seems to be a joke about a school\'s number, but the phrase is left incomprehensible)'

In [22]:
### Using Chroma
### Embedding with nomic
# A separate embedding model ("nomic-embed-text") is used for this store instead of the
# `embedding` object created at the top of the notebook.
embedding_nomic = OllamaEmbeddings(model="nomic-embed-text")

### Initialize vector DB Chroma
# No documents are passed here; they are added by a later cell via add_documents().
vector_db_chroma = Chroma(
    embedding_function=embedding_nomic,
    collection_name="local-fufufufafafu"

)

  vector_db_chroma = Chroma(


In [25]:
vector_db_chroma.add_documents(documents=documents)

['7eb7d45c-32f0-4ddd-aee0-04733e113489',
 '480009bf-1022-4c83-8373-6822e41a1334']

In [26]:
results = vector_db_chroma.similarity_search(
    "fufufufafafu comments",
    k=2,
    # filter={"source": "Implementation_MLOps.pdf"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Fufufufafafu is widely known in forums, linking the possibility of referring the identity of Gibranu Reke, a vice president in Wakanda Republic.
His activity in KasKus throughout 2013 to 2018 attracted internet users due for his satire jokes and funny comments on Gemoy Joget's decisions. 
Some of his comments consisting lots of funny but politically incorrect statements and completely different with his current vice president persona.
About Gemoy Joget, he's currently in charge of running President positions in Wakanda Republic. Having lost in previous four elections, he succeeded in his fifth trial,
gaining the most votes above two other candidates, Anissu Bassuuedan and Ganujar Prawowo. [{'source': 'facts.txt'}]
* Here're some comments:
1) Saya lagi membayangkan joget gemoy mendaki semeru trus pas nyampe di puncak, dia mengibarkan bendera merah putih lalu dia berteriak, "titiekkkkk kembalilah ke pelukanku"
2) Ayo bayar get
3) Alumni 212 mana suaranya [{'source': 'facts.txt'}]


In [28]:
vector_db_chroma.as_retriever().invoke("Joget Gemoy")

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(metadata={'source': 'facts.txt'}, page_content="Fufufufafafu is widely known in forums, linking the possibility of referring the identity of Gibranu Reke, a vice president in Wakanda Republic.\nHis activity in KasKus throughout 2013 to 2018 attracted internet users due for his satire jokes and funny comments on Gemoy Joget's decisions. \nSome of his comments consisting lots of funny but politically incorrect statements and completely different with his current vice president persona.\nAbout Gemoy Joget, he's currently in charge of running President positions in Wakanda Republic. Having lost in previous four elections, he succeeded in his fifth trial,\ngaining the most votes above two other candidates, Anissu Bassuuedan and Ganujar Prawowo."),
 Document(metadata={'source': 'facts.txt'}, page_content='Here\'re some comments:\n1) Saya lagi membayangkan joget gemoy mendaki semeru trus pas nyampe di puncak, dia mengibarkan bendera merah putih lalu dia berteriak, "titiekkkkk kembal

### Using Ollama for Interacting with PDF

In [8]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Implementation_MLOps.pdf")

pages = loader.load_and_split()
pages

[Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 0, 'page_label': '1'}, page_content='1 \nImplementation of MLOps \n \nMaximilian Zwiesler \n \nJune 2023 \n \nAbstract \nThe rise of machine learning (ML) has reshaped industries, demanding efficient ML \nmodel deployment. This has led to the emergence of Machine Learning Operations \n(MLOps), which streamlines the entire ML workflow. MLOps aligns data, model, and \ncode component s to ensure accurate and scalable models. However, testing and \ndiverse execution environments are underrepresented in existing literature. This paper \nbridges this gap by presenting crucial tests for reliability and proposing an integrated \nMLOps framework that unifies testing with different environments. The paper structure \nencompasses foundational definitions, a standardized development process, and \nconcludes with insights into enhancing MLOps reliability. \n1 Introduction \n1.1 Motivation \nMachine learning (ML) has b ecome an import

In [9]:
from langchain.prompts import PromptTemplate

template = """
    Please answer the question based on the context below. If it's deemed too complex
    or not relevant, please reply "I afraid my capability has yet to satisfy to the topic you're asking"

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

print(prompt.format(context="Here is some context", question="Here is a question"))


    Please answer the question based on the context below. If it's deemed too complex
    or not relevant, please reply "I afraid my capability has yet to satisfy to the topic you're asking"

Context: Here is some context

Question: Here is a question



In [10]:
### Linking customized prompt with Llama and parser
chain = prompt | model | parser

In [11]:
chain.invoke(
    {
        "context": "The topic we're talking about is the implementation of MLOps",
        "question": "What's the topic of talking?"
    }
)

'The topic of our conversation appears to be MLOps (Machine Learning Operations). Would you like to discuss aspects of implementing MLOps, such as pipeline management, model deployment, or hyperparameter tuning?'

In [13]:
chain.input_schema.model_json_schema()

{'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}},
 'required': ['context', 'question'],
 'title': 'PromptInput',
 'type': 'object'}

### Building Memory Storage with DocArrays

In [15]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vector_store = DocArrayInMemorySearch.from_documents(
    pages, 
    embedding=embedding
    )



In [16]:
vector_store.as_retriever().invoke("Machine Learning")

[Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 1, 'page_label': '2'}, page_content='2 \ndata component involves preparing and managing the data used to train and evaluate machine \nlearning models. The model component involves developing and testing machine learning \nmodels to ensure that they meet the desired accuracy and performance requirements. The \ncode component involves the software engineering practices used to package, deploy, and \nmaintain machine learning models in a production environment. The integration of these three \ncomponents is critical to the success of MLOps. [13] \n1.2 Research Question \nThe principles of DevOps were first mentioned in 2007 and since then have been discussed \nextensively. [2], [8] Thus, the process how to write, test and deploy code is well established. \n[4], [7] From this approach the framework of MLOps has been developed. [30] The main focus \nof the existing literature is defining a suitable process how to train and de

In [17]:
retriever = vector_store.as_retriever()

retriever.invoke("Machine Learning")

[Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 1, 'page_label': '2'}, page_content='2 \ndata component involves preparing and managing the data used to train and evaluate machine \nlearning models. The model component involves developing and testing machine learning \nmodels to ensure that they meet the desired accuracy and performance requirements. The \ncode component involves the software engineering practices used to package, deploy, and \nmaintain machine learning models in a production environment. The integration of these three \ncomponents is critical to the success of MLOps. [13] \n1.2 Research Question \nThe principles of DevOps were first mentioned in 2007 and since then have been discussed \nextensively. [2], [8] Thus, the process how to write, test and deploy code is well established. \n[4], [7] From this approach the framework of MLOps has been developed. [30] The main focus \nof the existing literature is defining a suitable process how to train and de

In [41]:
retriever.invoke("DevOps")

[Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 5, 'page_label': '6'}, page_content='and the costly and slow high-level tests like end-to-end tests at the top. [4]  \nSince data plays a fundamental role in developing ML models the first pillar of testing is testing \nof the data. The quality of the data can be assessed by checking the completeness, accuracy, \nconsistency, and timeliness [29]. Here one approach is to create a schema starting with \ndetermining statistics from training data which can be checked against domain knowledge.'),
 Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 1, 'page_label': '2'}, page_content='components is critical to the success of MLOps. [13] \n1.2 Research Question \nThe principles of DevOps were first mentioned in 2007 and since then have been discussed \nextensively. [2], [8] Thus, the process how to write, test and deploy code is well established. \n[4], [7] From this approach the framework of MLOps has been develo

In [18]:
from operator import itemgetter

chain = (
    {
    "context": itemgetter("question") | retriever, "question": itemgetter("question")
    }
    | prompt
    | model
    | parser
)

chain.invoke({"question": "What are main core of MLOps?"})

'The text doesn\'t explicitly state the "main core" of MLOps, but based on the context and information provided, it can be inferred that the core of MLOps consists of:\n\n1. **Model development**: This involves writing code for machine learning models, training them, and iterating through various configurations to improve performance.\n2. **Data science workflow**: This includes data preprocessing, feature engineering, model selection, and hyperparameter tuning, among other tasks.\n3. **Version control and reproducibility**: MLOps emphasizes the importance of controlling versions of code, parameters, data, and software environments to ensure reproducibility and reliability.\n4. **Automation and testing**: The text mentions the need for extensive testing of the project, including CI processes, to ensure that all processes and tasks function as expected.\n\nHowever, if we consider the "main core" of MLOps in a more abstract sense, it could be argued that the core consists of:\n\n1. **Con

In [19]:
questions = [
    "What does MLOps aim for?",
    "What underlies the theory of MLOps",
    "According to the author's thought and opinion, how MLOps is structured?"
]

In [20]:
for question in questions:
    print(f"Question; {question}")
    print(f"Answer: {chain.invoke({'question': question})}")
    print()

Question; What does MLOps aim for?
Answer: MLOps (Machine Learning Operations) aims to bridge the gap between machine learning development and production, focusing on the testing of models and ensuring they meet certain standards before being deployed. It encompasses various aspects such as data quality control, model performance evaluation, fairness, bias detection, application integration testing, and continuous deployment. The ultimate goal is to deliver high-quality, reliable, and scalable machine learning models that meet business needs while maintaining regulatory compliance and transparency.

Question; What underlies the theory of MLOps
Answer: According to the text, several underlying concepts and theories are relevant to MLOps (Machine Learning Operations), including:

1. **DevOps**: The integration of software development (Dev) and operations (Ops) to ensure smooth operation of software systems.
2. **Continuous Delivery**: A practice where changes to code are delivered contin

### How about Chroma

In [22]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50, length_function=len, is_separator_regex=False
    )

chunks = text_splitter.split_documents(pages)

In [None]:
from langchain_chroma import Chroma

### Embedding with nomic
embedding_nomic = OllamaEmbeddings(model="nomic-embed-text")

In [28]:

import ollama

### Initialize vector DB Chroma
vector_db_chroma = Chroma(
    embedding_function=embedding_nomic,
    collection_name="local-mlops"

)

In [34]:
# ids = [str(i) for i in range(len(chunks))]

In [36]:
chunks[:3]

[Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 0, 'page_label': '1'}, page_content='1 \nImplementation of MLOps \n \nMaximilian Zwiesler \n \nJune 2023 \n \nAbstract \nThe rise of machine learning (ML) has reshaped industries, demanding efficient ML \nmodel deployment. This has led to the emergence of Machine Learning Operations \n(MLOps), which streamlines the entire ML workflow. MLOps aligns data, model, and \ncode component s to ensure accurate and scalable models. However, testing and \ndiverse execution environments are underrepresented in existing literature. This paper'),
 Document(metadata={'source': 'Implementation_MLOps.pdf', 'page': 0, 'page_label': '1'}, page_content='bridges this gap by presenting crucial tests for reliability and proposing an integrated \nMLOps framework that unifies testing with different environments. The paper structure \nencompasses foundational definitions, a standardized development process, and \nconcludes with insights into enha

In [None]:
# Index the PDF chunks in the Chroma collection (`vector_db_chroma`) that this section sets up.
# The original call targeted `vector_store` (the DocArray store) and relied on an `ids` list
# whose only definition was commented out.
# Positional string ids are generated here so the cell is self-contained.
ids = [str(i) for i in range(len(chunks))]
vector_db_chroma.add_documents(documents=chunks, ids=ids)

In [37]:
# Ask the store for the 2 closest matches to the query, passing a `source` metadata filter.
# NOTE(review): this searches `vector_store` (created with DocArrayInMemorySearch earlier in the
# notebook), although this section is about the Chroma collection `vector_db_chroma` -- confirm
# which store is meant to be queried and whether `filter` is honoured by it.
results = vector_store.similarity_search(
    "DevOps constitutes the foundational aspect of MLOps",
    k=2,
    filter={"source": "Implementation_MLOps.pdf"},
)
# Print each hit's text followed by its metadata dict.
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* components is critical to the success of MLOps. [13] 
1.2 Research Question 
The principles of DevOps were first mentioned in 2007 and since then have been discussed 
extensively. [2], [8] Thus, the process how to write, test and deploy code is well established. 
[4], [7] From this approach the framework of MLOps has been developed. [30] The main focus 
of the existing literature is defining a suitable process how to train and deploy a n ML model [{'source': 'Implementation_MLOps.pdf', 'page': 1, 'page_label': '2'}]
* healthcare to finance, and from retail to transportation. However, for most of them deploying 
ML models into production environments is relatively new. Until now there might have only 
been a manageable number of models which needed to be hosted. As the number of models 
and the role they play for the company business growths, since more and more decisions are 
automated, the importance of being able to develop, deploy and operate the models efficiently [{'source': 'Im

### Experimenting with Other Cases

In [39]:
from langchain.prompts import PromptTemplate

template = """
    Please answer the question based on the context below. If it's deemed too complex
    or not relevant, please reply "I afraid my capability has yet to extend to the topic you're asking"

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

print(prompt.format(context="Here is some context", question="Here is a question"))


    Please answer the question based on the context below. If it's deemed too complex
    or not relevant, please reply "I afraid my capability has yet to extend to the topic you're asking"

Context: Here is some context

Question: Here is a question



In [50]:
model = OllamaLLM(model=MODEL)

In [51]:
from operator import itemgetter

chain = (
    {
    "context": itemgetter("question"), "question": itemgetter("question")
    }
    | prompt | model | parser
)

chain.invoke({"question": "What are main core of MLOps?"})

# chain.invoke({"question": "What MLOps mainly covers?"})

'The main core of MLOps (Machine Learning Operating System) includes:\n\n1. Data Ingestion and Management\n2. Model Development and Training\n3. Model Deployment and Serving\n4. Monitoring and Evaluation\n\nThese components work together to facilitate the end-to-end lifecycle of machine learning models, from data preparation to model deployment and monitoring, making it easier for data scientists and developers to build, deploy, and maintain scalable and reliable ML systems.'

In [57]:
questions = [
    """During the research stage of machine learning experimentation, let say no one of the team has ever done time-series forecasting. That brings us to
     the selection of source. So, which gives us a good starting point in getting familiar with models?""",
    "In the context of time-series, tell us the statistical test suitable for this task."
]

In [58]:
for question in questions:
    print(f"Question; {question}")
    print(f"Answer: {chain.invoke({'question': question})}")
    print()

Question; During the research stage of machine learning experimentation, let say no one of the team has ever done time-series forecasting. That brings us to
     the selection of source. So, which gives us a good starting point in getting familiar with models?
Answer: A great question for a team diving into time-series forecasting for the first time!

I'm happy to help! Given that no one on your team has experience with time-series forecasting, I'd recommend starting with some fundamental concepts and popular models. Here's a good starting point:

1. **Understand the basics of time-series data**: Familiarize yourself with what time-series data is, its characteristics, and common types (e.g., temporal, spatial, or both).
2. **Explore ARIMA (AutoRegressive Integrated Moving Average) models**: ARIMA is a popular, widely used, and relatively simple model for time-series forecasting. It's a good starting point for understanding the basics of time-series modeling.
3. **Look into other popula

The demonstration from Overfitted ends here.

### Local Adaptive RAG

Source: https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_adaptive_rag_local/#components

In [43]:
### Index 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [45]:
### Index
urls = [
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

In [46]:
embedding

OllamaEmbeddings(model='llama3.2', base_url=None, client_kwargs={})

In [48]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding,
)
retriever = vectorstore.as_retriever()

In [54]:
### Retrieval Grader

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
llm = ChatOllama(model="llama3.2", format="json", temperature=0)

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)

retrieval_grader = prompt | llm | JsonOutputParser()
question = "agent memory"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

{'score': 'yes'}


In [55]:
### Hallucination Grader

# LLM
llm = ChatOllama(model="llama3.2", format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

# Chain: grader prompt -> JSON-mode LLM -> parsed JSON.
hallucination_grader = prompt | llm | JsonOutputParser()
# NOTE(review): `generation` is only assigned in the "### Generate" cell further down, so on a
# fresh top-to-bottom run this call raises NameError (as the recorded output of this cell shows).
# Run the Generate cell first, or move this smoke test after it.
hallucination_grader.invoke({"documents": docs, "generation": generation})

NameError: name 'generation' is not defined

In [32]:
### Answer Grader

# LLM
llm = ChatOllama(model="llama3.2", format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"question": question, "generation": generation})

{'score': 'yes'}

In [56]:
### Generate

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama

# Prompt for the answer-generation step (Llama 3 chat-template markers are written inline).
# The template has exactly two placeholders, {question} and {context}; `input_variables`
# must name those same two (it previously listed "document", which the template never uses).
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)

llm = ChatOllama(model="llama3.2", temperature=0)


# Post-processing
def format_docs(docs):
    """Return the `page_content` of every item in `docs`, joined by one blank line.

    Args:
        docs: iterable of objects exposing a `page_content` string attribute.

    Returns:
        str: the page contents in their original order, separated by "\\n\\n"
        (an empty string when `docs` is empty).
    """
    page_texts = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Chain: generation prompt -> chat model -> plain string.
rag_chain = prompt | llm | StrOutputParser()

# Run a smoke test: retrieve documents for a sample question and generate an answer.
question = "What are some trade-off when fitting data with Random Forest model?"
docs = retriever.invoke(question)
# Pass the documents through format_docs() so the prompt receives only their text
# (joined by blank lines) instead of the repr of a list of Document objects.
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)

I don't know the answer to your question about trade-offs when fitting data with a Random Forest model. The provided context appears to be related to prompt engineering for autoregressive language models, and does not mention Random Forest at all.


In [25]:
### Router

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM in JSON mode with temperature 0 for the router.
# Uses the notebook-wide MODEL constant from the first cell; the name `local_llm` that was
# referenced here is never defined in this notebook.
llm = ChatOllama(model=MODEL, format="json", temperature=0)

prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a 
    user question to a vectorstore or web search. Use the vectorstore for questions on LLM  agents, 
    prompt engineering, and adversarial attacks. You do not need to be stringent with the keywords 
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' 
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and 
    no premable or explanation. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"],
)

# Chain: routing prompt -> JSON-mode LLM -> parsed JSON (expected key: 'datasource').
question_router = prompt | llm | JsonOutputParser()
question = "The approach of bringing well-trained Random Forest to production environment."
# Use retriever.invoke(), as the rest of the notebook does, instead of the deprecated
# get_relevant_documents().
docs = retriever.invoke(question)
# NOTE(review): indexing [1] assumes the retriever returned at least two documents; `doc_txt`
# is not used by the router call below.
doc_txt = docs[1].page_content
print(question_router.invoke({"question": question}))

{'datasource': 'vectorstore'}


In [36]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults

# Take the Tavily API key from the environment (for example from the .env file loaded by
# load_dotenv() in the first cell) instead of committing it to the notebook. Prompt for it
# only when it is missing, so the secret never appears in the saved file.
# SECURITY: the key that was previously hard-coded in this cell must be treated as leaked
# and revoked in the Tavily dashboard.
if not os.environ.get("TAVILY_API_KEY"):
    from getpass import getpass

    os.environ["TAVILY_API_KEY"] = getpass("Tavily API key: ")

# NOTE(review): `k` may not be a recognised option of TavilySearchResults (the documented
# field appears to be `max_results`) -- confirm that the result count is really limited to 3.
web_search_tool = TavilySearchResults(k=3)

In [38]:
from typing_extensions import TypedDict
from typing import List

### State


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    The node functions defined below read the keys they need from this mapping
    and return a dict holding the keys they set.

    Attributes:
        question: the user's question
        generation: answer text produced by the RAG chain
        web_search: "Yes"/"No" flag written by grade_documents
        documents: documents gathered so far (retrieval and/or web search)
    """

    question: str
    generation: str
    # Set to "Yes" by grade_documents when at least one document is graded not relevant,
    # otherwise "No".
    web_search: str
    # NOTE(review): annotated as List[str], but retrieve() stores the retriever's results and
    # web_search() appends a Document object here -- confirm and tighten the annotation.
    documents: List[str]


from langchain.schema import Document

### Nodes


def retrieve(state):
    """
    Look up documents for the current question in the vectorstore retriever.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        dict: The retrieved documents under "documents", plus the unchanged "question"
    """
    print("---RETRIEVE---")
    user_question = state["question"]

    # Query the module-level `retriever` and hand the hits back to the graph
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
    }


def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state; must contain "question" and "documents"

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation.
        "documents" and "question" are passed through unchanged.
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    # RAG generation
    # Give the prompt only the documents' text (via format_docs, defined next to rag_chain)
    # rather than the repr of a list of Document objects.
    generation = rag_chain.invoke(
        {"context": format_docs(documents), "question": question}
    )
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """
    Grade every retrieved document for relevance to the question.

    Irrelevant documents are dropped. As soon as one document is graded
    irrelevant, the web-search flag is switched to "Yes".

    Args:
        state (dict): The current graph state; must contain "question" and
            "documents"

    Returns:
        dict: State update with the relevant "documents", the "question" and
            the "web_search" flag ("Yes" or "No")
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    candidates = state["documents"]

    relevant_docs = []
    needs_web_search = False
    for doc in candidates:
        verdict = retrieval_grader.invoke(
            {"question": question, "document": doc.page_content}
        )
        # Anything other than a (case-insensitive) "yes" counts as irrelevant
        if verdict["score"].lower() != "yes":
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            needs_web_search = True
            continue
        print("---GRADE: DOCUMENT RELEVANT---")
        relevant_docs.append(doc)

    return {
        "documents": relevant_docs,
        "question": question,
        "web_search": "Yes" if needs_web_search else "No",
    }


def web_search(state):
    """
    Web search based on the question

    The search hits are joined into a single Document, which is added after
    any documents already present in the state.

    Args:
        state (dict): The current graph state; must contain "question".
            "documents" is optional.

    Returns:
        dict: State update with the web result appended to "documents", plus
            the "question"
    """

    print("---WEB SEARCH---")
    question = state["question"]
    # This node is also reachable directly from the entry router, before any
    # node has written "documents"; the key may then be missing (or None),
    # so it must not be read with state["documents"].
    previous_docs = state.get("documents") or []

    # Web search
    hits = web_search_tool.invoke({"query": question})
    web_results = Document(page_content="\n".join(hit["content"] for hit in hits))

    # Build a new list instead of appending in place, so the list object held
    # by the incoming state is left untouched.
    return {"documents": [*previous_docs, web_results], "question": question}


### Conditional edge


def route_question(state):
    """
    Route question to web search or RAG.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        str: Next node to call, "websearch" or "vectorstore"

    Raises:
        ValueError: If the router output does not name a known datasource.
            Without this the function would fall through and return None,
            which is not a valid route.
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    print(question)
    source = question_router.invoke({"question": question})
    print(source)
    # The router output comes from an LLM, so it may lack the expected key
    datasource = source.get("datasource") if isinstance(source, dict) else None
    print(datasource)
    if datasource == "web_search":
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    if datasource == "vectorstore":
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"
    raise ValueError(
        f"Router returned an unknown datasource: {source!r} "
        "(expected 'web_search' or 'vectorstore')"
    )


def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state; only "web_search" is read

    Returns:
        str: Binary decision for next node to call, "websearch" or "generate"
    """

    print("---ASSESS GRADED DOCUMENTS---")

    if state["web_search"] == "Yes":
        # grade_documents sets the flag as soon as one document is graded
        # not relevant, so this does not mean that every document was dropped.
        print(
            "---DECISION: NOT ALL DOCUMENTS ARE RELEVANT TO QUESTION, INCLUDE WEB SEARCH---"
        )
        return "websearch"

    # No document was flagged, so generate the answer directly
    print("---DECISION: GENERATE---")
    return "generate"


### Conditional edge


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state; must contain "question",
            "documents" and "generation"

    Returns:
        str: Decision for next node to call: "useful", "not useful" or
            "not supported"
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )

    # Check hallucination. Grades are compared case-insensitively, the same
    # way grade_documents does, because they come from an LLM.
    if score["score"].lower() != "yes":
        # Plain print, like every other message here; pprint is only imported
        # in a later cell and would also wrap the text in quotes.
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    if score["score"].lower() == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


from langgraph.graph import END, StateGraph

# Create the graph over the shared state type
workflow = StateGraph(GraphState)

# Define the nodes: each name is registered together with its function
for node_name, node_fn in (
    ("websearch", web_search),  # web search
    ("retrieve", retrieve),  # retrieve
    ("grade_documents", grade_documents),  # grade documents
    ("generate", generate),  # generate
):
    workflow.add_node(node_name, node_fn)

### Build Graph

In [39]:
# Build graph
# Each routing table maps the string returned by a routing function to the
# node (or END) that runs next.

# Entry: the router picks the first node
entry_routes = {
    "websearch": "websearch",
    "vectorstore": "retrieve",
}
workflow.set_conditional_entry_point(route_question, entry_routes)

# Retrieved documents are always graded
workflow.add_edge("retrieve", "grade_documents")

# After grading: add a web search or generate directly
grading_routes = {
    "websearch": "websearch",
    "generate": "generate",
}
workflow.add_conditional_edges("grade_documents", decide_to_generate, grading_routes)

# Web results always go on to generation
workflow.add_edge("websearch", "generate")

# After generation: finish, generate again, or search the web
# NOTE(review): "not supported" loops back to "generate" with no retry limit
# visible in this cell.
generation_routes = {
    "not supported": "generate",
    "useful": END,
    "not useful": "websearch",
}
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    generation_routes,
)

In [40]:
# Compile
app = workflow.compile()

# Test
from pprint import pprint

inputs = {"question": "Have any knowledge about a popular Pokemon named Pikachu?"}

# Remember the last value yielded by the stream explicitly instead of relying
# on the loop variable still being bound after the loop.
final_state = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        final_state = value

# Show the answer only if the last step actually produced one
if isinstance(final_state, dict) and "generation" in final_state:
    pprint(final_state["generation"])
else:
    print("No generation was produced.")

---ROUTE QUESTION---
Have any knowledge about a popular Pokemon named Pikachu?
{'datasource': 'web_search'}
web_search
---ROUTE QUESTION TO WEB SEARCH---
---WEB SEARCH---
