In [2]:
from langchain_community.chat_models import ChatOllama


In [3]:
llm = ChatOllama(model='llama3', temperature=0)

In [4]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
        "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
    ]
)
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [32]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=100,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.create_documents([docs[0].page_content, docs[1].page_content, docs[2].page_content])
print(texts[0])
print(texts[-1])

page_content="LLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log"
page_content="© 2024 Lil'Log\n\n        Powered by\n        Hugo &\n        PaperMod"


In [37]:
# import
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter

# load the document and split it into chunks

# from google.colab import drive
# drive.mount('/content/drive')

# loader = TextLoader("/content/drive/MyDrive/Colab Notebooks/promptengineering.txt")
# documents = loader.load()

# split it into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(texts)

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load it into Chroma
db = Chroma.from_documents(docs, embedding_function)

# query it
query = "What is Chain-of-Thought"
docs = db.similarity_search(query)

# print results
print(docs[0].page_content)

Chain-of-Thought (CoT)#


In [41]:
db

<langchain_chroma.vectorstores.Chroma at 0x7fed213bc070>

In [87]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
        "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
    ]
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings(), collection_name="haggiepromaxultra")

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever(search_type="similarity")
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# rag_chain.invoke("What is Task Decomposition?")

In [102]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate


parser = JsonOutputParser()

prompt = PromptTemplate(
    template="""
            <|begin_of_text|><|start_header_id|>system<|end_header_id|>
            You are an AI language model assistant. check the relevance between documents given and query<|eot_id|>\n
            <|start_header_id|>user<|end_header_id|>\n{format_instructions}\n{query}\n<|eot_id|>\n
            <|start_header_id|>documents<|end_header_id|>{documents}<|eot_id|>\n
            <|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query","documents","format_instructions"],
    partial_variables={"format_instructions": parser.get_format_instructions()+"""For example, if there is relevance then just return {"relevance": "yes"} otherwise return {"relevance": "no"}. just a json object not any description"""},
)

chain = {"documents": retriever | format_docs, "query": RunnablePassthrough()} | prompt | llm | parser #StrOutputParser()

chain.invoke("What is Task Decomposition?")

{'relevance': 'yes'}

In [106]:
chain.invoke("how to make kimchi?")

{'relevance': 'no'}

In [107]:
chain.invoke("why did world war happen?")

{'relevance': 'no'}

In [80]:
parser.get_format_instructions()+"For example, {‘relevance': 'yes'} or {'relevance': 'no'}"

"Return a JSON object.For example, {‘relevance': 'yes'} or {'relevance': 'no'}"

In [82]:
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7fed281f5e50>)