In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = 'https://api.smith.langchain.com'
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

In [2]:
# Before running, please pip install tiktoken
from dashscope import get_tokenizer  # dashscope version >= 1.14.0

# Get the tokenizer object, currently only supports the Qwen series models
tokenizer = get_tokenizer('qwen-max')

def print_token_details(input_str):
    # Split the string into tokens and convert to token ids
    tokens = tokenizer.encode(input_str)
    print(f"Token ids after tokenization: {tokens}.")
    print(f"There are {len(tokens)} tokens after tokenization")

    # Convert token ids back to strings and print them
    for i in range(len(tokens)):
        print(f"Token id {tokens[i]} corresponds to the string: {tokenizer.decode(tokens[i])}")

In [3]:
input_str = 'Nice to meet you.'
print_token_details(input_str)

Token ids after tokenization: [44978, 311, 3367, 498, 13].
There are 5 tokens after tokenization
Token id 44978 corresponds to the string: Nice
Token id 311 corresponds to the string:  to
Token id 3367 corresponds to the string:  meet
Token id 498 corresponds to the string:  you
Token id 13 corresponds to the string: .


In [4]:
input_str = '很高兴认识你！'
print_token_details(input_str)

Token ids after tokenization: [112169, 100720, 56568, 6313].
There are 4 tokens after tokenization
Token id 112169 corresponds to the string: 很高兴
Token id 100720 corresponds to the string: 认识
Token id 56568 corresponds to the string: 你
Token id 6313 corresponds to the string: ！


# Config LLM & Embedding Model

In [6]:
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

llm = ChatOpenAI(model="qwen-max", temperature=0)

model_name = "C:\\Home\\Documents\\Projects\\models\\BAAI\\bge-large-en-v1.5"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange


# Indexing

In [7]:
# Load blog

import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

blog_docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [8]:
# Split

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)
len(splits)

52

In [9]:
# Vector Store

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(host="localhost", port=6333)

client.create_collection(
    collection_name="rag_from_scratch",
    vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="rag_from_scratch",
    embedding=embedding_model,
)

In [10]:
from uuid import uuid4

ids = [str(uuid4()) for _ in range(len(splits))]
vector_store.add_documents(documents=splits, ids=ids)

['7414612b-7d13-4900-a8e2-98fe3f9c8bc3',
 'f679d4ed-cc86-4436-9440-47843fe87398',
 'ca53f36c-9331-4f43-bc8f-5b5e8f788d57',
 '44491300-3ce6-4fac-a818-512f5be9f6cf',
 'eaba91fc-4b4d-49c9-a699-1de2479965a2',
 '54f59cfa-d0e6-4fc7-b6de-e56524a74a14',
 'a93c8848-380a-47ea-862b-e5de698485c9',
 '3d8d4956-ac6e-4e57-a798-185d64c787d1',
 'b75577ce-efb7-4654-91a2-4562931448fc',
 '9693c546-8266-4eb0-a219-8b3bbc46a8ac',
 '7411503a-cff2-4f70-8741-255e361d5ba5',
 '24107b94-44c4-4b97-85e7-5e04c7ade0d0',
 'cfcbe698-abe8-43de-a477-14c32c4d205e',
 '6f2ee547-822b-4037-b46a-05212a23549f',
 'f0ddc992-0698-46b1-8faf-b5cd9ee9c95f',
 '553b94ae-eb96-4cfb-bad1-7adcdf44d99d',
 'a2c9af44-80dc-420b-8224-1a3a17c66a25',
 '6be2c036-6ae9-40e7-bd8e-4c4b9ecc324d',
 'cb274079-b1a9-41c2-859b-0a4afc47e8b3',
 'a0f6e334-0528-42d9-a2c7-dca646e39a99',
 '054e3152-94ef-4955-b54b-88f3b1b31f5b',
 '2a9cc697-d61c-4f2f-a413-0017dd5a0e75',
 '1609d41a-f776-4a51-98f0-51a947afe99a',
 '82ca98e7-a73d-4a47-8588-76cf1b587299',
 'd7514ece-6b92-

# Retrieval

In [12]:
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

retrieved_docs = retriever.invoke("What is Task Decomposition?")

len(retrieved_docs)

1

In [15]:
import pprint

pprint.pprint(retrieved_docs[0].page_content)

('Fig. 1. Overview of a LLM-powered autonomous agent system.\n'
 'Component One: Planning#\n'
 'A complicated task usually involves many steps. An agent needs to know what '
 'they are and plan ahead.\n'
 'Task Decomposition#\n'
 'Chain of thought (CoT; Wei et al. 2022) has become a standard prompting '
 'technique for enhancing model performance on complex tasks. The model is '
 'instructed to “think step by step” to utilize more test-time computation to '
 'decompose hard tasks into smaller and simpler steps. CoT transforms big '
 'tasks into multiple manageable tasks and shed lights into an interpretation '
 'of the model’s thinking process.\n'
 'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple '
 'reasoning possibilities at each step. It first decomposes the problem into '
 'multiple thought steps and generates multiple thoughts per step, creating a '
 'tree structure. The search process can be BFS (breadth-first search) or DFS '
 '(depth-first search) with each

# Generation

In [16]:
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chain
chain = prompt | llm

# Run
result = chain.invoke({"context":retrieved_docs,
                       "question":"What is Task Decomposition?"})

pprint.pprint(result)

AIMessage(content='Task Decomposition is a method used to break down complex tasks into smaller, more manageable steps. This technique helps in making the task easier to handle and provides insight into the model\'s thinking process. It can be achieved through different approaches such as:\n\n1. Using simple prompts with a large language model (LLM) like "Steps for XYZ." or "What are the subgoals for achieving XYZ?".\n2. Employing task-specific instructions, for example, "Write a story outline." when the goal is to write a novel.\n3. Incorporating human inputs to guide the decomposition.\n\nAdditionally, techniques like Chain of Thought (CoT) and Tree of Thoughts enhance task decomposition. CoT instructs the model to think step by step, while Tree of Thoughts extends this by exploring multiple reasoning possibilities at each step, creating a tree structure that can be searched using methods like breadth-first search (BFS) or depth-first search (DFS).', additional_kwargs={'refusal': Non

In [17]:
# Chain

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

result = rag_chain.invoke("What is Task Decomposition?")

pprint.pprint(result)

('Task Decomposition is a technique used to break down complex tasks into '
 'smaller, more manageable steps. This approach helps in making the task '
 "easier to handle and provides insight into the model's thinking process. It "
 'can be achieved through methods such as Chain of Thought (CoT), where the '
 'model is instructed to think step by step, or Tree of Thoughts, which '
 'extends CoT by exploring multiple reasoning possibilities at each step, '
 'creating a tree structure. Task decomposition can be facilitated (1) by '
 'using simple prompts with large language models (LLMs), (2) by employing '
 'task-specific instructions, or (3) with human inputs.')
