In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [3]:
# Toy corpus for the keyword-search demo: three short sentences, each starting
# with a "<Label>:" prefix (Thought / Action / Observation).
documents = [
    'Thought: The LLM part of the Agent decides what the next step should be.',
    'Action: The agent takes an action by calling the tools with the associated arguments.',
    'Observation: The model reflects on the response from the tool.'
]

In [4]:
query = 'reflection from tool'

In [5]:
import re

def preprocess_text(text):
    """Normalise a string for keyword matching.

    The text is lower-cased and then every character that is neither a word
    character (letters, digits, underscore) nor whitespace is deleted.
    Whitespace itself is never collapsed, so a punctuation mark that sat
    between two spaces leaves both spaces behind.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with punctuation removed.
    """
    lowered = text.lower()
    return re.sub(r'[^\w\s]','', lowered)

In [6]:
preprocess_text("Apple - mango / orange ?")

'apple  mango  orange '

In [7]:
preprocess_documents = [preprocess_text(doc) for doc in documents]

In [8]:
preprocess_documents

['thought the llm part of the agent decides what the next step should be',
 'action the agent takes an action by calling the tools with the associated arguments',
 'observation the model reflects on the response from the tool']

In [9]:
preprocess_query = preprocess_text(query)
preprocess_query

'reflection from tool'

In [10]:
#keyword search
vector = TfidfVectorizer()

In [11]:
X=vector.fit_transform(preprocess_documents)

In [12]:
X.toarray()

array([[0.        , 0.20533878, 0.        , 0.        , 0.        ,
        0.26999582, 0.        , 0.        , 0.26999582, 0.        ,
        0.26999582, 0.        , 0.26999582, 0.        , 0.26999582,
        0.        , 0.26999582, 0.        , 0.        , 0.26999582,
        0.26999582, 0.        , 0.47839164, 0.26999582, 0.        ,
        0.        , 0.26999582, 0.        ],
       [0.50446784, 0.1918304 , 0.25223392, 0.25223392, 0.25223392,
        0.        , 0.25223392, 0.25223392, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.25223392, 0.44692025, 0.        , 0.        ,
        0.25223392, 0.        , 0.25223392],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.31404572,
        0.        , 0.31404572, 0.        , 0.31404572, 0.        ,
        0.31404572, 0.    

In [13]:
len(X.toarray()[0])

28

In [14]:
query_embedding = vector.transform([preprocess_query])

In [15]:
len(query_embedding.toarray()[0])

28

In [16]:
vector.transform(['i am pasindu']).toarray()

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [17]:
similarities = cosine_similarity(X, query_embedding)

In [18]:
similarities

array([[0.        ],
       [0.        ],
       [0.44412771]])

In [19]:
# argsort orders the scores ascending along axis 0; reversing the result puts the
# highest similarity first, and flatten() turns the (n_docs, 1) column of indices
# into a 1-D array that can be used to index `documents`.
ranked_indices = np.argsort(similarities, axis=0)[::-1].flatten()

In [20]:
# Reorder the original (un-preprocessed) documents by descending similarity.
rank_documents = [documents[i] for i in ranked_indices]

# enumerate() is zero-based, so i+1 gives a human-friendly rank starting at 1.
for i, doc in enumerate(rank_documents):
    print(f"Rank: {i+1} Doc: {doc}")

Rank: 1 Doc: Observation: The model reflects on the response from the tool.
Rank: 2 Doc: Action: The agent takes an action by calling the tools with the associated arguments.
Rank: 3 Doc: Thought: The LLM part of the Agent decides what the next step should be.


In [21]:
#vector search

document_embedding = np.array([
    [0.245, 0.675, 0.890, 0.453],
    [0.634, 0.743, 0.555, 0.123],
    [0.456, 0.785, 0.126, 0.342]
])

In [22]:
# NOTE(review): this rebinds `query_embedding`, which earlier held the TF-IDF query
# vector produced by vector.transform(). Re-running the keyword-search similarity
# cell after this one would compare against this 1x4 array instead.
query_embedding = np.array([[0.265, 0.612, 0.912, 0.412]])

In [23]:
similarities = cosine_similarity(document_embedding, query_embedding)

In [24]:
similarities

array([[0.99804555],
       [0.86791172],
       [0.72851819]])

In [25]:
ranked_indices = np.argsort(similarities, axis=0)[::-1].flatten()

In [26]:
ranked_indices

array([0, 1, 2])

In [27]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [30]:
from pathlib import Path

# Source PDF, resolved relative to the notebook's working directory. Kept as a
# Path (the import was previously unused) so it can be joined or inspected later;
# it is converted to a string only at the loader boundary.
pdf_path = Path('rag.pdf')

# load() returns a list of Document objects whose metadata carries the source
# path and page number (see the displayed output of the next cell).
loader = PyPDFLoader(str(pdf_path))
docs = loader.load()

In [33]:
docs[:2]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-03-28T00:54:45+00:00', 'author': '', 'keywords': '', 'moddate': '2024-03-28T00:54:45+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'rag.pdf', 'total_pages': 21, 'page': 0, 'page_label': '1'}, page_content='1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gao b, Kangxiang Jia b, Jinliu Pan b, Yuxi Bic, Yi Dai a, Jiawei Sun a, Meng\nWangc, and Haofen Wang a,c\naShanghai Research Institute for Intelligent Autonomous Systems, Tongji University\nbShanghai Key Laboratory of Data Science, School of Computer Science, Fudan University\ncCollege of Design and Innovation, Tongji University\nAbstract—Large Language Models (LLMs) showcase impres-\nsive capabilities but encounter challenges like hallucination,\nout

In [34]:
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)

In [35]:
chunks = splitter.split_documents(docs)

In [36]:
# Preview only the first two chunks (same as the docs[:2] preview above);
# displaying the full list floods the notebook with output.
chunks[:2]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-03-28T00:54:45+00:00', 'author': '', 'keywords': '', 'moddate': '2024-03-28T00:54:45+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'rag.pdf', 'total_pages': 21, 'page': 0, 'page_label': '1'}, page_content='1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gao b, Kangxiang Jia b, Jinliu Pan b, Yuxi Bic, Yi Dai a, Jiawei Sun a, Meng'),
 Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-03-28T00:54:45+00:00', 'author': '', 'keywords': '', 'moddate': '2024-03-28T00:54:45+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'rag.pdf

In [39]:
from dotenv import load_dotenv
import os 

_ = load_dotenv()

# Re-export the Hugging Face token only when one is actually configured.
# os.getenv() returns None for a missing variable, and assigning None into
# os.environ raises TypeError, which would stop the notebook with an unhelpful
# message.
hf_token = os.getenv('HF_TOKEN')
if hf_token is not None:
    os.environ['HF_TOKEN'] = hf_token
else:
    print("HF_TOKEN is not set; continuing without Hugging Face authentication.")

In [38]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

In [41]:
embedding_model = HuggingFaceBgeEmbeddings(model_name='BAAI/bge-base-en-v1.5')

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [42]:
from langchain.vectorstores import Chroma

In [44]:
vectorstore = Chroma.from_documents(chunks, embedding_model)

In [45]:
vectorstore_retriever =  vectorstore.as_retriever(search_kwargs={"k":3})

In [49]:
vectorstore_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C17B6AB0D0>, search_kwargs={'k': 3})

In [46]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

In [47]:
keyword_retriever = BM25Retriever.from_documents(chunks)

In [48]:
keyword_retriever

BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001C18CD48090>)

In [50]:
keyword_retriever.k = 3

In [51]:
ensemble_retriever = EnsembleRetriever(retrievers=[vectorstore_retriever, keyword_retriever], weights=[0.3, 0.7])

In [52]:
model_name = 'HuggingFaceH4/zephyr-7b-beta'

In [54]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from langchain import HuggingFacePipeline

In [55]:
def load_quantized_model(model_name:str):
    """Load a causal language model with 4-bit NF4 quantisation.

    The weights are loaded through ``AutoModelForCausalLM.from_pretrained``
    with a ``BitsAndBytesConfig`` that enables 4-bit loading, double
    quantisation, the ``nf4`` quantisation type and bfloat16 compute.

    Args:
        model_name: Model identifier passed straight to ``from_pretrained``.

    Returns:
        The model object returned by ``from_pretrained``.

    Raises:
        ImportError: Raised by transformers when the ``accelerate`` package
            required for bitsandbytes 4-bit loading is not installed.
    """
    quantization_settings = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_settings,
    )

In [56]:
def initiate_tokenizer(model_name):
    """Build the tokenizer that pairs with ``model_name``.

    ``return_token_type_ids=False`` is forwarded to
    ``AutoTokenizer.from_pretrained``, and the beginning-of-sequence token id
    on the returned tokenizer is then overridden to 1.

    Args:
        model_name: Model identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The configured tokenizer.
    """
    tok = AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False)
    # NOTE(review): hard-coded BOS id; presumably matches this model's vocabulary; confirm.
    tok.bos_token_id = 1
    return tok

In [57]:
tokenizer = initiate_tokenizer(model_name)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [59]:
model = load_quantized_model(model_name)

ImportError: Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [None]:
pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map='auto',
    max_length=2048, 
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

In [None]:
llm = HuggingFacePipeline(pipeline=pipeline)

In [60]:
from langchain.chains import RetrievalQA

In [None]:
# Baseline QA chain: context comes from the Chroma-backed vector retriever only.
normal_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=vectorstore_retriever
)

In [None]:
# Hybrid QA chain: same LLM and chain type, but context comes from the ensemble
# retriever that combines the vector retriever with the BM25 keyword retriever.
hybrid_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=ensemble_retriever
)

In [None]:
response1 = normal_chain.invoke("What is rag")