In [1]:
import re

import bs4
import torch
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_huggingface.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, pipeline)
from typing_extensions import List, TypedDict

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Select the compute device: CUDA when available, otherwise MPS, otherwise CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'
DEVICE

'cuda'

In [None]:
# Embedding model, placed on the device selected in DEVICE.
embeddings = HuggingFaceEmbeddings(
    model_name='all-mpnet-base-v2',
    model_kwargs=dict(device=DEVICE),
)

In [None]:
# Open the Pinecone index and wrap it as a LangChain vector store that uses
# the embedding model above.
# NOTE(review): no API key is passed here, so the client is presumably
# configured through the environment -- confirm.
pc = Pinecone()
index_name = 'all-mpnet-base-v2-character-chunker-500-100'
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [None]:
# <Generated>Question: When is the Vintage Pittsburgh retro fair taking place?
# Answer: The Vintage Pittsburgh retro fair is taking place on April 5, 2025.

In [None]:
# Sample question used to exercise retrieval and generation below.
question = 'When is the Vintage Pittsburgh retro fair taking place?'

In [None]:
class Query(TypedDict):
    """Dictionary holding one question together with its context and answer."""
    # The question text.
    question: str
    # Documents used as supporting context; created empty in this notebook.
    context: List[Document]
    # The answer text; created as an empty string and assigned after generation.
    answer: str


# Initial state: the question only, with no context and no answer yet.
query = Query(
    question=question,
    context=[],
    answer="",
)

In [None]:
# Similarity-search retriever over the vector store, configured with k=5.
retriever = vector_store.as_retriever(search_type="similarity",
                                      search_kwargs=dict(k=5))

# Run the retriever on the current question.
retrieved_docs = retriever.invoke(query["question"])

In [None]:
# Print every retrieved chunk (numbered from 1), followed by its source path
# whenever the chunk carries metadata.
print("\n--- Relevant Documents ---")
for rank, hit in enumerate(retrieved_docs, start=1):
    print(f"Document {rank}:\n{hit.page_content}\n")
    if not hit.metadata:
        continue
    print(f"Source: {hit.metadata.get('source', 'Unknown')}\n")

In [None]:
# Hugging Face model id of the generation model.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [None]:
# Load the tokenizer published under the same id as the generation model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# When the tokenizer has no padding token, reuse its end-of-sequence token.
if not tokenizer.pad_token:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Release cached CUDA memory before the model is loaded.
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

# Text-generation pipeline for the model, in bfloat16, on the selected device.
llm = pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device=DEVICE,
)

In [None]:
# Extract the raw text of every retrieved chunk.
retrieved_docs_text = [hit.page_content for hit in retrieved_docs]
# Concatenate the chunks into one string, each chunk preceded by a newline.
context = "".join(f"\n{chunk}" for chunk in retrieved_docs_text)
print(context)

In [None]:
# NOTE: the instruction "Provide the number of the source document when
# relevant." is NOT part of the system prompt below.

# Two-message chat: a system message with the answering rules and a user
# message carrying the {context} and {question} placeholders.
prompt_in_chat_format = [
    {
        "role": "system",
        "content": """Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
If the answer cannot be deduced from the context, do not give an answer.""",
    },
    {
        "role": "user",
        "content": """Context:
{context}
---
Now here is the question you need to answer.

Question: {question}""",
    },
]
# Render the chat with the tokenizer's chat template as text (tokenize=False)
# and with the generation prompt appended. The placeholders stay in the
# rendered string and are filled in later with str.format.
RAG_PROMPT_TEMPLATE = llm.tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)
print(RAG_PROMPT_TEMPLATE)

In [None]:
# Fill the template's placeholders with the retrieved context and the question.
prompt = RAG_PROMPT_TEMPLATE.format(
    context=context,
    question=query["question"],
)
print(prompt)

In [None]:
# Store only the model's completion in the answer field. By default the
# text-generation pipeline returns the prompt followed by the completion, so
# without return_full_text=False the whole prompt (system message, context and
# question) would be saved as part of the answer.
query['answer'] = llm(prompt, return_full_text=False)[0]["generated_text"]  # type: ignore

In [None]:
# Print the stored answer text.
print(query['answer'])

In [None]:
# Run generation again with return_full_text=False and display the raw
# pipeline result; nothing is stored.
llm(prompt, return_full_text=False)

In [3]:
from rag_pipeline import Query, DataStore, RetrivalLLM

In [4]:
# Generation model id handed to the packaged pipeline.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

# Data store from rag_pipeline, configured with the embedding model name,
# chunker name and data directories.
# NOTE(review): is_upsert_data=False presumably skips re-uploading the chunks --
# confirm against rag_pipeline.
data_store = DataStore(model_name='all-mpnet-base-v2',
                       chunker_name='character_chunker',
                       dir_to_chunk='raw_data',
                       dir_preformatted='formatted_data',
                       is_upsert_data=False)

# Retrieval-augmented model built from the generation model and the data store.
rag_model = RetrivalLLM(data_store=data_store, model_name=model_name)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda


In [7]:
# Inspect the padding token of the tokenizer inside the packaged pipeline.
rag_model.llm.tokenizer.pad_token

'</s>'

In [8]:
# Sanity check: create a small random tensor and move it to the CUDA device.
# NOTE(review): this hard-codes CUDA although the rest of the notebook selects
# the device through DEVICE; it is expected to fail on a machine without
# CUDA -- consider .to(DEVICE) or removing this debugging cell.
torch.randn(10).cuda()

tensor([-0.8051, -0.7840, -0.1125,  0.8382,  0.2850,  0.8885,  0.8883, -0.6970,
         0.1646,  1.4678], device='cuda:0')

In [9]:
# Ask the packaged pipeline the sample question, starting from an empty
# context and an empty answer.
question = 'When is the Vintage Pittsburgh retro fair taking place?'
query = Query(
    question=question,
    context=[],
    answer="",
)
# The `query` displayed in the next cell has its context populated, so this
# call appears to update `query` in place.
rag_model.query_answer(query)

In [10]:
# Display the query dictionary after the pipeline call.
query

{'question': 'When is the Vintage Pittsburgh retro fair taking place?',
 'context': [Document(id='character_chunker_8996_raw_data_museums_heinz_history_center_txt', metadata={'source': 'raw_data/museums/heinz_history_center.txt'}, page_content='TITLE: Vintage Pittsburgh - Heinz History Center\n\nCONTENT:\n\nVintage Pittsburgh\n\nEvent Information\n\nOld is new again at the Heinz History Center’s 11th annual Vintage Pittsburgh retro fair!\n\nPresented in partnership with the Neighborhood Flea, you’re invited to shop ‘til you drop for far-out finds and groovy goods on Saturday, April 5. Dozens of local makers and vendors will be on site to sell one-of-a-kind clothing, accessories, home décor, vinyl records, and more!'),
  Document(id='character_chunker_5769_raw_data_events_food_related_summer_festivals_and_events_in_pittsburgh___visit_pittsburgh___visit_pittsburgh_txt', metadata={'source': 'raw_data/events_food_related/summer_festivals_and_events_in_pittsburgh___visit_pittsburgh___visit_