In [40]:
import bs4
import torch
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_huggingface.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, pipeline)
from typing_extensions import List, TypedDict
import re

In [2]:
DEVICE = ('cuda' if torch.cuda.is_available() else
          'mps' if torch.backends.mps.is_available() else 'cpu')
DEVICE

'mps'

In [3]:
embeddings = HuggingFaceEmbeddings(model_name='all-mpnet-base-v2',
                                   model_kwargs={'device': DEVICE})

In [4]:
len(embeddings.embed_documents(['test'])[0])

768

In [5]:
index_name = 'tutorial'

pc = Pinecone()
# pc.delete_index(index_name)
# pc.create_index(
#     name=index_name,
#     dimension=768,
#     metric='cosine',
#     spec=ServerlessSpec(cloud="aws", region="us-east-1")
# )
index = pc.Index(index_name)

vector_store = PineconeVectorStore(embedding=embeddings, index=index)

In [6]:
# urls = ["https://www.apple.com/"]
# loader = WebBaseLoader(urls)
# documents = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
# docs = text_splitter.split_documents(documents)

# # Index chunks
# _ = vector_store.add_documents(documents=docs)

In [72]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Define the user's question
query = "What new products are announced on Apple.com?"

# Retrieve relevant documents based on the query
retrieved_docs = retriever.invoke(query)

In [73]:
# print("\n--- Relevant Documents ---")
# for i, doc in enumerate(relevant_docs, 1):
#     print(f"Document {i}:\n{doc.page_content}\n")
#     if doc.metadata:
#         print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")

In [74]:
retrieved_docs_text = [doc.page_content for doc in retrieved_docs]
context = "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(retrieved_docs_text)])

# QA

In [None]:
# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

In [None]:
# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}


# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [30]:
model_name = "HuggingFaceH4/zephyr-7b-beta"

In [98]:
model_name

'openai-community/gpt2-large'

In [79]:
model = AutoModelForCausalLM.from_pretrained(model_name).to(DEVICE)
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1280)
    (wpe): Embedding(1024, 1280)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-35): 36 x GPT2Block(
        (ln_1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3840, nx=1280)
          (c_proj): Conv1D(nf=1280, nx=1280)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=5120, nx=1280)
          (c_proj): Conv1D(nf=1280, nx=5120)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1280, out_features=50257, bias=False)
)

In [80]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer

GPT2TokenizerFast(name_or_path='openai-community/gpt2-large', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}
)

In [33]:
chat_template = tokenizer.chat_template

In [47]:
model_name = "openai-community/gpt2-large"

llm = pipeline("text-generation", model=model_name, device=DEVICE)
llm

Device set to use mps


<transformers.pipelines.text_generation.TextGenerationPipeline at 0x39a133c20>

In [51]:
print(llm("What is 4+4? Answer:")[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


What is 4+4? Answer: 4+4 means 'one plus four, or four equals four.' And this simple question is actually extremely important. Without a clear definition of '4+4,' we cannot understand how it is not four,


In [58]:
inputs_with_doc = tokenizer('What is 4+4? Answer:', return_tensors="pt")
inputs_with_doc

{'input_ids': tensor([[    1,  1824,   349, 28705, 28781, 28806, 28781, 28804, 26307, 28747]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [61]:
answers = model.generate(**inputs_with_doc, pad_token_id=tokenizer.eos_token_id)

In [65]:
' '.join(tokenizer.batch_decode(answers)[0].split('\n'))

'<s> What is 4+4? Answer:G<s>��<s> roi is 4+4<s>��<s> roi is 4+'

In [66]:
llm.tokenizer.chat_template = chat_template

In [67]:
prompt_in_chat_format = [
    {
        "role": "system",
        "content": """Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer.""",
    },
    {
        "role": "user",
        "content": """Context:
{context}
---
Now here is the question you need to answer.

Question: {question}""",
    },
]
RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)
print(RAG_PROMPT_TEMPLATE)

<|system|>
Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer.</s>
<|user|>
Context:
{context}
---
Now here is the question you need to answer.

Question: {question}</s>
<|assistant|>



In [75]:
context = re.sub(r'\n{2,}', '', context)

In [76]:
prompt = RAG_PROMPT_TEMPLATE.format(question=query, context=context)
print(prompt)

<|system|>
Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer.</s>
<|user|>
Context:
Document 0:::
AppleAppleAppleStoreMaciPadiPhoneWatch
VisionAirPodsTV & HomeEntertainmentAccessoriesSupport0+ MacBook Air
Sky blue color.Sky high performance with M4.Learn more
BuyBuilt for Apple Intelligence. iPad Air
Now supercharged by the M3 chip.Learn more
BuyBuilt for Apple Intelligence. Mac Studio
M4 Max and M3 Ultra. Choose your superpower.Learn more
BuyBuilt for Apple Intelligence. iPad
Now with the speed of the A16 chip and double the starting storage.Learn more
Buy iPhone
Meet the iPhone 16 family.Learn more
Shop iPhoneBuilt for Apple Intelligence. Apple Trade In
Get $170–$630 in credit when you trade in iPhone 12 or higher.1Get your

In [81]:
prompt

'<|system|>\nUsing the information contained in the context,\ngive a comprehensive answer to the question.\nRespond only to the question asked, response should be concise and relevant to the question.\nProvide the number of the source document when relevant.\nIf the answer cannot be deduced from the context, do not give an answer.</s>\n<|user|>\nContext:\nDocument 0:::\nAppleAppleAppleStoreMaciPadiPhoneWatch\nVisionAirPodsTV & HomeEntertainmentAccessoriesSupport0+\xa0MacBook Air\nSky blue color.Sky high performance with M4.Learn more\nBuyBuilt for Apple Intelligence.\xa0iPad Air\nNow supercharged by the M3 chip.Learn more\nBuyBuilt for Apple Intelligence.\xa0Mac Studio\nM4\xa0Max and M3\xa0Ultra. Choose your superpower.Learn more\nBuyBuilt for Apple Intelligence.\xa0iPad\nNow with the speed of the A16 chip and double the starting storage.Learn more\nBuy\xa0iPhone\nMeet the iPhone 16 family.Learn more\nShop iPhoneBuilt for Apple Intelligence.\xa0Apple Trade In\nGet $170–$630 in credit w

In [91]:
prompt_tokens = tokenizer(prompt, return_tensors="pt").to(DEVICE)
prompt_tokens

{'input_ids': tensor([[   27,    91, 10057,    91,    29,   198, 12814,   262,  1321,  7763,
           287,   262,  4732,    11,   198, 26535,   257,  9815,  3280,   284,
           262,  1808,    13,   198, 19309,   623,   691,   284,   262,  1808,
          1965,    11,  2882,   815,   307, 35327,   290,  5981,   284,   262,
          1808,    13,   198, 15946,   485,   262,  1271,   286,   262,  2723,
          3188,   618,  5981,    13,   198,  1532,   262,  3280,  2314,   307,
          4648, 19513,   422,   262,  4732,    11,   466,   407,  1577,   281,
          3280, 25970,    82,    29,   198,    27,    91,  7220,    91,    29,
           198, 21947,    25,   198, 24941,   657,  3712,    25,   198, 16108,
         16108, 16108, 22658, 14155,    72,    47,  9189,  6132, 10723,   198,
         44206, 16170,    47, 12978,  6849,  1222,  5995, 17469, 10738, 15457,
          1749, 15514,    15,    10,  1849, 14155, 10482,  3701,   198, 22308,
          4171,  3124,    13, 22308,  

In [94]:
answer = model.generate(**prompt_tokens, pad_token_id=tokenizer.eos_token_id)
answer

tensor([[   27,    91, 10057,    91,    29,   198, 12814,   262,  1321,  7763,
           287,   262,  4732,    11,   198, 26535,   257,  9815,  3280,   284,
           262,  1808,    13,   198, 19309,   623,   691,   284,   262,  1808,
          1965,    11,  2882,   815,   307, 35327,   290,  5981,   284,   262,
          1808,    13,   198, 15946,   485,   262,  1271,   286,   262,  2723,
          3188,   618,  5981,    13,   198,  1532,   262,  3280,  2314,   307,
          4648, 19513,   422,   262,  4732,    11,   466,   407,  1577,   281,
          3280, 25970,    82,    29,   198,    27,    91,  7220,    91,    29,
           198, 21947,    25,   198, 24941,   657,  3712,    25,   198, 16108,
         16108, 16108, 22658, 14155,    72,    47,  9189,  6132, 10723,   198,
         44206, 16170,    47, 12978,  6849,  1222,  5995, 17469, 10738, 15457,
          1749, 15514,    15,    10,  1849, 14155, 10482,  3701,   198, 22308,
          4171,  3124,    13, 22308,  1029,  2854,  

In [97]:
print(tokenizer.batch_decode(answer)[0])

<|system|>
Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer.</s>
<|user|>
Context:
Document 0:::
AppleAppleAppleStoreMaciPadiPhoneWatch
VisionAirPodsTV & HomeEntertainmentAccessoriesSupport0+ MacBook Air
Sky blue color.Sky high performance with M4.Learn more
BuyBuilt for Apple Intelligence. iPad Air
Now supercharged by the M3 chip.Learn more
BuyBuilt for Apple Intelligence. Mac Studio
M4 Max and M3 Ultra. Choose your superpower.Learn more
BuyBuilt for Apple Intelligence. iPad
Now with the speed of the A16 chip and double the starting storage.Learn more
Buy iPhone
Meet the iPhone 16 family.Learn more
Shop iPhoneBuilt for Apple Intelligence. Apple Trade In
Get $170–$630 in credit when you trade in iPhone 12 or higher.1Get your

In [99]:
llm = pipeline("text-generation", model=model_name, device=DEVICE, max_length=2048)

Device set to use mps


In [102]:
print(llm(prompt)[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


<|system|>
Using the information contained in the context,
give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.
If the answer cannot be deduced from the context, do not give an answer.</s>
<|user|>
Context:
Document 0:::
AppleAppleAppleStoreMaciPadiPhoneWatch
VisionAirPodsTV & HomeEntertainmentAccessoriesSupport0+ MacBook Air
Sky blue color.Sky high performance with M4.Learn more
BuyBuilt for Apple Intelligence. iPad Air
Now supercharged by the M3 chip.Learn more
BuyBuilt for Apple Intelligence. Mac Studio
M4 Max and M3 Ultra. Choose your superpower.Learn more
BuyBuilt for Apple Intelligence. iPad
Now with the speed of the A16 chip and double the starting storage.Learn more
Buy iPhone
Meet the iPhone 16 family.Learn more
Shop iPhoneBuilt for Apple Intelligence. Apple Trade In
Get $170–$630 in credit when you trade in iPhone 12 or higher.1Get your