### Model loading

In [7]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()
llm = ChatGroq(model="deepseek-r1-distill-llama-70b")
llm.invoke("hi")

AIMessage(content='<think>\n\n</think>\n\nHello! How can I assist you today? 😊', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 4, 'total_tokens': 20, 'completion_time': 0.079697988, 'prompt_time': 6.2188e-05, 'queue_time': 0.054578772000000005, 'total_time': 0.079760176}, 'model_name': 'deepseek-r1-distill-llama-70b', 'system_fingerprint': 'fp_76307ac09b', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--bc93b25c-8372-449d-9ea3-aa44dfa0231d-0', usage_metadata={'input_tokens': 4, 'output_tokens': 16, 'total_tokens': 20})

### Loading Embeddings

In [18]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001") 
# embedding_model.embed_query("hai")

## 1. Data Ingestion

config

In [16]:
import os
file_path = os.path.join("..", "data", "sample.pdf")

Document Loader

In [24]:
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader(file_path=file_path)
document = loader.load()
len(document)

77

In [None]:
#ignore this, just skipping some pdf pages to make it easy.
document = document[:20]
len(document)

20

Document Spliiter

In [27]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len
)

In [28]:
splitted_doc = splitter.split_documents(document)

### Vector Part

In [30]:
from langchain.vectorstores import FAISS
vector_store = FAISS.from_documents(splitted_doc, embedding_model)

In [32]:
vector_store.similarity_search("llama2 finetuning benchmark experiments.", k=2)

[Document(id='3abaf823-0fca-4618-bfdb-2497e3f89dcb', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '..\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='53c6674a-d11b-4995-acd0-cff0cade5d08', metadata={'producer': 'pdf

## 2. Data Retrieval

### Retrieval part

In [33]:
retriever = vector_store.as_retriever() 

In [34]:
retriever.invoke("llama2 finetuning benchmark experiments.")

[Document(id='3abaf823-0fca-4618-bfdb-2497e3f89dcb', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '..\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='53c6674a-d11b-4995-acd0-cff0cade5d08', metadata={'producer': 'pdf

## 3.Data Generation

prompt

In [40]:
from langchain.prompts import PromptTemplate

prompt_template = """
        Answer the question based on the context provided below. 
        If the context does not contain sufficient information, respond with: 
        "I do not have enough information about this."

        Context: {context}

        Question: {question}

        Answer:"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n        Answer the question based on the context provided below. \n        If the context does not contain sufficient information, respond with: \n        "I do not have enough information about this."\n\n        Context: {context}\n\n        Question: {question}\n\n        Answer:')

model

In [45]:
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000018AEA8D7730>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000018AEA8D5180>, model_name='deepseek-r1-distill-llama-70b', model_kwargs={}, groq_api_key=SecretStr('**********'))

parser

In [47]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

Chaining

In [50]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [52]:
from langchain_core.runnables import RunnablePassthrough
rag_chain = (
    {
        "context" : retriever | format_docs ,
        "question" : RunnablePassthrough() 
    }
    | prompt
    | llm
    | parser
)

In [53]:
rag_chain.invoke("tell  me about the llama2 finetuning benchmark experiments?")

"<think>\nOkay, I need to figure out how to answer the question about the Llama 2 fine-tuning benchmark experiments based on the provided context. Let me start by reading through the context carefully.\n\nFirst, I see that the context includes a table labeled Table 3, which summarizes overall performance on grouped academic benchmarks. The table lists different model sizes (like 7B, 13B, etc.) and their corresponding scores across various benchmarks. It mentions that for these models, the best score between their evaluation framework and publicly reported results is chosen. \n\nAdditionally, the context talks about hyperparameters used in training, such as the AdamW optimizer, learning rate schedule, weight decay, and gradient clipping. It also mentions that Figure 5(a) shows the training loss for Llama 2 with these hyperparameters. There's a section about pretraining where they started with an approach from Touvron et al. (2023), using an optimized auto-regressive transformer with imp