In [16]:
# Smoke test: prints a fixed marker string to confirm the kernel executes cells
print("all_ok")

all_ok


In [17]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv

In [18]:
# Load settings from a .env file; the call reports True in the output below.
# NOTE(review): presumably this is where the API keys for the chat and embedding
# clients created in later cells come from — confirm which variables are required.
load_dotenv()

True

In [19]:
# Chat model client; the model id is named once so it is easy to spot and swap
GROQ_MODEL_ID = "openai/gpt-oss-20b"
llm = ChatGroq(model=GROQ_MODEL_ID)

In [20]:
# Quick check that the chat model answers; the cell displays the reply text
capital_reply = llm.invoke("What is the capital city of Malta?")
capital_reply.content

'The capital city of Malta is **Valletta**.'

In [21]:
# Same question again, sent as a new request; print() shows the reply text
# without the quotes of the string repr seen in the previous cell's output
print(llm.invoke("What is the capital city of Malta?").content)

The capital city of Malta is **Valletta**.


In [22]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [23]:
# Embedding client used for both documents and queries in the cells below
EMBEDDING_MODEL_ID = "models/embedding-001"
embedding_model = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_ID)

In [49]:
# Embed a single query string with the embedding model.
# NOTE(review): this cell's execution count (49) is out of order with its neighbours,
# and embed_vec is not referenced by any later cell shown here.
embed_vec = embedding_model.embed_query("What is the capital city of Malta?")

## 1. Data Ingestion

In [2]:
from langchain.document_loaders import PyPDFLoader

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
import os

In [5]:
# Show the current working directory; the PDF path in the next cell is built relative to it
os.getcwd()

'd:\\LLMOPs\\Document_Portal\\notebook'

In [8]:
# Location of the sample PDF: <current working directory>/data/sample.pdf
working_dir = os.getcwd()
file_path = os.path.join(working_dir, "data", "sample.pdf")

In [9]:
# Create a PDF loader bound to file_path; the content is read in the next cell via loader.load()
loader = PyPDFLoader(file_path)

In [48]:
# Read the PDF through the loader. Judging by the output and metadata below,
# this yields one Document per page, each carrying page-level metadata.
documents = loader.load()
# Preview only the first few entries rather than the whole list
documents[:4]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'd:\\LLMOPs\\Document_Portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev\nPu

In [11]:
# Number of Document objects returned by the loader
# (matches the 'total_pages' value in the metadata for this file)
len(documents)

77

In [12]:
# Split the pages into overlapping windows so each piece can be embedded and
# retrieved on its own. Sizes are measured with len(), i.e. in characters.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 150
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)

In [13]:
# Apply the splitter to the loaded pages; each resulting chunk is itself a Document
# (the metadata check in the next cell shows it keeps the page's metadata)
docs = text_splitter.split_documents(documents)
# Number of chunks produced
len(docs)

765

In [14]:
# Inspect the metadata dict attached to the first chunk (source path, page number, PDF properties)
docs[0].metadata

{'producer': 'pdfTeX-1.40.25',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2023-07-20T00:30:36+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2023-07-20T00:30:36+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': 'd:\\LLMOPs\\Document_Portal\\notebook\\data\\sample.pdf',
 'total_pages': 77,
 'page': 0,
 'page_label': '1'}

In [15]:
# Inspect the raw text of the first chunk
docs[0].page_content

'Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev'

In [25]:
# Storing data in vector database
from langchain.vectorstores import FAISS


In [26]:
# embed_documents() takes a list of texts and returns one vector per text.
# Wrap the single chunk in a list so exactly one embedding is requested: a bare
# string is not a list of texts and may be treated as a sequence of
# single-character texts, one request item per character.
# The length of the first vector is the embedding dimensionality.
len(embedding_model.embed_documents([docs[0].page_content])[0])

768

## Retrieval process

Retrieval means fetching the most relevant results from the vector database and ranking them by how closely they match the query.

In [27]:
# Build a FAISS vector store from the chunks, using embedding_model to embed them.
# NOTE(review): this presumably calls the embedding service for every chunk on each run;
# consider persisting the index if re-running the notebook becomes slow.
vectorstore = FAISS.from_documents(docs, embedding_model)

In [28]:
# Query the vector store with a natural-language question; returns the closest chunks as Documents
vectorstore.similarity_search("What is a llama2 llm model?")

[Document(id='1fed5390-972b-427e-8374-f2927ba70d54', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'd:\\LLMOPs\\Document_Portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 33, 'page_label': '34'}, page_content='5.2 Limitations and Ethical Considerations\nLlama 2-Chatis subject to the same well-recognized limitations of other LLMs, including a cessation of\nknowledge updates post-pretraining, potential for non-factual generation such as unqualified advice, and a\npropensity towards hallucinations.\nFurthermore, our initial version ofLlama 2-Chatpredominantly concentrated on English-language data.\nWhile our experimental observations suggest the model has garnered some 

In [31]:
# Second query, this time asking for the three closest chunks only
benchmark_query = "llama2 finetuning benchmark experiments."
filtered_doc = vectorstore.similarity_search(benchmark_query, k=3)

In [32]:
# Display the Documents returned by the k=3 search
filtered_doc

[Document(id='f5409cbe-8e7f-4fff-b31b-01cc8dd38d9b', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'd:\\LLMOPs\\Document_Portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='aade8323-d524-493a-b039-63aad5

In [33]:
# Text of the top-ranked chunk for the benchmark query
filtered_doc[0].page_content

'13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'

In [None]:
# as_retriever() exposes the vector store through the retriever interface, so it can be
# called with .invoke() and used as a step in a chain (both are done in later cells).
# The variable keeps its original spelling because those later cells reference it.
retreiver = vectorstore.as_retriever()

In [35]:
# Call the retriever directly with the same query as the earlier similarity search;
# the output below starts with the same Document id
retreiver.invoke("llama2 finetuning benchmark experiments.")

[Document(id='f5409cbe-8e7f-4fff-b31b-01cc8dd38d9b', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'd:\\LLMOPs\\Document_Portal\\notebook\\data\\sample.pdf', 'total_pages': 77, 'page': 7, 'page_label': '8'}, page_content='13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\nLlama 2\n7B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\n70B 37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\nTable 3: Overall performance on grouped academic benchmarks compared to open-source base models.'),
 Document(id='aade8323-d524-493a-b039-63aad5

### Question: the user's question
### Context: information retrieved from the vector database based on the question

In [36]:
# Prompt text for the RAG chain. {context} is filled with the retrieved text and
# {question} with the user's query. The leading indentation and the
# whitespace-only lines are part of the string and are spelled out explicitly here.
prompt_template = (
    "\n"
    "    Answer the question based on the context provided below.\n"
    "    If the context does not contain sufficient information, respond with:\n"
    "    \"I do not have enough info about this.\"\n"
    "    \n"
    "    Context: {context}\n"
    "\n"
    "    Question: {question}\n"
    "    \n"
    "    Answer:"
)

In [37]:
# Import prompt template library
from langchain.prompts import PromptTemplate

In [38]:
# Wrap the raw template string in a PromptTemplate, declaring the two placeholders it expects
template_fields = ["context", "question"]
prompt = PromptTemplate(input_variables=template_fields, template=prompt_template)

In [39]:
# Display the PromptTemplate object to check its variables and template text
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Answer the question based on the context provided below.\n    If the context does not contain sufficient information, respond with:\n    "I do not have enough info about this."\n    \n    Context: {context}\n\n    Question: {question}\n    \n    Answer:')

In [45]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Output parser instance.
# NOTE(review): the chain cell below builds its own StrOutputParser() inline,
# so `parser` appears unused in the cells shown here.
parser=StrOutputParser()

In [43]:
def format_docs(docs):
    """Join the page_content of each document into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The texts in their original order, separated by a blank line
        (an empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    page_texts = (document.page_content for document in docs)
    return separator.join(page_texts)

In [46]:
# Assemble the RAG pipeline:
#   1. the incoming question is sent to the retriever and the hits are joined by
#      format_docs to form "context"; the question itself is passed through unchanged
#   2. both values fill the prompt
#   3. the prompt goes to the chat model
#   4. the model output is parsed into a plain string
retrieval_inputs = {
    "context": retreiver | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = retrieval_inputs | prompt | llm | StrOutputParser()

In [47]:
# Run the full pipeline on a user question; the displayed value is the generated answer string.
# NOTE(review): the answer captured below lists identical "Baseline" and "Fine-tuned" figures —
# check it against the retrieved chunks before trusting it.
rag_chain.invoke("tell me about llama2 finetuning benchmark experiments")

'**LLaMA\u202f2 fine‑tuning benchmark experiments**\n\n| Model size | Fine‑tuned | 7\u202fB | 13\u202fB | 34\u202fB | 70\u202fB |\n|-----------|-----------|-----|------|------|------|\n| **Accuracy (%) on grouped academic benchmarks** | | | | |\n| **Baseline (open‑source)** | | 16.8\u202f% | 24.5\u202f% | 27.8\u202f% | 37.5\u202f% |\n| **Fine‑tuned** | | 16.8\u202f% | 24.5\u202f% | 27.8\u202f% | 37.5\u202f% |\n\n> (The table above is a simplified representation of the performance numbers reported in the paper; the full table contains many more columns and metrics.)\n\n### What the experiments did\n\n| Step | What was done | Why |\n|------|--------------|-----|\n| **Model selection** | LLaMA\u202f2 base models of 7\u202fB, 13\u202fB, 34\u202fB, and 70\u202fB parameters were chosen. | To cover a range of sizes and to compare against the same base models. |\n| **Fine‑tuning** | Each base model was fine‑tuned on a curated set of academic benchmarks (the “grouped academic benchmarks” referr