In [15]:
import pandas as pd
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from operator import itemgetter

In [4]:
MODEL = "llama3.1"

In [5]:
loader = PyPDFLoader(file_path="./pdf2.pdf")
pages = []
async for page in loader.alazy_load():
    pages.append(page)

print(f"Number of pages: {len(pages)}")
print(f"Length of a page: {len(pages[1].page_content)}")
print(f"Content of a page: {pages[1].page_content}")

Number of pages: 6
Length of a page: 6103
Content of a page: II. R ELATED WORKS
Document loading and segmentation are two crucial pro-
cesses for NLP tasks. Lai et al. introduced a system named
LISA which can handle complex, implicit queries by segmen-
tation documents based on user instructions. One of the main
capabilities of the tool is that it can produce segmentation
from embedding directly. This system demonstrates its zero-
shot abilities and robust performances even with limited data
for fine-tuning. [5]
Karapantelakis et al. explored the use of LLM for under-
standing telecommunication standards. They fine-tuned LLMs
to handle large and complex documents by providing faster
access to relevant information. They also demonstrate how pre-
processing as well as segmentation can contribute to increasing
the accuracy of a fine-tuned model. [6]
To improve performance of question-answer (QA) models,
Alberti et al. developed a technique to generate synthetic QA
pairs. The overall proce

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
chunks = splitter.split_documents(pages)

print(f"Number of chunks: {len(chunks)}")
print(f"Length of a chunk: {len(chunks[1].page_content)}")
print(f"Content of a chunk: {chunks[1].page_content}")


Number of chunks: 26
Length of a chunk: 1493
Content of a chunk: I. I NTRODUCTION
Large language models’ (LLMs) rapid evolution has rev-
olutionized natural language processing (NLP) in numerous
domains. However, the use of LLMs in the telecommunica-
tions sector has not been extensively implemented, especially
in tasks that require specific domain knowledge, such as
providing answers to technical questions based on 3GPP
standards. Using the TeleQnA [12] dataset, the ITU AI/ML
in 5G Challenge brings an opportunity to address this gap by
emphasizing on optimizing LLMs for telecom-specific tasks.
In this challenge, the task is to utilize either “Phi-2” [1] or
“Falcon” [2] to answer the MCQs in the TeleQnA dataset.
We design an RAG pipeline that utilizes the “Phi-2” model
to generate the answers to the MCQs. The reason behind
selecting “Phi-2” is that, it is less resource intensive compared
to Falcon. Falcon has seven billion parameters whereas Phi-
2 has two billion. The training and tes

can use any other embedding model but better to use the one that comes with the model

In [7]:
vectorstore = Chroma.from_documents(
    documents=chunks,
    collection_name="rag-llm",
    embedding=OllamaEmbeddings(model='nomic-embed-text', show_progress=True),
    persist_directory="./chroma_db"
)

OllamaEmbeddings: 100%|██████████| 26/26 [00:57<00:00,  2.21s/it]


In [8]:
retriever = vectorstore.as_retriever()
retriever.invoke("Why did the author not use Falcon?")

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.08s/it]


[Document(metadata={'page': 5, 'source': './pdf2.pdf'}, page_content='semantically related but syntactically irrelevant texts, and\nprovides precise word matching. It is especially useful in\nspecialized sectors where contextual similarity and relevant\nterminology are both critical. The hybrid method addresses the\nconstraints of vector-based search alone, resulting in a more\nextensive and accurate retrieval procedure. However, in our\nexperiments, the inference time was twice as long as that of the\nvector search. This is because two different methods were used\nsimultaneously, resulting in a time-inefficient pipeline given\nthe deadline constraint of the competition.\nThe baseline results using the pre-trained Phi-2 with the pre-\ntrained BAAI/bge-small-en-v1.5 model served as a benchmark\nfor our experiments. The significant difference between our\nbest result and baseline demonstrates the efficiency of our\npipeline in greatly enhancing the performance of the model.\nV. C ONCLUSI

In [10]:
model = ChatOllama(model=MODEL, temperature=0)
model.invoke("Who is the prime minister of Bangladesh?")

AIMessage(content="The current Prime Minister of Bangladesh is Sheikh Hasina. She has been serving as the Prime Minister since January 2009, and her party, the Awami League, won a landslide victory in the 2014 general election, which she led to power.\n\nSheikh Hasina is also the leader of the Awami League and has been a key figure in Bangladesh's politics for over four decades. She served as Prime Minister from 1996-2001 and again from 2009-present.\n\nIt's worth noting that Bangladesh has a parliamentary system, where the Prime Minister is the head of government and is responsible for appointing ministers to various portfolios.", additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2024-10-13T20:37:40.1720922Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 29805088700, 'load_duration': 28815400, 'prompt_eval_count': 18, 'prompt_eval_duration': 1127460000, 'eval_count': 132, 'eval_duration': 28646202000

In [12]:
parser = StrOutputParser() # formats model response to only have message and not instance
# input to the parser is the output of the model
chain = model | parser
print(chain.invoke("Who is the president of Bangladesh?"))

The current President of Bangladesh is Abdul Hamid. He has been serving as the 20th President of Bangladesh since April 2009, when he took office after the resignation of Iajuddin Ahmed. Prior to becoming President, Abdul Hamid served as Speaker of the Jatiya Sangsad (National Parliament) from 1991 to 1996 and again from 2009 to 2013.

However, it's worth noting that Bangladesh is a parliamentary democracy, and the Prime Minister is the head of government, while the President serves as the ceremonial head of state. The current Prime Minister of Bangladesh is Sheikh Hasina, who has been in office since January 2009.


In [14]:
template = """
You are an assistant that provides answers to questions based on a given context.

Answer the question based on the context. If you can't answer the question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
# we can pass context and question like to template. For testing purposes:
print(prompt.format(context="Here is some context", question="Here is the question"))


You are an assistant that provides answers to questions based on a given context.

Answer the question based on the context. If you can't answer the question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: Here is some context

Question: Here is the question



For chains in langchain, whatever comes before is the input to what comes after like for a | b | c, b's input is the output of a, and c's input is the output of b

In [16]:
chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)


In [18]:
questions = [
    "What is this about?",
    "Why is Falcon not used?",
    "Why did the author choose Phi-2 over other models?"
]

for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({"question": question})}")
    print("****")

Question: What is this about?


OllamaEmbeddings: 100%|██████████| 1/1 [00:03<00:00,  3.56s/it]


Answer: This document discusses a proposed approach for answering telecom-specific questions using the RAG pipeline and related methodologies.
****
Question: Why is Falcon not used?


OllamaEmbeddings: 100%|██████████| 1/1 [00:03<00:00,  3.57s/it]


Answer: I don't know.
****
Question: Why did the author choose Phi-2 over other models?


OllamaEmbeddings: 100%|██████████| 1/1 [00:03<00:00,  3.30s/it]


Answer: I don't know.
****
