#for extracting the documents from the url

In [1]:
import requests
import zipfile
import os

# Define the URL of the ZIP file
url = "https://www.3gpp.org/ftp/Specs/2023-06/Rel-18/23_series/23501-i20.zip"

# Define the directory where you want to save the downloaded ZIP file
download_dir = "downloads"

# Create the directory if it doesn't exist
if not os.path.exists(download_dir):
    os.makedirs(download_dir)

# Define the file path to save the downloaded ZIP file
zip_file_path = os.path.join(download_dir, "23501-i20.zip")

# Download the ZIP file
response = requests.get(url)
with open(zip_file_path, "wb") as zip_file:
    zip_file.write(response.content)

# Extract the document from the ZIP file
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
    # Extract all files to a directory
    zip_ref.extractall(download_dir)


##cleaning the docx and restoring the text back as docx for further use

In [2]:
!pip install docx

Collecting docx
  Downloading docx-0.2.4.tar.gz (54 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.9/54.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: docx
  Building wheel for docx (setup.py) ... [?25l[?25hdone
  Created wheel for docx: filename=docx-0.2.4-py3-none-any.whl size=53895 sha256=0270c55f85d91df6d8e744b2921c0c5746ad7a2fcb8db19b5e93472b92f9e1a9
  Stored in directory: /root/.cache/pip/wheels/81/f5/1d/e09ba2c1907a43a4146d1189ae4733ca1a3bfe27ee39507767
Successfully built docx
Installing collected packages: docx
Successfully installed docx-0.2.4


In [3]:
!pip uninstall python-docx
!pip install python-docx


[0mCollecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: python-docx
Successfully installed python-docx-1.1.2


In [4]:
from docx import Document
import re

def clean_docx(docx_file_path, output_file_path):
    # Read the contents of the original DOCX file
    doc = Document(docx_file_path)

    # Extract text from paragraphs
    document_text = ""
    for paragraph in doc.paragraphs:
        document_text += paragraph.text + "\n"

    # Remove non-alphanumeric characters and extra whitespace
    cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', '', document_text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()

    # Create a new DOCX document
    cleaned_doc = Document()

    # Add the cleaned text to the new document
    cleaned_doc.add_paragraph(cleaned_text)

    # Save the cleaned DOCX document
    cleaned_doc.save(output_file_path)

# Example usage:
docx_file_path = os.path.join(download_dir, "23501-i20.docx")
output_file_path = os.path.join(download_dir, "cleaned_23501-i20.docx")
clean_docx(docx_file_path, output_file_path)
print(f"Cleaned DOCX file saved to: {output_file_path}")



Cleaned DOCX file saved to: downloads/cleaned_23501-i20.docx


# RAG with Llama 2 and LangChain
Retrieval-Augmented Generation (RAG) is a technique that combines a retriever and a generative language model to deliver accurate response. It involves retrieving relevant information from a large corpus and then generating contextually appropriate responses to queries. Here we use the quantized version of the Llama 2 13B LLM with LangChain to perform generative QA with RAG. The notebook file has been tested in Google Colab with T4 GPU. Please change the runtime type to T4 GPU before running the notebook.

## Install required Packages to use model and langchain and Faiss

In [5]:
!pip install transformers==4.37.2 optimum==1.12.0 --quiet
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ --quiet
!pip install langchain==0.1.9 --quiet
# !pip install chromadb
!pip install sentence_transformers==2.4.0 --quiet
!pip install unstructured --quiet
!pip install pdf2image --quiet
!pip install pdfminer.six==20221105 --quiet
!pip install unstructured-inference --quiet
!pip install faiss-gpu==1.7.2 --quiet
!pip install pikepdf==8.13.0 --quiet
!pip install pypdf==4.0.2 --quiet
!pip install pillow_heif==0.15.0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.6/380.6 kB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m95.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m114.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m119.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

## downloading necessary pacakge to convert docx file into pdf
#although its not necessary

In [6]:
pip install docx2pdf

Collecting docx2pdf
  Downloading docx2pdf-0.1.8-py3-none-any.whl (6.7 kB)
Installing collected packages: docx2pdf
Successfully installed docx2pdf-0.1.8


## to use libreoffice in function during docx to pdf conversion we need it


In [7]:
!apt-get update
!apt-get install libreoffice --fix-missing

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.org (143.204.215.56)] [                                                                                                    Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
                                                                                                    Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
0% [2 InRelease 35.9 kB/119 kB 30%] [3 InRelease 31.5 kB/110 kB 29%] [Connected to cloud.r-project.o0% [3 InRelease 60.5 kB/110 kB 55%] [Waiting for headers] [Waiting for headers] [Waiting for headers                                                                                                    Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
0% [Waiting for headers] [3 InRelease 83.7 kB/110 kB 76%] [4 InRelease 3,626 

##convert docx to pdf


In [8]:
import subprocess
def convert_docx_to_pdf(input_docx_path):
    output_pdf_path = input_docx_path.replace(".docx", ".pdf")
    subprocess.run(["/usr/bin/libreoffice", "--headless", "--convert-to", "pdf", input_docx_path])
    return output_pdf_path
# Example usage
input_docx_path = "/content/downloads/23501-i20.docx"
output_pdf_path = convert_docx_to_pdf(input_docx_path)
print("PDF file converted:", output_pdf_path)


PDF file converted: /content/downloads/23501-i20.pdf


## Load Llama 2
We will use the quantized version of the LLAMA 2 13B model from HuggingFace for our RAG task.

In [None]:
#this is my access token of huggingface
# import os
# os.environ['HF_TOKEN']="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"
# os.environ['HUGGINGFACEHUB_API_TOKEN']="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"

In [9]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

model_name = "TheBloke/Llama-2-13b-Chat-GPTQ"

model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="auto",
                                             trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

gen_cfg = GenerationConfig.from_pretrained(model_name)
gen_cfg.max_new_tokens=512
gen_cfg.temperature=0.0000001 # 0.0
gen_cfg.return_full_text=True
gen_cfg.do_sample=True
gen_cfg.repetition_penalty=1.11

pipe=pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=gen_cfg
)

llm = HuggingFacePipeline(pipeline=pipe)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/837 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/7.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

#### Test LLM with Llama 2 prompt structure and LangChain PromptTemplate

## RAG from PDF Files
### A. Create a vectore store for the context/external data
Here, we'll create embedding vectores of the unstructured data loaded from the the source and store them in a vectore store.  

In [10]:
##for proper encoding
import locale
locale.getpreferredencoding = lambda: "UTF-8"

####Download pdf files

####Load PDF Files
Depending on the type of the source data, we can use the appropriate data loader from LangChain to load the data.

In [11]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.vectorstores.utils import filter_complex_metadata # 'filter_complex_metadata' removes complex metadata that are not in str, int, float or bool format
pdf_loader = UnstructuredPDFLoader("/content/23501-i20.pdf")
pdf_doc = pdf_loader.load()
updated_pdf_doc = filter_complex_metadata(pdf_doc)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


#### Spit the document into chunks
Due to the limited size of the context window of an LLM, the data need to be divided into smaller chunks with a text splitter like CharacterTextSplitter or RecursiveCharacterTextSplitter. In this way, the smaller chunks can be fed into the LLM.

In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)

chunked_pdf_doc = text_splitter.split_documents(updated_pdf_doc)
len(chunked_pdf_doc)

6622

#### Create a vector database of the chunked documents with HuggingFace embeddings

In [22]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()



We can either use FAISS or Chroma to create the [Vector Store](https://python.langchain.com/docs/modules/data_connection/vectorstores.html).

In [23]:
%%time
# Create the vectorized db with FAISS
from langchain.vectorstores import FAISS
db_pdf = FAISS.from_documents(chunked_pdf_doc, embeddings)
db_pdf


CPU times: user 1min 46s, sys: 175 ms, total: 1min 46s
Wall time: 1min 47s


<langchain_community.vectorstores.faiss.FAISS at 0x7fb1caf66410>

### B. Use RetrievalQA chain
We instantiate a RetrievalQA chain from LangChain which takes in a retriever, LLM and a chain_type as the input arguments. When the QA chain receives a query, the retriever retrieves information relevent to the query from the vectore store.   The ``chain type = "stuff"`` method stuffs all the retrieved information into context and makes a call to the language model. The LLM then generates the text/response from the retrieved documents. [See information on Langchain Retriver](https://python.langchain.com/docs/use_cases/question_answering/vector_db_qa).

**LLM prompt structure**

We can also pass in the recommended prompt structue for Llama 2 for the QA. In this way, we'd be able to advise our LLM to only use the available context to answer our question. If it cannot find information relevant to our query in the context, it'll **NOT** make up an answer, rather, it would advise that it's unable to find relevant information in the context.

In [54]:
%%time
from textwrap import fill
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# use the recommended propt style for the LLAMA 2 LLM
prompt_template = """
<s>[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
{context}

Analytical Question Example:
Question: How does the deployment of a Service Capability Exposure Function (SCEF) contribute to the overall efficiency of NF service discovery in the 5G Core network architecture? [/INST]

Factual Question Example:
Question: What is the purpose of a Notification Correlation ID in the context of "Subscribe-Notify" NF Service interactions, as described in the document? [/INST]

Inferential Question Example:
Question: Considering the described mechanisms for NF service authorization, what potential challenges might arise in ensuring seamless NF service access across different operator networks in a roaming scenario? [/INST]


{context}
you have to generate question other than provided example
{context}
Question: {question} [/INST]
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # retriever=db_pdf.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.5}),
    # Similarity Search is the default way to retrieve documents relevant to a query, but we can use MMR by setting search_type = "mmr"
    # k defines how many documents are returned; defaults to 4.
    # score_threshold allows to set a minimum relevance for documents returned by the retriever, if we are using the "similarity_score_threshold" search type.
    # return_source_documents=True, # Optional parameter, returns the source documents used to answer the question
     # (search_kwargs={'k': 5, 'score_threshold': 0.8}),
    retriever=db_pdf.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
def retrieve_chunks(query,k=2):
    matching_results=db_pdf.similarity_search(query,k=k)
    return matching_results
query = "Generate as many  Analytical question as you can raise"
result = Chain_pdf.invoke(query)
chunks=retrieve_chunks(query,k=2)
# print(result)
print(fill(result['result'].strip(), width=100))
print()
print("this is chunks from which question is coming: " ,chunks)



OutOfMemoryError: CUDA out of memory. Tried to allocate 114.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 27.06 MiB is free. Process 2056 has 14.72 GiB memory in use. Of the allocated memory 13.51 GiB is allocated by PyTorch, and 1.09 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [55]:
import os
# Set the environment variable
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'


In [46]:
%%time
from textwrap import fill
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Use the recommended prompt style for the LLAMA 2 LLM
prompt_template = """
<s>[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
{context}

Analytical Question Example:
Question: How does the deployment of a Service Capability Exposure Function (SCEF) contribute to the overall efficiency of NF service discovery in the 5G Core network architecture? [/INST]

Factual Question Example:
Question: What is the purpose of a Notification Correlation ID in the context of "Subscribe-Notify" NF Service interactions, as described in the document? [/INST]

Inferential Question Example:
Question: Considering the described mechanisms for NF service authorization, what potential challenges might arise in ensuring seamless NF service access across different operator networks in a roaming scenario? [/INST]


{context}
you have to generate question other than provided example
{context}
Question: {question} [/INST]
"""

# Prompt to feed to the model
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Instantiate the RetrievalQA chain
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db_pdf.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

# Function to retrieve the chunks from where questions are raised
def retrieve_chunks(query, k=2):
    matching_results = db_pdf.similarity_search(query, k=k)
    return matching_results

# Query to generate factual questions from documents
query = "Generate the Analytical question from documents"
result = Chain_pdf.invoke(query)

# Retrieve chunks from where questions are raised
chunks = retrieve_chunks(query, k=2)

# Parse the result to extract the generated questions
generated_questions = []
for line in result['result'].split('\n'):
    if line.startswith("Question:"):
        generated_questions.append(line.split("Question: ")[1])

# Print generated questions and chunks
print("Generated Questions:")
for question in generated_questions:
    print(question)

print("\nChunks from which questions were retrieved:")
for chunk in chunks:
    print(chunk)




Generated Questions:
How does the deployment of a Service Capability Exposure Function (SCEF) contribute to the overall efficiency of NF service discovery in the 5G Core network architecture? [/INST]
What is the purpose of a Notification Correlation ID in the context of "Subscribe-Notify" NF Service interactions, as described in the document? [/INST]
Considering the described mechanisms for NF service authorization, what potential challenges might arise in ensuring seamless NF service access across different operator networks in a roaming scenario? [/INST]
Generate the Analytical question from documents [/INST]

Chunks from which questions were retrieved:
page_content='General.......................................................................................................................................................482 Architecture................................................................................................................................................483 

In [27]:
%%time
from textwrap import fill
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# use the recommended propt style for the LLAMA 2 LLM
# prompt_template = """
# <s>[INST] <<SYS>>
# Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.

# <</SYS>>

# {context}
# give analytical ,factual and inferefntial question from documents i provided
# also thinks that factual question answer  will be  at max one sentence long or one word
# Question: {question} [/INST]
# """
prompt_template = """
<s>[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
{context}

Analytical Question Example:
Question: How does the deployment of a Service Capability Exposure Function (SCEF) contribute to the overall efficiency of NF service discovery in the 5G Core network architecture? [/INST]

Factual Question Example:
Question: What is the purpose of a Notification Correlation ID in the context of "Subscribe-Notify" NF Service interactions, as described in the document? [/INST]

Inferential Question Example:
Question: Considering the described mechanisms for NF service authorization, what potential challenges might arise in ensuring seamless NF service access across different operator networks in a roaming scenario? [/INST]


{context}
you have to generate question other than provided example
{context}
Question: {question} [/INST]
"""
#prompt to feed to model
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # retriever=db_pdf.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.5}),
    # Similarity Search is the default way to retrieve documents relevant to a query, but we can use MMR by setting search_type = "mmr"
    # k defines how many documents are returned; defaults to 4.
    # score_threshold allows to set a minimum relevance for documents returned by the retriever, if we are using the "similarity_score_threshold" search type.
    # return_source_documents=True, # Optional parameter, returns the source documents used to answer the question
     # (search_kwargs={'k': 5, 'score_threshold': 0.8}),
    retriever=db_pdf.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
#function to retrieve the chunks from where question is raising
Analytical_question=[]
def retrieve_chunks(query,k=2):
    matching_results=db_pdf.similarity_search(query,k=k)
    return matching_results
# for i in range(1,100):
query = "Generate the inferential question from documents"
result = Chain_pdf.invoke(query)
chunks=retrieve_chunks(query,k=2)
print(fill(result['result'].strip(), width=100))
print("this is chunks from which question is coming: " ,chunks)

<s>[INST] <<SYS>> Use the following context to Answer the question at the end. Do not use any other
information. If you can't find the relevant information in the context, just say you don't have
enough information to answer the question. Don't try to make up an answer. The SMF discovery and
selection functionality follows the principles stated in clause 6.3.1.  results of a discovery
procedure with NRF using the UE's SUPI as input for PCF discovery.  e) PCF selected by the AMF for
the UE.  f) MA PDU Session capability of the PCF, for an MA PDU session.  g) The PCF Group ID
provided by the AMF to the SMF.  h) PCF Set ID.  i) Same PCF Selection Indication.  In the case of
delegated discovery and selection in SCP, the SMF includes the factors b) - h), if available, in the
first request.  Mandatory  Mandatory (NOTE 1)  Mandatory (NOTE 2)  Optional  Optional  Optional
Mandatory  Mandatory (NOTE 8)  Optional (NOTE 6)  Optional (NOTE 6) Optional (NOTE 5) (NOTE 8)
Optional (NOTE 7)  No  Yes  

In [None]:
%%time
from textwrap import fill
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# use the recommended propt style for the LLAMA 2 LLM
# prompt_template = """
# <s>[INST] <<SYS>>
# Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.

# <</SYS>>

# {context}
# give analytical ,factual and inferefntial question from documents i provided
# also thinks that factual question answer  will be  at max one sentence long or one word
# Question: {question} [/INST]
# """
prompt_template = """
<s>[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.
{context}

Analytical Question Example:
Question: How does the deployment of a Service Capability Exposure Function (SCEF) contribute to the overall efficiency of NF service discovery in the 5G Core network architecture? [/INST]

Factual Question Example:
Question: What is the purpose of a Notification Correlation ID in the context of "Subscribe-Notify" NF Service interactions, as described in the document? [/INST]

Inferential Question Example:
Question: Considering the described mechanisms for NF service authorization, what potential challenges might arise in ensuring seamless NF service access across different operator networks in a roaming scenario? [/INST]


{context}
you have to generate question other than provided example
{context}
Question: {question} [/INST]
"""
#prompt to feed to model
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # retriever=db_pdf.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.5}),
    # Similarity Search is the default way to retrieve documents relevant to a query, but we can use MMR by setting search_type = "mmr"
    # k defines how many documents are returned; defaults to 4.
    # score_threshold allows to set a minimum relevance for documents returned by the retriever, if we are using the "similarity_score_threshold" search type.
    # return_source_documents=True, # Optional parameter, returns the source documents used to answer the question
     # (search_kwargs={'k': 5, 'score_threshold': 0.8}),
    retriever=db_pdf.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
#function to retrieve the chunks from where question is raising
Analytical_question=[]
def retrieve_chunks(query,k=2):
    matching_results=db_pdf.similarity_search(query,k=k)
    return matching_results
for i in range(1,100):
  query = "Generate the Inferential question from documents"
  result = Chain_pdf.invoke(query)
  chunks=retrieve_chunks(query,k=2)
  Analytical_question.append({fill(result['result'].strip(), width=100):chunks})
  # print("this is chunks from which question is coming: " ,chunks)

<s>[INST] <<SYS>> Use the following context to Answer the question at the end. Do not use any other
information. If you can't find the relevant information in the context, just say you don't have
enough information to answer the question. Don't try to make up an answer.  <</SYS>>  Solve the EN
about PIN deletion, activation and deactivation Clarification on Redundant Steering Mode
Clarification of RTT measurement for RSM Clarification on N3IWF/TNGF selection to support of S-NSSAI
needed by UE Explicit subscription to NSSF for network slice instance replacement Open issue
resolutions for MBSR support Support of provisioning periodicity set Updates for registration and
discovery for FL entity Update supporting Edge Computing Add FQDN in Traffic Detection Information
Considering capability of UPF event exposure during UPF discovery KI#1 V-SMF selection enhancement
to support HR-SBO Resolve ENs for support of PDU Set handling Clarification on IAB Authorization
Update of MBSR Configuration 

In [None]:
%%time
from textwrap import fill
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# use the recommended propt style for the LLAMA 2 LLM
prompt_template = """
<s>[INST] <<SYS>>
Use the following context to Answer the question at the end. Do not use any other information. If you can't find the relevant information in the context, just say you don't have enough information to answer the question. Don't try to make up an answer.

<</SYS>>

{context}
raise only  inferential  question as many as you can
Question: {question} [/INST]
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
Chain_pdf = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # retriever=db_pdf.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k': 5, 'score_threshold': 0.5}),
    # Similarity Search is the default way to retrieve documents relevant to a query, but we can use MMR by setting search_type = "mmr"
    # k defines how many documents are returned; defaults to 4.
    # score_threshold allows to set a minimum relevance for documents returned by the retriever, if we are using the "similarity_score_threshold" search type.
    # return_source_documents=True, # Optional parameter, returns the source documents used to answer the question
     # (search_kwargs={'k': 5, 'score_threshold': 0.8}),
    retriever=db_pdf.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
def retrieve_chunks(query,k=2):
    matching_results=db_pdf.similarity_search(query,k=k)
    return matching_results
query = "raise  infrential question "
result = Chain_pdf.invoke(query)
chunks=retrieve_chunks(query,k=2)
# print(result)
print(fill(result['result'].strip(), width=100))
print()
print("this is chunks from which question is coming: " ,chunks)

<s>[INST] <<SYS>> Use the following context to Answer the question at the end. Do not use any other
information. If you can't find the relevant information in the context, just say you don't have
enough information to answer the question. Don't try to make up an answer.  <</SYS>>  receiving
duplicated traffic via 3GPP and non-3GPP access simultaneously.  The UPF may indicate in PMF-Suspend
Duplication Request message the type of traffic (i.e. GBR or non-GBR) for which traffic duplication
is being suspended. Once the UE receives PMF-Suspend Duplication Request message from the UPF, the
UE shall stop duplicating the type of traffic for which traffic duplication is suspended.  NOTE 2:
If the UPF does not provide the type of traffic (GBR or non-GBR) in the PMF-Suspend Duplication
Request message, traffic duplication is suspended for all traffic for which traffic duplication is
being performed.  UPF may decide to resume traffic duplication for a UE by sending PMF-Resume
Duplication Request 

In [None]:
query = "raise only inferential question from the documents"
result = Chain_pdf.invoke(query)
chunks=retrieve_chunks(query,k=2)
print(fill(result['result'].strip(), width=100))
print()
print("This is the chunk from which the question is coming: {}".format(chunks))


<s>[INST] <<SYS>> Use the following context to Answer the question at the end. Do not use any other
information. If you can't find the relevant information in the context, just say you don't have
enough information to answer the question. Don't try to make up an answer.  <</SYS>>  results of a
discovery procedure with NRF using the UE's SUPI as input for PCF discovery.  e) PCF selected by the
AMF for the UE.  f) MA PDU Session capability of the PCF, for an MA PDU session.  g) The PCF Group
ID provided by the AMF to the SMF.  h) PCF Set ID.  i) Same PCF Selection Indication.  In the case
of delegated discovery and selection in SCP, the SMF includes the factors b) - h), if available, in
the first request.  The selected PCF instance for serving the UE and the selected PCF instance for
serving a PDU session of this UE may be the same or may be different.  In the case of delegated
discovery, the AMF, shall send all the available factors a)-d), k) and n) to the SCP.  In addition,
the AMF may

### C. Hallucination Check
Hallucination in RAG refers to the generation of content by an LLM that is not based onn the retrieved knowledge.

Let's test our LLM with a query that is not relevant to the context. The model should respond that it does not have enough information to respond to this query.

In [None]:
%%time
query = "Explain in detail how the solar system was formed."
result = Chain_pdf.invoke(query)
chunks=retrieve_chunks(query,k=2)
print(fill(result['result'].strip(), width=100))
chunks=retrieve_chunks(query,k=2)
print("This is the chunk from which the question is coming: {}".format(chunks))


<s>[INST] <<SYS>> Use the following context to Answer the question at the end. Do not use any other
information. If you can't find the relevant information in the context, just say you don't have
enough information to answer the question. Don't try to make up an answer.  <</SYS>>  AF Discovery
and Selection.......................................................................................
...............................575 NRF discovery and selection......................................
...............................................................................575  SP-230062
SP-230078 SP-230040  SP-230056  SP-230064 SP-230043 SP-230077 SP-230053  3823 3825  3830  3831 3834
3835 3837  3840 3841 3842 3992 3843  3844 3848 3850 3854 3855 3858  3859 3860 3864 3867 3870 3871
3872  3875  3878 3881 3883  3886 3887 3892  3895  3896  3897 3898 3910  3912 3914  3919  3923 3924
3925 3926 3927  3928  3929  3933 3935  3937  3939 3948 3949 3953  6 1  1  1 1 3 1  1 - 3 3 1  9 - -
4 11 1  1 1 

In [None]:
%%time
query = "What are the planets of the solar system composed of? Give a detailed response."
result = Chain_pdf.invoke(query)
print(fill(result['result'].strip(), width=100))

Based on the provided context, the planets of the solar system are composed of a variety of
materials, including rocks, metals, gases, and ices.  The four terrestrial planets - Mercury, Venus,
Earth, and Mars - are composed primarily of rocky materials, such as silicates and metals. These
planets are also composed of iron and nickel, and have a definite surface.  The four giant planets -
Jupiter, Saturn, Uranus, and Neptune - are composed mostly of gases, with extremely low melting
points and high vapor pressure. These gases include hydrogen, helium, and neon. In addition, these
planets have large amounts of ices, such as water, methane, ammonia, hydrogen sulfide, and carbon
dioxide. These ices can be found as solids, liquids, or gases throughout the Solar System.  The
composition of the planets in the Solar System is important because it affects their properties and
characteristics. For example, the high metallicity of the Sun is thought to have played a role in
the formation of its p

The model responded as expected. The context provided to it do not contain any information on tranformers architectures. So, it cannot answer this question and do not suffer from hallucination!