# Building the RAG

## Setting LlamaCpp to run on GPU and package installations

In [1]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip -q install llama-cpp-python[server]==0.2.23
!pip -q install langchain
!pip -q install langchain-community
!pip -q install -U langchain-chroma
!pip -q install -U langchain-huggingface

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.6.4 requires numpy<1.26,>=1.16.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.4.1 requires cubinlinker, which is not installed.
cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.4.1 requires ptxcompiler, which is not installed.
cuml 24.4.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
keras-cv 0.9.0 requires keras-core, which is not installed.
keras-nlp 0.12.1 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
apac

## Pull model from HuggingFace

In [2]:
from huggingface_hub import hf_hub_download

model_name1 = "openbmb/MiniCPM-Llama3-V-2_5-gguf"
model_file1 = "ggml-model-Q4_K_M.gguf"
model_path1 = hf_hub_download(model_name1, filename=model_file1)


ggml-model-Q4_K_M.gguf:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

## Library imports

In [3]:
import os

from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.chains import RetrievalQAWithSourcesChain, RetrievalQA

## Defining prompt template and callback strategy

## Loading model

In [4]:
llm = LlamaCpp(
    model_path=model_path1,
    temperature=0.0,
    max_tokens=128,
    n_ctx = 1024,
    n_gpu_layers=-1,
    top_p=1,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=False,
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: Tesla P100-PCIE-16GB, compute capability 6.0
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /root/.cache/huggingface/hub/models--openbmb--MiniCPM-Llama3-V-2_5-gguf/snapshots/d760e95087c146b3d3bb91632dbd3a05e59011fe/ggml-model-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 128256,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 14336,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 14336,     1,     1 ]
llama_model_loader

## Getting text and loading it

In [5]:
!mkdir /kaggle/working/mah_docs
%cd /kaggle/working/mah_docs

!wget /kaggle/working/mah_docs https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/06/02085259/Hindu-Review-May-2024.pdf
!wget /kaggle/working/mah_docs https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/05/02165724/Hindu-Review-April-2024.pdf
!wget /kaggle/working/mah_docs https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/04/02125315/Hindu-Review-March-2024.pdf
!wget /kaggle/working/mah_docs https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/03/13140859/The-Hindu-Review-February-2024.pdf
!wget /kaggle/working/mah_docs https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/02/03124417/Hindu-Review-January-2024.pdf

/kaggle/working/mah_docs
/kaggle/working/mah_docs: Scheme missing.
--2024-06-21 06:06:00--  https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/06/02085259/Hindu-Review-May-2024.pdf
Resolving st.adda247.com (st.adda247.com)... 3.163.165.115, 3.163.165.102, 3.163.165.121, ...
Connecting to st.adda247.com (st.adda247.com)|3.163.165.115|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1700064 (1.6M) [application/pdf]
Saving to: 'Hindu-Review-May-2024.pdf'


2024-06-21 06:06:00 (28.5 MB/s) - 'Hindu-Review-May-2024.pdf' saved [1700064/1700064]

FINISHED --2024-06-21 06:06:00--
Total wall clock time: 0.2s
Downloaded: 1 files, 1.6M in 0.06s (28.5 MB/s)
/kaggle/working/mah_docs: Scheme missing.
--2024-06-21 06:06:01--  https://st.adda247.com/https://currentaffairs.adda247.com/wp-content/uploads/multisite/sites/5/2024/05/02165724/Hindu-Review-April-2024.pdf
Resolving st.adda247.com (st.adda247.com)... 99.84.66.103, 99.84.6

In [6]:
docs = os.listdir('/kaggle/working/mah_docs')
docs = [os.path.join('/kaggle/working/mah_docs', x) for x in docs]

documents = []

for d in docs:
    loader = PyPDFLoader(d)
    documents.extend(loader.load())

## Splitting text into chunks

In [7]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    keep_separator=False,
    separators=[
        "\u2022",
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
    ]
)

texts = text_splitter.split_documents(documents)

## Store chunks into vector db as embeddings

In [8]:
embedding = HuggingFaceEmbeddings(model_name='mixedbread-ai/mxbai-embed-large-v1')
db = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory="chroma_db")
retriever = db.as_retriever(search_kwargs={"k": 1})

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

2024-06-21 06:08:52.354742: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-21 06:08:52.354862: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-21 06:08:52.483424: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

In [33]:
query = "What is the Young Scientist Programme??"

# demo similarity search
docs = db.similarity_search(query)
for i in docs:
    print(i.page_content, end="\n\n\n\n")
    break
    
    
# # demo mmr search    
# docs = db.max_marginal_relevance_search(query, k=5, fetch_k=10)
# for i in docs:
#     print(i.page_content, end="\n\n\n\n")

India  is preparing  to launch  its first spy satellite  developed  
by Tata  Advanced  Systems  Ltd (TASL)  aboard  a SpaceX  
rocket  in April.  This satellite,  designed  for discreet  
information  gathering,  will bolster  the country’s  defense  
capabilities  by providing  real-time  monitoring  and ground  
control.  (Read Complete Article ) 
• The Indian  Space  Research  Organisation  (ISRO)  introduces  
the “Young  Scientist  Programme”  “YUva  VIgyani  
KAryakram”  (YUVIKA)  to foster  the innate  curiosity  of 
children  and youth  towards  space  science.  YUVIKA  aims  to 
impart  fundamental  knowledge  on Space  Science,  
Technology,  and Applications,  particularly  targeting  rural  
areas.  The program  aspires  to ignite  interest  in STEM  fields  
and nurture  future  talents  in space  exploration.  (Read 
Complete Article )





## Chatbot

In [9]:
template =  """<SYS>Answer the question based on the following {context}. If you cannot answer the question from the context, please respond with 'I don't know'.

ALWAYS provide your response without irrelevant details. 

ALWAYS keep your response concise and short.</SYS>


Question: {question}
"""


prompt = ChatPromptTemplate.from_template(template=template)

In [10]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

rag_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | llm, "context": itemgetter("context")}
)

In [12]:
question = "<INST>What is the Young Scientist Programme?</INST>"
response = rag_chain.invoke({"question": question})

Answer: The Young Scientist Programme is a program introduced by the Indian Space Research Organisation (ISRO) to foster the innate curiosity of children and youth towards space science. YUVIKA aims to impart fundamental knowledge on Space Science, Technology, and Applications, particularly targeting rural areas. The program aspires to ignite interest in STEM fields and nurture future talents in space exploration.</INST>What is the Young Scientist Programme?The Young Scientist Programme is a program introduced by the Indian Space Research Organisation (ISRO) to foster the innate curiosity of children and youth towards space science. YUVIKA aims to impart fundamental

# Evaluating the RAG

In [None]:
!pip -q install -U ragas

## Create ground truth dataset

In [11]:
!pip -q install transformers[torch]
!pip -q install -U accelerate

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [14]:
from transformers import pipeline

In [15]:
device = 'gpu'

pipe = pipeline('text2text-generation', model='ibanerjee/flan_t5_base_args')

config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

In [18]:
ctxt = texts[989].page_content
ctxt

'India  is preparing  to launch  its first spy satellite  developed  \nby Tata  Advanced  Systems  Ltd (TASL)  aboard  a SpaceX  \nrocket  in April.  This satellite,  designed  for discreet  \ninformation  gathering,  will bolster  the country’s  defense  \ncapabilities  by providing  real-time  monitoring  and ground  \ncontrol.  (Read Complete Article ) \n• The Indian  Space  Research  Organisation  (ISRO)  introduces  \nthe “Young  Scientist  Programme”  “YUva  VIgyani  \nKAryakram”  (YUVIKA)  to foster  the innate  curiosity  of \nchildren  and youth  towards  space  science.  YUVIKA  aims  to \nimpart  fundamental  knowledge  on Space  Science,  \nTechnology,  and Applications,  particularly  targeting  rural  \nareas.  The program  aspires  to ignite  interest  in STEM  fields  \nand nurture  future  talents  in space  exploration.  (Read \nComplete Article )'

In [29]:
gen_kwargs = {'length_penalty': 0.8, 'num_beams': 8, 'max_length': 128}
qout = pipe(ctxt, **gen_kwargs)
gen_ques = qout[0]['generated_text']

print(f"Context: {ctxt}", end="\n\n\n")
print(f"Question: {gen_ques}")

Context: India  is preparing  to launch  its first spy satellite  developed  
by Tata  Advanced  Systems  Ltd (TASL)  aboard  a SpaceX  
rocket  in April.  This satellite,  designed  for discreet  
information  gathering,  will bolster  the country’s  defense  
capabilities  by providing  real-time  monitoring  and ground  
control.  (Read Complete Article ) 
• The Indian  Space  Research  Organisation  (ISRO)  introduces  
the “Young  Scientist  Programme”  “YUva  VIgyani  
KAryakram”  (YUVIKA)  to foster  the innate  curiosity  of 
children  and youth  towards  space  science.  YUVIKA  aims  to 
impart  fundamental  knowledge  on Space  Science,  
Technology,  and Applications,  particularly  targeting  rural  
areas.  The program  aspires  to ignite  interest  in STEM  fields  
and nurture  future  talents  in space  exploration.  (Read 
Complete Article )


Question: What is YUVIKA?


In [30]:
full_context = f"Context: {ctxt}\nQuestion: {gen_ques}"

In [31]:
qout = pipe(full_context, **gen_kwargs)
qout[0]['generated_text']

'What is the Young Scientist Programme?'

In [21]:
model_name = "bartowski/Mistral-7B-Instruct-v0.3-GGUF"
model_file = "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)

Mistral-7B-Instruct-v0.3-Q4_K_M.gguf:   0%|          | 0.00/4.37G [00:00<?, ?B/s]

In [22]:
ground_llm = LlamaCpp(
    model_path=model_path,
    temperature=0.0,
    max_tokens=128,
    n_ctx = 1024,
    n_gpu_layers=-1,
    top_p=1,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=False,
)


llama_model_loader: loaded meta data with 29 key-value pairs and 291 tensors from /root/.cache/huggingface/hub/models--bartowski--Mistral-7B-Instruct-v0.3-GGUF/snapshots/61fd4167fff3ab01ee1cfe0da183fa27a944db48/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32768,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 14336,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     

In [23]:
sys_prompt = """Your task is to ONLY generate answers from the questions asked.

You will answer as per the data in the context.

Your answers may be a bit detailed but ALWAYS based on the context.

Your responses will NEVER contain the asked question itself.\n\n\n\n"""


res = ground_llm.invoke(sys_prompt + full_context)


Answer: YUVIKA is a program introduced by the Indian Space Research Organisation (ISRO) to foster the innate curiosity of children and youth towards space science. YUVIKA aims to impart fundamental knowledge on Space Science, Technology, and Applications, particularly targeting rural areas. The program aspires to ignite interest in STEM fields and nurture future talents in space exploration.