# Hindi and Tamil Question Answer / RAG

In this notebook, we use new Navrasa LLMs from TeluguLLM to create a Hindi and Tamil Question Answering system

In [1]:
!pip install -U fastembed dataset qdrant-client peft transformers accelerate bitsandbytes -qq

In [2]:
from datasets import load_dataset
from fastembed import TextEmbedding
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
from typing import List
import numpy as np



## Setting Up

We'll download the dataset, our LLM model weights and embedding model weights next

In [3]:
ds = load_dataset("nirantk/chaii-hindi-and-tamil-question-answering", split="train")

In [4]:
embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

In [5]:
model = AutoPeftModelForCausalLM.from_pretrained(
    "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa",
    load_in_4bit = False,
    token="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"
)
tokenizer = AutoTokenizer.from_pretrained("Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [6]:
questions, contexts = list(ds['question']), list(ds['context'])

In [7]:
context_embeddings: List[np.ndarray] = list(embedding_model.embed(contexts)) # Note the list() call - this is a generator

In [8]:
context_points = [PointStruct(
    id=idx,
    vector=emb,
    payload={"text": text}
) for idx, (emb, text) in enumerate(zip(context_embeddings, contexts))]

In [None]:
input_prompt = """
Answer the following question based on the context given after it in the same language as the question: 
### Question:
{}

### Context:
{}

### Answer:
{}"""

input_text = input_prompt.format(
        questions[100], # question
        contexts[100], # context
        "", # output - leave this blank for generation!
    )

inputs = tokenizer([input_text], return_tensors = "pt")

outputs = model.generate(**inputs, max_new_tokens = 300, use_cache = True)
response = tokenizer.batch_decode(outputs)[0]

In [None]:
response