# Configuration


In [8]:
import os
from dotenv import load_dotenv
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

In [None]:
from huggingface_hub import login
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


: 

In [3]:
import logging
import warnings

import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    GenerationConfig,
)
import traceback
from langchain_core.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import Document
from rich import print as rprint
from rich.panel import Panel
from tqdm import tqdm
import warnings
import re


CACHE_DIR = "./models"

warnings.filterwarnings("ignore")
# Default constants for the script
show_context = True
chat_model = "asif00/bangla-llama-1B"
embed_model = "l3cube-pune/bengali-sentence-similarity-sbert"
text_path = "test.txt"
k = 4
top_k = 2
top_p = 0.6
temperature = 0.6
chunk_size = 500
chunk_overlap = 150
max_new_tokens = 256


# Experiment with Asif Bangla Llama


In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load tokenizer and model (after conversion)
tokenizer = AutoTokenizer.from_pretrained("asif00/bangla-llama-1B")
# model = AutoModelForCausalLM.from_pretrained("path/to/converted-model")
model = AutoModelForCausalLM.from_pretrained("asif00/bangla-llama-1B", torch_dtype=torch.float16, device_map="auto")

prompt = """
নিচের নির্দেশনা বাংলা ভাষায় যা একটি কাজ বর্ণনা করে, এবং ইনপুটও বাংলা ভাষায় যা অতিরিক্ত প্রসঙ্গ প্রদান করে। উপযুক্তভাবে অনুরোধ পূরণ করে বাংলা ভাষায় একটি প্রতিক্রিয়া লিখুন।

### নির্দেশনা:
{}

### ইনপুট:
{}

### প্রতিক্রিয়া:
"""

def generate_response(instruction, context):
    input_text = prompt.format(instruction, context)
    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, max_length=512, eos_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response = response.split("### প্রতিক্রিয়া:")[-1].strip()
    return response

# Example
instruction = "ভারতীয় বাঙালি কথাসাহিত্যিক মহাশ্বেতা দেবীর সম্পর্কে একটি সংক্ষিপ্ত বিবরণ দিন।"
context = "মহাশ্বেতা দেবী ২০১৬ সালে হৃদরোগে আক্রান্ত হয়ে কলকাতায় মৃত্যুবরণ করেন।"
print(generate_response(instruction, context))

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
2025-05-11 00:43:08,242 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


মহাশ্বেতা দেবী একজন বিখ্যাত বাংলা কথাসাহিত্যিক মহাশ্বেতা হিসে


# RAG Pipeline


In [4]:
class BanglaRAGChain:
    """
    Bangla Retrieval-Augmented Generation (RAG) Chain for question answering.

    This class uses a HuggingFace/local language model for text generation, a Chroma vector database for
    document retrieval, and a custom prompt template to create a RAG chain that can generate
    responses to user queries in Bengali.
    """

    def __init__(self):
        """Initializes the BanglaRAGChain with default parameters."""
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.chat_model_id = None
        self.embed_model_id = None
        self.k = 4
        self.max_new_tokens = 1024
        self.chunk_size = 500
        self.chunk_overlap = 150
        self.text_path = ""
        self.temperature = 0.9
        self.top_p = 0.6
        self.top_k = 50
        self._text_content = None
        self.hf_token = None

        self.tokenizer = None
        self.chat_model = None
        self._llm = None
        self._retriever = None
        self._db = None
        self._documents = []
        self._chain = None

    def load(
        self,
        chat_model_id,
        embed_model_id,
        text_path,
        k=4,
        top_k=2,
        top_p=0.6,
        max_new_tokens=1024,
        temperature=0.6,
        chunk_size=500,
        chunk_overlap=150,
        hf_token=None,
    ):
        """
        Loads the required models and data for the RAG chain.

        Args:
            chat_model_id (str): The Hugging Face model ID for the chat model.
            embed_model_id (str): The Hugging Face model ID for the embedding model.
            text_path (str): Path to the text file to be indexed.
            k (int): The number of documents to retrieve.
            top_k (int): The top_k parameter for the generation configuration.
            top_p (float): The top_p parameter for the generation configuration.
            max_new_tokens (int): The maximum number of new tokens to generate.
            temperature (float): The temperature parameter for the generation configuration.
            chunk_size (int): The chunk size for text splitting.
            chunk_overlap (int): The chunk overlap for text splitting.
            hf_token (str): The Hugging Face token for authentication.
        """
        self.chat_model_id = chat_model_id
        self.embed_model_id = embed_model_id
        self.k = k
        self.top_k = top_k
        self.top_p = top_p
        self.temperature = temperature
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.text_path = text_path
        self.max_new_tokens = max_new_tokens
        self.hf_token = hf_token

        if self.hf_token is not None:
            os.environ["HF_TOKEN"] = str(self.hf_token)

        rprint(Panel("[bold green]Loading chat models...", expand=False))
        self._load_models()

        rprint(Panel("[bold green]Creating document...", expand=False))
        self._create_document()

        rprint(Panel("[bold green]Updating Chroma database...", expand=False))
        self._update_chroma_db()

        rprint(Panel("[bold green]Initializing retriever...", expand=False))
        self._get_retriever()

        rprint(Panel("[bold green]Initializing LLM...", expand=False))
        self._get_llm()
        rprint(Panel("[bold green]Creating chain...", expand=False))
        self._create_chain()

    def _load_models(self):
        """Loads the chat model and tokenizer."""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.chat_model_id)
            self.chat_model = AutoModelForCausalLM.from_pretrained(
                    self.chat_model_id,
                    device_map="auto",
                    torch_dtype=torch.float16,
                    cache_dir=CACHE_DIR
                )
        except Exception as e:
            rprint(Panel(f"[red]Error loading chat model: {e}", expand=False))

    def _create_document(self):
        """Splits the input text into chunks using RecursiveCharacterTextSplitter."""
        try:
            with open(self.text_path, "r", encoding="utf-8") as file:
                self._text_content = file.read()
            character_splitter = RecursiveCharacterTextSplitter(
                separators=["!", "?", "।"],
                chunk_size=self.chunk_size,
                chunk_overlap=self.chunk_overlap,
            )
            self._documents = list(
                tqdm(
                    character_splitter.split_text(self._text_content),
                    desc="Chunking text",
                )
            )
            print(f"Number of chunks: {len(self._documents)}")
            rprint(Panel("[bold green]Document created successfully!", expand=False))
        except Exception as e:
            rprint(Panel(f"[red]Chunking failed: {e}", expand=False))

    def _update_chroma_db(self):
        """Updates the Chroma vector database with the text chunks."""
        try:
            rprint(Panel("[bold green]Loading embedding model...", expand=False))
            embeddings = HuggingFaceEmbeddings(
                model_name=self.embed_model_id,
                model_kwargs={"device": self._device},
            )
            rprint(Panel("[bold green]Embedding model loaded.", expand=False))
            docs = [Document(page_content=chunk) for chunk in self._documents]
            self._db = Chroma.from_documents(
                collection_name='porag_rag_collection',
                persist_directory=CACHE_DIR,
                documents=docs,
                embedding=embeddings
            )
            self._db.persist()  
            rprint(Panel("[bold green]Chroma DB created and persisted!", expand=False))
        except Exception as e:
            print(e)
            rprint(Panel(f"[red]Vector DB initialization failed: {e}", expand=False))
            traceback.print_exc()


    def _create_chain(self):
        """Creates the retrieval-augmented generation (RAG) chain."""
        template = """Below is an instruction in Bengali language that describes a task, paired with an input also in Bengali language that provides further context. Write a response in Bengali that appropriately completes the request.

        ### Instruction:
        {question}

        ### Input:
        {context}

        ### Response:
        """
        prompt_template = ChatPromptTemplate(
            input_variables=["question", "context"],
            output_parser=None,
            partial_variables={},
            messages=[
                HumanMessagePromptTemplate(
                    prompt=PromptTemplate(
                        input_variables=["question", "context"],
                        output_parser=None,
                        partial_variables={},
                        template=template,
                        template_format="f-string",
                        validate_template=True,
                    ),
                    additional_kwargs={},
                )
            ],
        )

        try:
            rag_chain_from_docs = (
                RunnablePassthrough.assign(
                    context=lambda x: self._format_docs(x["context"])
                )
                | prompt_template
                | self._llm
                | StrOutputParser()
            )

            rag_chain_with_source = RunnableParallel(
                {"context": self._retriever, "question": RunnablePassthrough()}
            ).assign(answer=rag_chain_from_docs)

            self._chain = rag_chain_with_source
            rprint(Panel("[bold green]RAG chain created successfully!", expand=False))
        except Exception as e:
            rprint(Panel(f"[red]RAG chain initialization failed: {e}", expand=False))

    def _get_llm(self):
        """Initializes the language model for the generation."""
        try:
            config = GenerationConfig(
                do_sample=True,
                temperature=self.temperature,
                max_new_tokens=self.max_new_tokens,
                top_p=self.top_p,
                top_k=self.top_k,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
                bos_token_id=self.tokenizer.bos_token_id, 
            )
            pipe = pipeline(
                "text-generation",
                model=self.chat_model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.float16,
                device_map="auto",
                generation_config=config,
            )
            self._llm = HuggingFacePipeline(pipeline=pipe)
            rprint(Panel("[bold green]LLM initialized successfully!", expand=False))
        except Exception as e:
            rprint(Panel(f"[red]LLM initialization failed: {e}", expand=False))

    def _get_retriever(self):
        """Initializes the retriever for document retrieval."""
        try:
            self._retriever = self._db.as_retriever(
                search_type="similarity", search_kwargs={"k": self.k}
            )
            rprint(
                Panel("[bold green]Retriever initialized successfully!", expand=False)
            )
        except Exception as e:
            rprint(Panel(f"[red]Retriever initialization failed: {e}", expand=False))

    def _format_docs(self, docs):
        """Formats the retrieved documents into a single string."""
        return "\n\n".join(doc.page_content for doc in docs)

    def _clean_up(self, messages):
        messages = re.sub("[^A-Za-z]+", "", messages)
        return messages

    def get_response(self, query):
        """
        Generates a response to the query using the RAG chain.

        Args:
            query (str): The input query.

        Returns:
            tuple: A tuple containing the generated response and the retrieved context.
        """
        try:
            response = self._chain.invoke(query)
            response_start = response["answer"].find("### Response:") + len(
                "### Response:"
            )
            final_answer = response["answer"][response_start:].strip()
            if self._clean_up(final_answer):
                self.get_response(query)

            return final_answer, self._format_docs(response["context"])
        except Exception as e:
            rprint(Panel(f"[red]Answer generation failed: {e}", expand=False))
            return None, None


In [5]:
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

try:
    rag_chain = BanglaRAGChain()
    rag_chain.load(
        chat_model_id=chat_model,
        embed_model_id=embed_model,
        text_path=text_path,
        k=k,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        hf_token=hf_token,
        max_new_tokens=max_new_tokens
    )
    logging.info(
        f"RAG model loaded successfully: chat_model={chat_model}, embed_model={embed_model}"
    )

except Exception as e:
    logging.critical(f"Fatal error: {e}")
    print("Error occurred, please check logs for details.")

2025-05-11 05:24:58,804 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Chunking text: 100%|██████████| 7/7 [00:00<00:00, 66576.25it/s]

Number of chunks: 7





2025-05-11 05:25:06,873 - INFO - Load pretrained SentenceTransformer: l3cube-pune/bengali-sentence-similarity-sbert


2025-05-11 05:25:15,841 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Device set to use cuda:0


2025-05-11 05:25:21,581 - INFO - RAG model loaded successfully: chat_model=asif00/bangla-llama-1B, embed_model=l3cube-pune/bengali-sentence-similarity-sbert


In [6]:
def prompt(query: str):
    try:
        answer, context = rag_chain.get_response(query)
        if show_context:
            print(f"প্রসঙ্গঃ {context}\n------------------------\n")
        print(f"উত্তর: {answer}")
    except Exception as e:
        logging.error(f"Couldn't generate an answer: {e}")
        print("আবার চেষ্টা করুন!")
    except Exception as e:
        logging.critical(f"Fatal error: {e}")
        print("Error occurred, please check logs for details.")

In [7]:
prompt("রবীন্দ্রনাথ ঠাকুরের জন্মস্থান কোথায়?")

প্রসঙ্গঃ ।

শৈশব ও শিক্ষা

রবীন্দ্রনাথ ঠাকুর কলকাতার জোড়াসাঁকোর ঠাকুরবাড়িতে জন্মগ্রহণ করেন। তাঁর পিতা মহর্ষি দেবেন্দ্রনাথ ঠাকুর এবং মাতা সারদাসুন্দরী দেবী। তিনি বাড়িতেই প্রাথমিক শিক্ষা গ্রহণ করেন। বাড়িতে সংস্কৃত, ইংরেজি ও বাংলা ভাষায় শিক্ষা লাভ করেন। পরে ইংল্যান্ডে পাঠানো হয়, কিন্তু সেখান থেকে পুরো শিক্ষা সম্পূর্ণ না করে দেশে ফিরে আসেন।

সাহিত্যিক কর্ম

রবীন্দ্রনাথ ঠাকুরের সাহিত্যকর্মের পরিধি অত্যন্ত ব্যাপক। তিনি ৫২টি কাব্যগ্রন্থ, ৩৮টি নাটক, ১৩টি উপন্যাস এবং ৯৫টি ছোটগল্প লিখেছেন

রবীন্দ্রনাথ ঠাকুরের জীবনী

রবীন্দ্রনাথ ঠাকুর (৭ মে ১৮৬১ - ৭ আগস্ট ১৯৪১) ছিলেন একজন বাঙালি কবি, সাহিত্যিক, নাট্যকার, সংগীতজ্ঞ, চিত্রশিল্পী এবং দার্শনিক। তিনি ভারতীয় সাহিত্য ও সঙ্গীতের প্রবাদপ্রতিম ব্যক্তিত্ব। ঠাকুর পরিবারের সমৃদ্ধ সাংস্কৃতিক পরিবেশে জন্ম নেওয়া রবীন্দ্রনাথ শৈশব থেকেই কবিতা ও গল্প লেখা শুরু করেন। তাঁর সাহিত্যকর্ম বাংলা সাহিত্যে বৈপ্লবিক পরিবর্তন আনে এবং সারা বিশ্বে সমাদৃত হয়।

শৈশব ও শিক্ষা

রবীন্দ্রনাথ ঠাকুর কলকাতার জোড়াসাঁকোর ঠাকুরবাড়িতে জন্মগ্রহণ করেন

।

মৃত্যু ও উত্তরাধিকার

রবীন্