In [None]:
!pip install chromadb
!pip install bitsandbytes
!pip install accelerate
!pip install transformers
!pip install langchain
!pip install unstructured
!pip install sentence_transformers

In [None]:
import os
from typing import Optional

import torch
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

def load_model(model_id: str, token: Optional[str] = None) -> Optional[dict]:
    """
    Load a 4-bit (NF4) quantized causal language model and its tokenizer.

    Args:
        model_id (str): Model identifier passed to ``from_pretrained``.
        token (str, optional): Hugging Face access token. When omitted, the
            ``HF_TOKEN`` environment variable is used. If neither is set,
            ``None`` is passed on to ``from_pretrained`` (the library's own
            default credential lookup presumably applies - TODO confirm for
            the installed transformers version).

    Returns:
        dict: ``{"model": model, "tokenizer": tokenizer}`` on success, or
        ``None`` if anything raised while loading (the error is printed).
    """
    # Credentials must not live in the notebook source: take the token from
    # the caller or from the environment.
    hf_token = token or os.environ.get("HF_TOKEN")

    try:
        # 4-bit NF4 quantization with double quantization, computing in bfloat16.
        nf4_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        # Load the quantized model, then the matching tokenizer.
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            quantization_config=nf4_config,
            token=hf_token,
            low_cpu_mem_usage=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

        return {"model": model, "tokenizer": tokenizer}
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # detect the failure by checking for None.
        print(f"Error loading model: {e}")
        return None



In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

def load_huggingface_embeddings(model_name: str, model_kwargs: dict) -> HuggingFaceEmbeddings:
    """
    Build a ``HuggingFaceEmbeddings`` instance for the requested model.

    Args:
        model_name (str): Name of the Hugging Face model to wrap.
        model_kwargs (dict): Keyword arguments forwarded as ``model_kwargs``
            to ``HuggingFaceEmbeddings``.

    Returns:
        HuggingFaceEmbeddings: The constructed embeddings object, or ``None``
        when construction raised (the error is printed instead of propagated).
    """
    try:
        return HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
    except Exception as exc:
        # Best-effort: report the failure; the caller receives None.
        print(f"Error loading Hugging Face embeddings model: {exc}")
    return None

In [None]:
# Colab-only: mount Google Drive under /content/drive.
# NOTE(review): the data directory used later in this notebook is
# /content/data, which is not under the mount point - confirm this mount is
# still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores.chroma import Chroma
import os
from langchain_community.document_loaders import PyPDFLoader



def process_documents_and_query(DATA_PATH: str, embeddings, CHROMA_PATH: str) -> "Chroma":
    """
    Load the ``*.txt`` documents found in a directory, split them into
    overlapping chunks and index the chunks in a persisted Chroma store.

    Despite the name, no query is run here; use ``find_similarity`` on the
    returned store for that.

    Args:
        DATA_PATH (str): Path to the directory containing text documents.
        embeddings: Embedding model handed to Chroma to embed the chunks.
        CHROMA_PATH (str): Directory where the Chroma database is persisted.

    Returns:
        Chroma: The vector store built from the document chunks.

    Raises:
        ValueError: If splitting produced no chunks (e.g. no ``*.txt`` files
            were found under ``DATA_PATH``).
    """
    loader = DirectoryLoader(DATA_PATH, glob="*.txt")
    documents = loader.load()

    # 300-character chunks with 100 characters of overlap; add_start_index
    # records each chunk's offset in its source document's metadata.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    if not chunks:
        # Fail with a clear message instead of an IndexError on chunks[0].
        raise ValueError(f"No '*.txt' content found under {DATA_PATH!r}; nothing to index.")

    # Show the first chunk as a quick sanity check of the split.
    document = chunks[0]
    print(document.page_content)
    print(document.metadata)

    # Persist to the directory the caller asked for.
    db = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=CHROMA_PATH)

    return db


def find_similarity(db, query_text, k: int = 3, score_threshold: float = 0.7):
    """
    Retrieve context for a query from the vector store.

    Args:
        db: Vector store exposing ``similarity_search_with_relevance_scores``.
        query_text (str): Text to search for.
        k (int, optional): Number of top results to retrieve. Defaults to 3.
        score_threshold (float, optional): Minimum relevance score the first
            result must reach for the search to count as a match.
            Defaults to 0.7.

    Returns:
        str: The page contents of all retrieved results joined by
        ``"\\n\\n---\\n\\n"``, or an empty string when there are no results or
        the first result scores below ``score_threshold``.
    """
    results = db.similarity_search_with_relevance_scores(query_text, k=k)

    # Only the first result is compared with the threshold (assumes the store
    # returns results best-first - TODO confirm). When it passes, every
    # retrieved result is included regardless of its own score.
    if not results or results[0][1] < score_threshold:
        print("Unable to find matching results.")
        return ""

    return "\n\n---\n\n".join(doc.page_content for doc, _score in results)



In [None]:
from langchain.prompts import ChatPromptTemplate

def generate_response_with_context(context_text: str, query_text: str, model, tokenizer):
    """
    Generate a response to a query based on the provided context using a language model.

    The context is inserted into a system prompt, the query is sent as the
    user message, and the model's reply is sampled (so repeated calls can
    return different text).

    Args:
        context_text (str): The context text used to generate the response.
            May be an empty string when retrieval found nothing.
        query_text (str): The query text for which a response is generated.
        model: The language model used for response generation; must expose
            ``device`` and ``generate``.
        tokenizer: The tokenizer associated with the language model; must
            expose ``apply_chat_template``, ``eos_token_id``,
            ``convert_tokens_to_ids`` and ``decode``.

    Returns:
        str: The generated response, decoded without special tokens.
    """
    PROMPT_TEMPLATE = """
    "You are youth from Dominican Republic."
    Answer the question based only on the following context:
    If the context is blank string, then just use your own knowledge

    {context}

    ---
    """

    # Plain string formatting on purpose: rendering this through
    # ChatPromptTemplate.format() yields a chat transcript string, which
    # prefixes the text with "Human: " and would leak that role label into
    # the system message.
    prompt = PROMPT_TEMPLATE.format(context=context_text)
    print(prompt)

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": query_text},
    ]

    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # The output sequence starts with the prompt tokens; keep only what was
    # generated after them.
    response = outputs[0][input_ids.shape[-1]:]

    return tokenizer.decode(response, skip_special_tokens=True)




In [None]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
loaded_model = load_model(model_id)
if loaded_model is None:
    # Stop here: a later cell indexes into loaded_model and would otherwise
    # fail with an unrelated TypeError on None.
    raise RuntimeError(f"Failed to load model {model_id!r}; see the error printed above.")


In [None]:
# NOTE(review): chromadb is already installed by the first cell of this
# notebook; this repeated install looks redundant - confirm and remove.
!pip install chromadb

In [None]:
import time
import numpy as np

# Reference point for the elapsed-time measurement; end_time is taken in the
# final cell of the notebook.
start_time = time.time()

# load_model() returned a dict holding the model and its tokenizer.
model = loaded_model["model"]
tokenizer = loaded_model["tokenizer"]
print("Model loaded successfully!")

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = load_huggingface_embeddings(model_name, model_kwargs)
if embeddings is None:
    # load_huggingface_embeddings() reports failures by returning None; stop
    # now rather than passing None on as the embedding model.
    raise RuntimeError(f"Failed to load embeddings model {model_name!r}; see the error printed above.")

In [None]:
# Directory holding the source documents to index (absolute path under /content).
DATA_PATH = "/content/data"
# Chroma storage path handed to process_documents_and_query (relative path).
CHROMA_PATH = "chroma"
# Build the vector store from the documents using the embeddings loaded earlier.
db = process_documents_and_query(DATA_PATH, embeddings, CHROMA_PATH)

In [None]:
# Question to answer; used both for retrieval and as the user message.
query_text = "Who is the current president of the Dominican Republic?"
# Retrieve supporting context from the vector store (default k).
context_text = find_similarity(db, query_text)
#print("Context Text:", context_text)
# Generate the answer from the retrieved context with the loaded model and tokenizer.
response = generate_response_with_context(context_text, query_text, model, tokenizer)


In [None]:
print("Question: ", query_text)
print("Answer: ", response)
end_time = time.time()
# start_time was recorded at the top of the cell that unpacks the loaded model
# and loads the embeddings, so this covers that setup, indexing and generation.
print(f"Elapsed time: {end_time - start_time:.2f} s")

###The End
