## LangChain with Hugging Face models - RAG

### Import packages

In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter  
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.vectorstores import Weaviate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.agents import AgentType, initialize_agent
from langchain.schema import SystemMessage

from langchain import HuggingFacePipeline

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline, BitsAndBytesConfig, AutoModelForSeq2SeqLM
import torch

from typing import List, Optional, Union
import weaviate
from weaviate.embedded import EmbeddedOptions
import weaviate
from weaviate.embedded import EmbeddedOptions

  from .autonotebook import tqdm as notebook_tqdm
2024-02-03 12:21:19.520563: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-03 12:21:19.520603: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-03 12:21:19.521487: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-03 12:21:19.527050: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  r

In [2]:
# Try to load the Amharic LLaMA-2 checkpoint from the Hugging Face Hub as a causal LM.
# NOTE(review): the recorded output of this cell is an OSError (no weight file
# found in the repo), so `model` is never bound here. `model_name` and `model`
# are both reassigned later in the notebook; consider removing this cell.
model_name = "iocuydi/llama-2-amharic-3784m"
model = AutoModelForCausalLM.from_pretrained(model_name)

OSError: iocuydi/llama-2-amharic-3784m does not appear to have a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack.

### Add the embedding

In [3]:

# Checkpoint used to embed the document chunks.
# Alternatives that were tried and left disabled:
#   "sentence-transformers/clip-ViT-B-32"
#   "sentence-transformers/average_word_embeddings_komninos"
#   "iocuydi/llama-2-amharic-3784m"
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"

# No extra keyword arguments are forwarded to the underlying model.
model_kwargs = {}

embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

In [4]:
def data_loader(file_path: str, chunk_size: int = 500, chunk_overlap: int = 50) -> Optional[List]:
    """
    Load a text file and split it into chunks.

    This function only loads and splits the file; nothing is written to the
    vector database here (that happens in ``create_retriever``).

    Parameters:
    - file_path (str): The path to the file containing the data.
    - chunk_size (int): The size of each data chunk. Default is 500.
    - chunk_overlap (int): The overlap between consecutive chunks. Default is 50.

    Returns:
    - list | None: The chunks produced by ``split_documents`` (document
      objects, not plain strings), or None if loading or splitting failed.
    """
    try:
        documents = TextLoader(file_path).load()

        # Chunk the data
        text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_documents(documents)

        # Report what actually happened: the file was loaded and split.
        print(f"Loaded {file_path} and split it into {len(chunks)} chunks")
        return chunks
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with
        # None so the notebook keeps running. Callers must check for None.
        print(f"An unexpected error occurred: {e}")
        return None
    
    
def create_retriever(chunks):
    """
    Build a retriever over ``chunks`` backed by an embedded Weaviate instance.

    The chunks are embedded with the module-level ``embeddings`` object and
    stored through ``Weaviate.from_documents``; the resulting vector store is
    then exposed as a retriever.

    Parameters:
    - chunks: The document chunks to index.

    Returns:
    - The retriever on success, or None if any step raised an exception.
    """
    try:
        # Start (or attach to) the embedded Weaviate instance.
        weaviate_client = weaviate.Client(embedded_options=EmbeddedOptions())

        # Index the chunks using vectors from the Hugging Face embedding model.
        store = Weaviate.from_documents(
            client=weaviate_client,
            documents=chunks,
            embedding=embeddings,
            by_text=False,
        )

        # Wrap the vector store as a retriever to enable semantic search.
        chunk_retriever = store.as_retriever()
        print("Retriever created successfully.")
        return chunk_retriever
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

### Load the data and create chunks

In [5]:
# Load the context file and split it with data_loader's default chunk settings.
# NOTE(review): data_loader returns None on failure, in which case len() below
# raises TypeError. The name `chuncks` (sic) is reused by the retriever cell.
chuncks = data_loader("../prompts/context.txt")
len(chuncks)

Data loaded to vector database successfully


9

### Create the retriever

In [6]:
# Index the chunks and keep the retriever; create_retriever returns None on failure.
retriever = create_retriever(chuncks)

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            
{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-02-03T12:21:43+03:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-02-03T12:21:43+03:00"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-02-03T12:21:44+03:00"}


Started /home/babi/.cache/weaviate-embedded: process ID 218546


{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2024-02-03T12:21:44+03:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2024-02-03T12:21:44+03:00"}
{"level":"info","msg":"Completed loading shard langchain_0a43cce8a9f54813a3ad4f3d498d6434_NISdUtmHet4a in 12.697056ms","time":"2024-02-03T12:21:45+03:00"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-02-03T12:21:45+03:00","took":106241}
{"level":"info","msg":"Completed loading shard langchain_0312ef7773c64bd5a9d3d2ba96f090fb_0TLKGeLxTnEG in 52.510427ms","time":"2024-02-03T12:21:45+03:00"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-02-03T12:21:45+03:00","took":116783}
{"level":"info","msg":"Completed loading shard langchain_133fb57

Retriever created successfully.


In [None]:
# NOTE(review): this cell was never executed (In [None]) and the call passes no
# arguments; presumably the documents to add must be supplied. Confirm or remove.
retriever.add_documents()

In [7]:
# Sanity check: display the concrete class of the retriever object.
type(retriever)

langchain.vectorstores.base.VectorStoreRetriever

In [8]:
# Smoke-test retrieval with an Amharic query string and display the returned documents.
retriever.get_relevant_documents("ኢትዮጵያ")

[Document(page_content='በ1500 ሜትር ሴቶች ገንዘቤ ዲባባ ወደ ፍፃሜ ያለፈች ብቸኛ የኢትዮጵያ ተወካይ ናት።\nየሴቶች የ10 ሺ ሜትር የፍፃሜ ውድድር ተጀመረ። መልካም እድል ለኢትዮጵያዊያን አትሌቶች !\nአልማዝ አያና ውድድሩ ገና 5 ሺ ሜትር እየቀረው ወደፊት ወጥታለች። አልማዝ አልማዝዬ!\nይህ ገና ውድድሩ ሳይጠናቀቅ መሆኑ ነው አልማዝ አያና ለኢትዮጵያ የመጀመሪያውን የወርቅ ሜዳልያ አስገኝታለች እንኳን ደስ አላችሁ !\nአልማዜ ስንቱን ደርበሽ ትችይዋለሽ\nወርቅቅቅቅቅቅቅቅቅቅቅቅ ለኢትዮጵያ !\n1ኛ : አልማዝ አያና 2ኛ : ጥሩነሽ ዲባባ እንኳን ደስ አላችሁ !\n: በትዊተር ገፁ 30:1633\nአልማዛችን !', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='ታሪክ የሌለው ህዝብ ታሪካዊ ስራ ለመስራት አይጓጓም። ነፃነትም የማያውቅ\nህዝብ ነፃነቱን ለማግኘት አይደክምም። ኢትዮጵያ የረጅም ዘመን ታሪክ ያላት ነፃ \nሀገር ስለሆነች ልጆቿ ታሪኳን አውቀው የነፃነቷን ጉልላት ጠብቀው/ ከተፈፀሙ ስህተቶች \nተምረው እንዲኖሩ ግዴታቸው ነው። አንድ ወጣት የሀገሩን ታሪክ ሳያውቅና ሳያጠናቅቅ\nየአውሮጳንና የአሜሪካንን ታሪክ ለማሳመር ቢሞክር << በቅሎ አባትህ ማነው? ቢሉት\n\nፈረስ አጎቴ ነው>> ይላል እንደሚባለው የሚያሳፍር ይሆናል። በሀገራችን ተስፋ አንቆርጥም!\nኢትዮጵያ ኣደይ !\nአደራ ልጄ ገንዘብ ውርሴን አምጪ አትበይኝ አደራ መኪና ቪላ ፎቅ አትበይኝ\nአደራ ባቡር አውሮፕላን አትበይኝ አደራ እናትሽ ድሀ ነኝ ሀገር ተቀበይኝ ሰንደ\nቅ እንዳበራ ደስ አሰኝኝ ልጄ እኔም ባንቺ ልኩራ። ገጣሚ : ሰላማዊት አበባየሁ\nኢትዮጵያ ሀገሬ መመኪያ ነሽ ክብሬ !', metadata={'sourc

### Preparing the LLM Model

In [9]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: never hardcode access tokens in a notebook. The token that was
# previously committed in this cell must be treated as leaked and revoked.
# Read the token from the HF_TOKEN environment variable, falling back to an
# interactive prompt so the secret never appears in the saved notebook.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/babi/.cache/huggingface/token
Login successful


In [26]:
# device_map = {
#     "transformer.word_embeddings": 0,
#     "transformer.word_embeddings_layernorm": 0,
#     "lm_head": "cpu",
#     "transformer.h": 0,
#     "transformer.ln_f": 0,
# }


def load_model(model_name, bnb_config, max_memory_mb: int = 23000):
    """
    Load a quantized causal language model and its tokenizer.

    Parameters:
    - model_name (str): Model identifier passed to ``from_pretrained``.
    - bnb_config: Quantization config forwarded as ``quantization_config``.
    - max_memory_mb (int): Per-GPU memory budget in MB used to build the
      ``max_memory`` mapping. Default is 23000 (the previously hard-coded value).

    Returns:
    - tuple: ``(model, tokenizer)``.
    """
    n_gpus = torch.cuda.device_count()
    max_memory = f"{max_memory_mb}MB"

    # Load the pre-trained language model with the Hugging Face Transformers library.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",  # dispatch the model across the available resources
        # One entry per visible GPU; the mapping is empty when no GPU is visible.
        max_memory={i: max_memory for i in range(n_gpus)},
    )
    # NOTE(review): `use_auth_token` is presumably superseded by `token` in newer
    # transformers releases; confirm against the installed version.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

    # Needed for LLaMA tokenizer
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
def create_bnb_config():
    """
    Create and return the bitsandbytes (BNB) quantization configuration.

    The configuration requests 4-bit NF4 quantization with double
    quantization, bfloat16 compute, and fp32 CPU offload enabled.

    Returns:
    - BitsAndBytesConfig: The quantization configuration object.
    """
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        # `llm_int8_enable_fp32_cpu_offload` is the BitsAndBytesConfig field for
        # CPU offload; the unrecognised `load_in_8bit_fp32_cpu_offload` keyword
        # that used to be passed alongside it has been dropped.
        llm_int8_enable_fp32_cpu_offload=True,
    )

In [27]:
# Base checkpoint to load with the quantization config built below.
# This rebinds `model_name` from the earlier model-loading cell.
model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = create_bnb_config()
# NOTE(review): the recorded output of this cell is a ValueError about modules
# being dispatched to the CPU or disk, so `model` and `tokenizer2` are not bound
# and the cells that follow cannot run until this is resolved.
model, tokenizer2 = load_model(model_name, bnb_config)

ValueError: 
                        Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
                        the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
                        these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
                        `device_map` to `from_pretrained`. Check
                        https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                        for more details.
                        

### Create pipeline

In [None]:
# Bind the result to its own name: assigning it back to `pipeline` would shadow
# the imported `transformers.pipeline` factory, so re-running this cell (or any
# later call to `pipeline(...)`) would no longer create a pipeline.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer2,
    use_cache=True,
    device_map="auto",
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer2.eos_token_id,
    pad_token_id=tokenizer2.eos_token_id,
)

# specify the llm
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Create langchain executor agent

In [None]:
from langchain.agents import tool

@tool
def get_link_data(link: str) -> List:
    """Placeholder tool: return the data available for the given link.

    Not implemented yet, so it always returns an empty list. This docstring is
    the tool description; the previous empty docstring left the tool with no
    description at all.
    """
    # Return an empty list rather than None to honour the declared List return type.
    return []


def get_agent_executor():
    """
    Build the LangChain agent executor used to answer questions.

    Reads the system prompt from ``../prompts/system_message.txt`` and calls
    ``initialize_agent`` with the module-level ``llm``, the ``get_link_data``
    tool, and the module-level ``retriever`` passed through ``agent_kwargs``.

    Returns:
    - The object returned by ``initialize_agent``.
    """
    # Read the prompt explicitly as UTF-8 so non-ASCII text is decoded the same
    # way regardless of the platform's default encoding.
    with open("../prompts/system_message.txt", "r", encoding="utf-8") as file:
        system_message = file.read()

    # NOTE(review): no `agent=` type is selected below, so the default agent is
    # used; confirm that it accepts the `system_message` and `retriever` keys.
    agent_kwargs = {
        "system_message": SystemMessage(content=system_message),
        "retriever": retriever,  # Pass the retriever to the agent
    }

    llm_agent = initialize_agent(
        llm=llm,
        # agent=AgentType.OPENAI_FUNCTIONS,
        tools=[get_link_data],
        agent_kwargs=agent_kwargs,
        verbose=True,
        max_iterations=20,
        early_stopping_method='generate'
    )

    return llm_agent


In [None]:
# Build the agent executor; relies on `llm` and `retriever` from the earlier cells.
llm_agent = get_agent_executor()

In [None]:
# Ask the agent a question in Amharic that mentions ገንዘቤ ዲባባ, a name found in the retrieved context.
llm_agent.run("ገንዘቤ ዲባባ ማን  ናት?")