## Langchain using Huggingface models - RAG

### Import packages

In [14]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter  
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.vectorstores import Weaviate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFacePipeline
from langchain.agents import AgentType, initialize_agent
from langchain.schema import SystemMessage

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline, BitsAndBytesConfig, AutoModelForSeq2SeqLM
import torch

from typing import List, Optional, Union
import weaviate
from weaviate.embedded import EmbeddedOptions
import weaviate
from weaviate.embedded import EmbeddedOptions

In [17]:
model_name = "iocuydi/llama-2-amharic-3784m"
model = AutoModelForCausalLM.from_pretrained(model_name)

OSError: iocuydi/llama-2-amharic-3784m does not appear to have a file named pytorch_model.bin, tf_model.h5, model.ckpt or flax_model.msgpack.

### Add the embedding

In [8]:

# embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
# embedding_model_name = "sentence-transformers/clip-ViT-B-32"
embedding_model_name = "sentence-transformers/average_word_embeddings_komninos"
# embedding_model_name = "iocuydi/llama-2-amharic-3784m"

model_kwargs = {}
embeddings = HuggingFaceEmbeddings(
  model_name=embedding_model_name, 
  model_kwargs=model_kwargs
)

.gitattributes: 100%|██████████| 690/690 [00:00<00:00, 1.03MB/s]
pytorch_model.bin: 100%|██████████| 267M/267M [00:21<00:00, 12.4MB/s] 
(…)beddings/whitespacetokenizer_config.json: 100%|██████████| 2.59M/2.59M [00:01<00:00, 2.09MB/s]
(…)WordEmbeddings/wordembedding_config.json: 100%|██████████| 164/164 [00:00<00:00, 921kB/s]
1_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 862kB/s]
README.md: 100%|██████████| 2.13k/2.13k [00:00<00:00, 10.5MB/s]
config_sentence_transformers.json: 100%|██████████| 122/122 [00:00<00:00, 628kB/s]
modules.json: 100%|██████████| 248/248 [00:00<00:00, 498kB/s]


In [1]:
from langchain.schema.embeddings import Embeddings


class CustomLlamaEmbeddings(Embeddings):
    def __init__(self, model_name, model_kwargs):
        super().__init__()
        self.model_name = model_name
        self.model_kwargs = model_kwargs
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **model_kwargs)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def _compute_embeddings(self, input_ids):
        outputs = self.model(input_ids=input_ids)
        embeddings = outputs.last_hidden_state[:, 0, :].mean(dim=0)  # Mean pooling
        return embeddings

# Usage:
embeddings = CustomLlamaEmbeddings()

TypeError: Can't instantiate abstract class CustomLlamaEmbeddings with abstract methods embed_documents, embed_query

In [9]:
def data_loader(file_path: str, chunk_size: int = 500, chunk_overlap: int = 50) -> Union[List[str], None]:
    """
    Load data from a file, split it into chunks, and return the chunks.

    Parameters:
    - file_path (str): The path to the file containing the data.
    - chunk_size (int): The size of each data chunk. Default is 500.
    - database (int): The overlap between consecutive chunks. Default is 50.

    Returns:
    - list: A list of data chunks.
    """
    try:
        loader = TextLoader(file_path)
        documents = loader.load()

        # Chunk the data
        text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_documents(documents)
        
        print("Data loaded to vector database successfully")
        return chunks
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None 
    
    
def create_retriever(chunks):
   try:
    #    Setup vector database
       client = weaviate.Client(embedded_options=EmbeddedOptions())

       # Populate vector database using embeddings from the Hugging Face model
       vectorstore = Weaviate.from_documents(
           client=client,
           documents=chunks,
           embedding=embeddings,  # Use the model's encode function for embeddings
           by_text=False
       )

       # Define vectorstore as retriever to enable semantic search
       retriever = vectorstore.as_retriever()
       print("Retriever created successfully.")

       return retriever

   except Exception as e:
       print(f"An unexpected error occurred: {e}")
       return None

### Load the data and create chunks

In [10]:
chuncks = data_loader("../prompts/context.txt")
len(chuncks)

Data loaded to vector database successfully


9

### Create the retriver

In [None]:
retriever = create_retriever(chuncks)

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            
{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-02-01T23:25:07+03:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-02-01T23:25:07+03:00"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-02-01T23:25:07+03:00"}


Started /home/babi/.cache/weaviate-embedded: process ID 109687


{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2024-02-01T23:25:07+03:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2024-02-01T23:25:07+03:00"}


Retriever created successfully.


{"level":"info","msg":"Created shard langchain_4696c9c0bd674d0ca4fc2d7fe99b6bfa_pLeBhYvfVAC8 in 1.186865ms","time":"2024-02-01T23:25:08+03:00"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-02-01T23:25:08+03:00","took":103518}
{"action":"lsm_recover_from_active_wal_success","class":"LangChain_653cbe3e6689480293e4a8215b930959","index":"langchain_653cbe3e6689480293e4a8215b930959","level":"info","msg":"successfully recovered from write-ahead-log","path":"/home/babi/.local/share/weaviate/langchain_653cbe3e6689480293e4a8215b930959/nlo2bDOgVacv/lsm/objects/segment-1706788243719981128.wal","shard":"nlo2bDOgVacv","time":"2024-02-01T23:25:08+03:00"}
{"action":"lsm_recover_from_active_wal_success","class":"LangChain_653cbe3e6689480293e4a8215b930959","index":"langchain_653cbe3e6689480293e4a8215b930959","level":"info","msg":"successfully recovered from write-ahead-log","path":"/home/babi/.local/

{"action":"lsm_recover_from_active_wal_success","class":"LangChain_26666663df8145759ff4ae05bfd0b380","index":"langchain_26666663df8145759ff4ae05bfd0b380","level":"info","msg":"successfully recovered from write-ahead-log","path":"/home/babi/.local/share/weaviate/langchain_26666663df8145759ff4ae05bfd0b380/ZErds8qG0OhT/lsm/property__id/segment-1706788243755956446.wal","shard":"ZErds8qG0OhT","time":"2024-02-01T23:25:08+03:00"}
{"action":"lsm_recover_from_active_wal_success","class":"LangChain_26666663df8145759ff4ae05bfd0b380","index":"langchain_26666663df8145759ff4ae05bfd0b380","level":"info","msg":"successfully recovered from write-ahead-log","path":"/home/babi/.local/share/weaviate/langchain_26666663df8145759ff4ae05bfd0b380/ZErds8qG0OhT/lsm/property_text/segment-1706788243759495773.wal","shard":"ZErds8qG0OhT","time":"2024-02-01T23:25:08+03:00"}
{"action":"lsm_recover_from_active_wal_success","class":"LangChain_26666663df8145759ff4ae05bfd0b380","index":"langchain_26666663df8145759ff4ae05b

In [12]:
type(retriever)

langchain.vectorstores.base.VectorStoreRetriever

In [13]:
retriever.get_relevant_documents("ኢትዮጵያ")

[Document(page_content='በ1500 ሜትር ሴቶች ውድድር ገንዘቤ ዲባባና በሱ ሳዶ ወደሚቀጥለው ዙር ማለፋቸውን አረጋገጡ❤️\nከጥቂት ደቂቃዎች በኋላ በጉጉት የሚጠበቀው የወንዶች የ10 ሺህ ሜትር የፍፃሜ ውድድር ይካሄዳል፡፡ መልካም እድል ኢትዮጵያዬ !\nለንደን : በሴቶች 1500 ሜትር ውድድር ወደ ቀጣዩ ዙር ያለፉ የኢትዮጵያ አትሌቶች ገንዘቤ ዲባባ ጎደፋ ፀጋይ ባሱ ሶዱ\nለንደን : የወንዶች የ10 ሺህ ሜትር የፍፃሜ ውድድር ተጀመረ\n10ሺ ሜትሩን ሞ በ1ኛነት አጠናቀቀ !\nለንደን : ሞ ፋራ የ10 ሺ ሜትር ውድድርን በበላይነት አጠናቀቀ።', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት \nእናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና \nአባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት \nእናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት ከተወለዱት እናት እና አባት', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='ኢትዮጵያዊነት ኢትዮጵያዊነት ብዙ የተለያዩ ህብረተሰቦች የተዋህዱበት አካል ክፍል\nመሆን ነው። ኢትዮጵያዊነት ከጎሰኝኛት በላይና ውጭ የሆነ አጠቃላይ ማንነት

### Preparing the LLM Model

In [7]:
from huggingface_hub import login

login("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")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/babi/.cache/huggingface/token
Login successful


In [8]:
def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = f'{23000}MB'

#method from the Hugging Face Transformers library to load a pre-trained language model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto", # dispatch efficiently the model on the available ressources
        max_memory = {i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

    # Needed for LLaMA tokenizer
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer
    
    
    
''' This function, create_bnb_config(), is designed to create and return a
configuration object for quantization using the Bits and Bytes (BNB)
quantization scheme. '''
def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return bnb_config

In [None]:
model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = create_bnb_config()
model, tokenizer2 = load_model(model_name, bnb_config)

### Create pipeline

In [None]:
pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer2,
    use_cache=True,
    device_map="auto",
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer2.eos_token_id,
    pad_token_id=tokenizer2.eos_token_id,
)

# specify the llm
llm = HuggingFacePipeline(pipeline=pipeline)

### Create langchain executor agent

In [None]:
from langchain.agents import tool

@tool
def get_link_data(link: str) -> List:
    """"""
    return None


def get_agent_executor():
    with open("../prompts/system_message.txt", "r") as file:
        system_message = file.read()

    agent_kwargs = {
    "system_message": SystemMessage(content=system_message),
    "retriever": retriever  # Pass the retriever to the agent
    }

    llm_agent = initialize_agent(
        llm=llm,
        # agent=AgentType.OPENAI_FUNCTIONS,
        tools=[get_link_data],
        agent_kwargs=agent_kwargs,
        verbose=True,
        max_iterations=20,
        early_stopping_method='generate'
    )

    return llm_agent


In [None]:
llm_agent = get_agent_executor()

In [None]:
llm_agent.run("ገንዘቤ ዲባባ ማን  ናት?")