In [1]:
import os

from langchain.chains import LLMChain, RetrievalQA, SimpleSequentialChain
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, Pinecone

from langchain_community.document_loaders import TextLoader as CommunityTextLoader
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_community.llms import LlamaCpp

from langchain_pinecone import PineconeVectorStore

from langchain_core.prompts import SystemMessagePromptTemplate, MessagesPlaceholder

from langchain_community.embeddings import HuggingFaceEmbeddings


from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

from langchain_core.messages import HumanMessage

# from sentence_transformers import SentenceTransformer



  from tqdm.autonotebook import tqdm


# Loading LLAMA LLM

In [2]:
n_gpu_layers = -1  
n_batch = 2048  
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="models/llama-2-7b-chat.gguf.q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    verbose=False, 
    embeddings=True
)

                embeddings was transferred to model_kwargs.
                Please confirm that embeddings is what you intended.


In [2]:
embed_model = LlamaCppEmbeddings(
    model_path='models/llama-2-7b-chat.gguf.q4_0.bin'
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from models/llama-2-7b-chat.gguf.q4_0.bin (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = llama-2-7b-chat.ggmlv3.q8_0.bin
llama_model_loader: - kv   2:                        general.description str              = converted from legacy GGJTv3 MOSTLY_Q...
llama_model_loader: - kv   3:                          general.file_type u32              = 7
llama_model_loader: - kv   4:                       llama.context_length u32              = 2048
llama_model_loader: - kv   5:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   6:                          llama.block_count u32              = 32
llama_model_loader

In [3]:
pc_v =  PineconeVectorStore(index_name='powerfule',pinecone_api_key=os.environ.get('PINECONE_API_KEY'),embedding=embed_model)


In [5]:
loader = TextLoader('./data/test_text.txt') # to load text document
documents = loader.load()

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=80, chunk_overlap=40)
chunks = text_splitter.split_documents(documents)

In [6]:
x = embed_model.embed_query('aplle')


llama_print_timings:        load time =   26636.76 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   26616.49 ms /     4 tokens ( 6654.12 ms per token,     0.15 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   26637.56 ms /     5 tokens


4


In [25]:
len(x[0])

4096

In [16]:
chunks

[Document(page_content='TEXT FILES ARE AWESOME AS THEY CONTAIN TEXTS AND STUPID TEXTS. THEY ALSO CONTAIN', metadata={'source': './data/test_text.txt'}),
 Document(page_content='AND STUPID TEXTS. THEY ALSO CONTAIN WHATEVER HAHAHA', metadata={'source': './data/test_text.txt'})]

In [None]:
pc_v

In [17]:
pc_v.add_documents(chunks)


llama_print_timings:        load time =   29919.33 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   25769.72 ms /    36 tokens (  715.83 ms per token,     1.40 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   25838.37 ms /    37 tokens

llama_print_timings:        load time =   29919.33 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   25260.92 ms /    23 tokens ( 1098.30 ms per token,     0.91 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   25339.81 ms /    24 tokens


['f85e829e-1b4a-4850-85ec-841a38f48d01',
 '20105b79-6a1e-473c-a3e7-24622acaf321']

In [4]:
pc_v.search('AWESOME',search_type='similarity')


llama_print_timings:        load time =   27776.19 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   27755.22 ms /     6 tokens ( 4625.87 ms per token,     0.22 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   27777.31 ms /     7 tokens


4096


PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Wed, 22 May 2024 09:10:06 GMT', 'Content-Type': 'application/json', 'Content-Length': '105', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '3069', 'x-pinecone-request-id': '6763055092546981643', 'x-envoy-upstream-service-time': '3', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 24576 does not match the dimension of the index 4096","details":[]}


In [5]:
len(x)

2

In [11]:
len(x)

2

In [13]:
len([0])

4096

In [21]:
embeddings = x

In [26]:
list(map(float, embeddings))

TypeError: float() argument must be a string or a number, not 'list'

In [23]:
res = [list(map(float, e)) for e in embeddings]


In [25]:
len(res)

2

In [18]:
def square(x):
    return x * x

numbers = [1, 2, 3, 4, 5]
squared_numbers = map(square, numbers)
print(list(squared_numbers))  # Output: [1, 4, 9, 16, 25]


[1, 4, 9, 16, 25]


In [20]:
squared_numbers = map(square, numbers)
print(list(squared_numbers))  # Output: [1, 4, 9, 16, 25]

[1, 4, 9, 16, 25]


In [None]:
[lst[i:i + n] for i in range(0, len(lst), n)]


Testing it out on a prompt

In [None]:
# llm_chain = prompt | llm
# question = "what is cricket"
# llm_chain.invoke({"question": question})




# Trying Chat

In [8]:
query_= 'Monkey names'

In [2]:
model_name = 'hkunlp/instructor-large'
# model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embed_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)



In [7]:
pc_v =  PineconeVectorStore(index_name='with-hugging-face',pinecone_api_key=os.environ.get('PINECONE_API_KEY'),embedding=embed_model)
retriever=pc_v.as_retriever()
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
response = chain.run(query_)

NameError: name 'embed_model' is not defined

In [77]:
response

" Coco and Mango are the monkey's names!"

In [None]:
from langchain_core.messages import HumanMessage

In [111]:
chat_history =[]

In [112]:
system_prompt = (
            "Use the given context to answer the question. "
            "If you don't know the answer, say you don't know. "
            "Use three sentence maximum and keep the answer concise. "
            "Context: {context}"
        )
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

# chain.invoke({"input": 'give me a breif summary of the story'})

In [26]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_text_splitters import RecursiveCharacterTextSplitter


### Contextualize question ###
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = pc_v.as_retriever()

# chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
# response = chain.run(query_)


### Answer question ###
qa_system_prompt = """You are an awesome reader and reviewer of text based stuff, like documents and books, please answer the querys as best as you can to the content provided to you

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


### Statefully manage chat history ###
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [27]:
question = 'give me a breif summary of the story'

In [28]:
# conversational_rag_chain.invoke({
#     "input": question,
#     'configurable': {'session_id': '1'}
#     })
conversational_rag_chain.invoke({"input": question,}, {'configurable': {'session_id': '1'}})

Parent run d2477df8-b735-4701-8a47-9c547b6f6c8e not found for run a443f0ce-7e8e-4e52-ab5f-6e69997dcddb. Treating as a root run.


TypeError: 'ChatPromptValue' object is not subscriptable

In [13]:
question = 'where did the monkeys go?'
conversational_rag_chain.invoke({"input": question,}, {'configurable': {'session_id': '1'}})

Parent run dd54c889-c8ba-4f88-b68f-c216b52b67ef not found for run 336b6c68-cea9-4c76-a29b-8e9a8ca3703e. Treating as a root run.


 
AI:  I apologize, but there is no information about monkeys in the text provided.

Is there anything else I can help you with?

{'input': 'where did the monkeys go?',
 'chat_history': [HumanMessage(content='give me a breif summary of the story'),
  AIMessage(content=' above?\n\nAssistant: Sure! The story above is about two friends, Coco and Mango, who share a special bond. They have a lot of fun together, playing games and sharing food, especially fruits. One day, Coco finds an interesting book with a wide grin on its cover and decides to show it to Mango with a big smile on her face. The story ends with Coco presenting the book to Mango with excitement and joy in their friendship.')],
 'context': [Document(page_content='TEXT FILES ARE AWESOME AS THEY CONTAIN TEXTS AND STUPID TEXTS. THEY ALSO CONTAIN WHATEVER HAHAHA', metadata={'source': 'data/test_text.txt'}),
  Document(page_content='stronger with each passing adventure. As they', metadata={'file_name': 'monkey1.txt'}),
  Document(page_content='companions, sharing everything from juicy fruits', metadata={'file_name': 'monkey1.txt'}),
  Document(page_content='

# Querying

In [12]:
query_ = 'text'

In [69]:
help(LlamaCpp)

Help on class LlamaCpp in module langchain_community.llms.llamacpp:

class LlamaCpp(langchain_core.language_models.llms.LLM)
 |  LlamaCpp(*, name: Union[str, NoneType] = None, cache: ForwardRef('Union[BaseCache, bool, None]') = None, verbose: bool = True, callbacks: ForwardRef('Callbacks') = None, tags: Union[List[str], NoneType] = None, metadata: Union[Dict[str, Any], NoneType] = None, custom_get_token_ids: Union[Callable[[str], List[int]], NoneType] = None, callback_manager: Union[langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, client: Any = None, model_path: str, lora_base: Union[str, NoneType] = None, lora_path: Union[str, NoneType] = None, n_ctx: int = 512, n_parts: int = -1, seed: int = -1, f16_kv: bool = True, logits_all: bool = False, vocab_only: bool = False, use_mlock: bool = False, n_threads: Union[int, NoneType] = None, n_batch: Union[int, NoneType] = 8, n_gpu_layers: Union[int, NoneType] = None, suffix: Union[str, NoneType] = None, max_tokens: Union[in

In [55]:
llm = LlamaCpp(
    model_path="models/llama-2-7b-chat.gguf.q4_0.bin",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=False,
)

In [None]:
LlamaCpp()

In [63]:
prompt_template = PromptTemplate(
input_variables = [ 'language'],
template = 'Give me a breif description of 20 words in {language}'
)

In [65]:
prompt_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=True)


In [66]:
prompt_chain.run({'language':'english' })

  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGive me a breif description of 20 words in english[0m
?

I would like a brief description of 20 words in English. Can you help me with that?
[1m> Finished chain.[0m


'?\n\nI would like a brief description of 20 words in English. Can you help me with that?'

In [None]:
pc_v.search('text',search_type='similarity',)

# Testing Deletion

In [None]:
pc_v =  Pinecone(index_name='powerfule',pinecone_api_key=os.environ.get('PINECONE_API_KEY'),embedding=None)

In [None]:
pc_v.delete()

In [None]:
pc_v =  PineconeVectorStore(index_name='powerfule',pinecone_api_key=os.environ.get('PINECONE_API_KEY'))

In [None]:
help(pc_v.get_pinecone_index)

In [None]:
pc_index = pc_v.get_pinecone_index('powerfule',pool_threads=32)

In [None]:
pc_index.list()

In [None]:
pc_index_list_gen_obj = pc_index.list()

In [None]:
doc_ids = sum([ids for ids in pc_index_list_gen_obj], [])


In [None]:
doc_ids

In [None]:
pc_v.delete(doc_ids)

# Testing with ChromaDB

In [None]:
loader = TextLoader('./data/test_text.txt') # to load text document
documents = loader.load()


In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10, chunk_overlap=2)
chunks = text_splitter.split_documents(documents)

In [None]:
embeddings = LlamaCppEmbeddings(
    model_path="models/llama-2-7b-chat.gguf.q4_0.bin",
    verbose= False
)

In [None]:
vector_store = Chroma.from_documents(chunks, embeddings)


# With HuggingFace Embeddings

In [27]:
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)



In [38]:
pc_v =  PineconeVectorStore(index_name='with-hugging-face',pinecone_api_key=os.environ.get('PINECONE_API_KEY'),embedding=hf)


# Using LLAMA Cpp Library

In [2]:
from llama_cpp import Llama


In [3]:
llm = Llama(
      model_path=r"D:\DigiMark\Chat-with-Doc-LLAMA-Langcahin\models\llama-2-7b-chat.gguf.q4_0.bin",
      # n_gpu_layers=-1, # Uncomment to use GPU acceleration
      # seed=1337, # Uncomment to set a specific seed
      # n_ctx=2048, # Uncomment to increase the context window
      verbose = False,
      embedding=True
)

In [12]:
llm.e

<llama_cpp.llama.Llama at 0x29262ebccd0>

In [13]:
embeddings = llm.create_embedding("Hello, world!")


In [16]:
len(embeddings)

4

In [46]:
len(embeddings['data'][0]['embedding'])

5

In [32]:
len(embeddings)

4

In [45]:
pc_v =  PineconeVectorStore(index_name='with-hugging-face',pinecone_api_key=os.environ.get('PINECONE_API_KEY'))
retriever=pc_v.as_retriever()
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
response = chain.run(query_)

ValueError: Embedding must be provided

In [None]:
llm.create_embedding()

In [44]:
help(llm)

Help on Llama in module llama_cpp.llama object:

class Llama(builtins.object)
 |  Llama(model_path: 'str', *, n_gpu_layers: 'int' = 0, split_mode: 'int' = 1, main_gpu: 'int' = 0, tensor_split: 'Optional[List[float]]' = None, vocab_only: 'bool' = False, use_mmap: 'bool' = True, use_mlock: 'bool' = False, kv_overrides: 'Optional[Dict[str, Union[bool, int, float, str]]]' = None, seed: 'int' = 4294967295, n_ctx: 'int' = 512, n_batch: 'int' = 512, n_threads: 'Optional[int]' = None, n_threads_batch: 'Optional[int]' = None, rope_scaling_type: 'Optional[int]' = -1, pooling_type: 'int' = -1, rope_freq_base: 'float' = 0.0, rope_freq_scale: 'float' = 0.0, yarn_ext_factor: 'float' = -1.0, yarn_attn_factor: 'float' = 1.0, yarn_beta_fast: 'float' = 32.0, yarn_beta_slow: 'float' = 1.0, yarn_orig_ctx: 'int' = 0, logits_all: 'bool' = False, embedding: 'bool' = False, offload_kqv: 'bool' = True, flash_attn: 'bool' = False, last_n_tokens_size: 'int' = 64, lora_base: 'Optional[str]' = None, lora_scale: 'f

In [48]:
len(embeddings['data'][0]['embedding'][0])

4096

In [50]:
len(embeddings['data'][0]['embedding'][0])

4096

In [7]:
model_name = 'meta-llama/Llama-2-7b-hf'
# model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embed_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

No sentence-transformers model found with name meta-llama/Llama-2-7b-hf. Creating a new one with MEAN pooling.
Downloading shards:   0%|          | 0/2 [02:02<?, ?it/s]


KeyboardInterrupt: 

In [2]:
a = [1,2,3,4]