In [1]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_pinecone import PineconeVectorStore
from langchain_community.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


from langchain_community.llms import CTransformers
import os
import pinecone
from pinecone import Pinecone
from dotenv import load_dotenv

In [2]:
#os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
# Load variables from a .env file into the process environment, then read the
# Pinecone API key from it (None when the variable is not set).
load_dotenv()
key = os.environ.get("PINECONE_API_KEY")

In [4]:
# Pinecone connection settings for this notebook.
PINECONE_API_ENV = "gcp-starter"
PINECONE_API_KEY = key

In [5]:
def load_pdf_data(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; files matching the ``*.pdf``
            glob are read with ``PyPDFLoader``.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [None]:
# Directory holding the source PDFs, given relative to this notebook.
pdf_dir = "../data/gary_Brecka_yt"
extracted_data = load_pdf_data(pdf_dir)


In [None]:
def text_chunk_splitter(extracted_data):
    """Split loaded documents into overlapping text chunks.

    A ``RecursiveCharacterTextSplitter`` is configured with
    ``chunk_size=500`` and ``chunk_overlap=20``.

    Args:
        extracted_data: Documents to split (for example the output of
            ``load_pdf_data``).

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=500)
    return splitter.split_documents(extracted_data)

In [None]:
text_chunks = text_chunk_splitter(extracted_data)

# Report how many chunks the splitter produced.
chunk_count = len(text_chunks)
print("chunk length:  ", chunk_count)

In [None]:
def download_embedding_model():
    """Create the sentence-embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)


In [None]:
# Instantiate the embedding model once; later cells reuse it for indexing and for queries.
embedding = download_embedding_model()

# Hybrid search experiment

In [None]:
from pinecone_text.sparse import BM25Encoder
from langchain_community.retrievers import (
    PineconeHybridSearchRetriever,
)

In [None]:
# Plain-text content of every chunk, in the same order as text_chunks.
list_chunks = [chunk.page_content for chunk in text_chunks]

In [None]:
# Start from the sparse encoder's default parameters (the default tf-idf
# values). The encoder is fitted on this notebook's chunks in the next cell.
bm25_encoder = BM25Encoder().default()

In [None]:
# Fit the sparse encoder on the chunk texts extracted from the PDFs.
bm25_encoder.fit(list_chunks)

In [None]:
# Persist the fitted BM25 parameters as JSON, then rebuild the encoder from
# that file so the saved values are the ones in use from here on.
bm25_params_file = "bm25_values.json"
bm25_encoder.dump(bm25_params_file)
bm25_encoder = BM25Encoder().load(bm25_params_file)

In [None]:
# Bind the Pinecone client and obtain the index handle from it. The
# module-level ``pinecone.Index`` class takes an API key as its first
# positional argument, so passing the index name there does not work; the
# client's ``Index(name, host=...)`` method is the supported entry point.
pc = pinecone.Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV"),
)
index_name = "gary-chatbot"
index = pc.Index(index_name, host="https://gary-chatbot-mhwhc2g.svc.gcp-starter.pinecone.io")

Run this when loading the data into the vector store for the first time.

In [None]:
# Run this line only when loading the embeddings into the vector store for the first time
#docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embedding, index_name=index_name)

In [None]:
# retriever = PineconeHybridSearchRetriever(
#     embeddings=embedding, sparse_encoder=bm25_encoder, index=index
# )

Note: the Pinecone hybrid search retriever currently raises an error, so the hybrid-search cells are left commented out.

In [None]:
# query = "what is allergy?"
# #retiriever = hybrid_retriever.as_retriever(search_kwargs=dict(k=3))

# docs = retriever.get_relevant_documents(query)

# #print("Results", docs)

In [None]:
# query = "what is allergy?"
# #retiriever = hybrid_retriever.as_retriever(search_kwargs=dict(k=3))

# #docs = retriever.get_relevant_documents(query)
# docs = await retriever.aget_relevant_documents(query)
# #print("Results", docs)

In [None]:
# Embed a sample sentence and report the length of the resulting vector.
sample_sentence = "Hi today is March 12 Tuesday"
query_res = embedding.embed_query(sample_sentence)
vector_length = len(query_res)
print("Length: ", vector_length)

In [None]:
from langchain_pinecone import PineconeVectorStore

In [None]:
# Construct a Pinecone client from environment settings. The instance is not
# bound to a name here.
pinecone_settings = {
    "api_key": os.getenv("PINECONE_API_KEY"),
    "environment": os.getenv("PINECONE_ENV"),
}
pinecone.Pinecone(**pinecone_settings)

# Name of the Pinecone index used by the vector store cells below.
index_name = "gary-chatbot"

In [None]:
# Upload the chunk embeddings only when the index is still empty. Calling
# ``from_texts`` on every run would insert the same chunks again, so repeat
# runs attach to the existing index instead of re-uploading.
_index_stats = (
    pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    .Index(index_name)
    .describe_index_stats()
)
if _index_stats.total_vector_count == 0:
    docsearch = PineconeVectorStore.from_texts(
        [t.page_content for t in text_chunks], embedding, index_name=index_name
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(index_name, embedding)

In [None]:
# Attach to the existing index and run a sample similarity query (top 3 hits).
docsearch = PineconeVectorStore.from_existing_index(index_name, embedding)
query = "insulin?"
# The variable name is kept as-is because the QA chain cell refers to it.
retiriever = docsearch.as_retriever(search_kwargs=dict(k=3))
docs = retiriever.get_relevant_documents(query)

print("Results", docs)
# Retrieval can come back empty (e.g. nothing indexed yet); guard the first-hit
# preview so the cell does not fail with an IndexError.
docs[0].page_content if docs else None

In [None]:
# Prompt text for the QA chain. The {context} and {question} placeholders are
# the two input variables declared when the PromptTemplate is built.
prompt_template = """ 
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.


Context : {context}
Question: {question}

Only return helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
# Retrieved passages joined into one string, separated by blank lines.
context = "\n\n".join(doc.page_content for doc in docs)

# Prompt object handed to the QA chain through chain_type_kwargs.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Stream generated tokens to stdout as they are produced.
stdout_streamer = StreamingStdOutCallbackHandler()
callback_manager = CallbackManager([stdout_streamer])

In [None]:
from llama_cpp import Llama

## N.B.
Install `llama-cpp-python` with OpenBLAS enabled before loading the model:

`CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.2.56`


In [None]:
# Settings for the local llama.cpp model; `verbose` is not passed, so it stays
# at the library default.
llama_settings = dict(
    model_path="../model/gguf/llama-2-7b-chat.Q3_K_M.gguf",
    n_gpu_layers=-1,  # presumably "offload every layer" - confirm against llama.cpp docs
    n_batch=512,
    max_tokens=512,
    temperature=0.7,
    callback_manager=callback_manager,
)
llm = LlamaCpp(**llama_settings)

In [None]:
# Retrieval QA chain: uses the "stuff" chain type with the custom prompt and
# returns the source documents alongside the result.
QA = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retiriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)


In [None]:
import textwrap

In [None]:
import sys
# Interactive question loop over the QA chain. Type "exit" to stop.
while True:
    user_input = input("Input prompt: ").strip()
    if user_input.lower() == 'exit':
        print("Exiting")
        # Leave the loop rather than calling sys.exit(): raising SystemExit in
        # a notebook kernel is reported as an exception instead of ending the
        # cell cleanly.
        break
    if not user_input:
        # Skip empty or whitespace-only input instead of sending it to the chain.
        continue
    response = QA({'query': user_input})
    # Wrap the answer text to 50 columns for readability.
    result = textwrap.fill(response["result"], width=50)
    print(f"Answer: {result}")

In [None]:
# while True:
#     user_Input: input(f"Input prompt: ")
#     result = QA({"query": user_Input})
#     print("Response: ", result["result"])