In [37]:
import os

import pinecone
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [38]:
# Pinecone credentials come from the environment so the secret is never stored
# in the notebook or its version history. Export PINECONE_API_KEY before
# starting the kernel. The key that used to be hardcoded here should be
# treated as leaked and rotated.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
# The environment name is not a secret; the previous value stays as the default.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "gcp-starter")

In [39]:
def load_pdf(Books):
    """Load the PDF files matching ``*.pdf`` in the ``Books`` directory.

    Args:
        Books: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for the matched files,
        each file being read with ``PyPDFLoader``.
    """
    return DirectoryLoader(Books, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [40]:
# Load the PDFs from the "Books/" directory (a path relative to the current
# working directory) via load_pdf.
extracted_data = load_pdf("Books/")

In [41]:
def text_split(extracted_data, chunk_size=660, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the result of ``load_pdf``.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 660, the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [42]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"length of my chunk: {len(text_chunks)}")

length of my chunk: 15191


In [43]:
def download_hugging_face_embeddings():
    """Return a ``HuggingFaceEmbeddings`` instance for the
    ``sentence-transformers/all-MiniLM-L6-v2`` model."""
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [44]:
# Create the embedding model once; later cells reuse this object.
embeddings = download_hugging_face_embeddings()

In [45]:
# Sanity check: embed a short string and print the length of the returned vector.
query_result = embeddings.embed_query("Hello world")
print(f"Length {len(query_result)}")

Length 384


In [47]:
# Initialise the Pinecone client. Fail early with a readable message rather
# than an opaque client error when the key is missing.
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is not set")

pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)

index_name = "chatbot"

# Embed every chunk and store it in the index. from_documents is used instead
# of from_texts on page_content alone so each chunk's metadata is stored with
# its vector; otherwise the source documents returned by the QA chain carry
# no information about where they came from.
# NOTE(review): presumably the "chatbot" index must already exist with a
# dimension matching the embedding length — confirm.
# NOTE(review): re-running this cell likely uploads every chunk again — confirm
# before a full Restart & Run All.
docsearch = Pinecone.from_documents(text_chunks, embeddings, index_name=index_name)

In [48]:
# Prompt for the retrieval-QA chain. {context} and {question} are the two
# template variables. The closing instruction used to say the model could
# reply to anything *except* the given domain, which inverted the intended
# restriction and contradicted the second line of the prompt; it now forbids
# out-of-domain answers.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer from the domain given in the pieces of information don't try to make up an answer.
Also if the user sends Hi Hey or anything sort of that do make up a reply that you are a chatbot which is specifically used to answer questions from courses  BT203,BT204 and BT205  

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Do not reply to anything outside the domain given in the pieces of information
Helpful answer:
"""

In [49]:
# Wrap the raw template string in a PromptTemplate and package it in the
# keyword dictionary stored as chain_type_kwargs.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# Load the model file under model/ through CTransformers as a "llama" model,
# with the generation settings given in the config dictionary.
llm = CTransformers(
    model_type="llama",
    model="model/llama-2-7b-chat.ggmlv3.q8_0.bin",
    config=dict(max_new_tokens=512, temperature=0.7),
)

In [51]:
# Assemble the retrieval-QA chain: the Pinecone-backed retriever (with k=2 in
# its search kwargs) supplies context, and the LLM answers using the custom
# prompt. Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [52]:
# Sample question; presumably passed to the `qa` chain in a later cell.
query = "What are cells"