In [None]:
import os # read Pinecone credentials from the environment

import pinecone # vector database
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, PyPDFLoader # for uploading document
from langchain.embeddings import HuggingFaceEmbeddings # create embedding
from langchain.llms import CTransformers # load models
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter # creating chunks from document
from langchain.vectorstores import Pinecone # vector store backed by the Pinecone database

: 

## Import Pinecone Credentials

In [None]:
# Pinecone credentials are taken from the environment so that secrets are not
# stored in the notebook. Set PINECONE_API_KEY and PINECONE_API_ENV before
# starting the kernel; each falls back to "" when the variable is unset.
# The constant names keep their existing spelling because the Pinecone
# initialisation cell refers to them by these names.
PINCORE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
PINCORE_API_ENV = os.environ.get("PINECONE_API_ENV", "")

: 

In [None]:
def load_data(data):
    """Load the PDF files matched by ``*.pdf`` under the given directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()``; each file is read with
        ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [None]:
# Load the PDF documents from the relative data/ directory.
PDF_SOURCE_DIR = 'data/'
extracted_data = load_data(PDF_SOURCE_DIR)

In [None]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 500.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [None]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"length of chunks {len(text_chunks)}")

In [None]:
def download_huggingface_embeddings():
    """Create the HuggingFace embedding wrapper for all-MiniLM-L6-v2.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [None]:
# Build the embedding model once; the bare name on the last line makes the
# notebook show its repr as the cell output.
embeddings = download_huggingface_embeddings()
embeddings

In [None]:
# Sanity check: embed a sample string and print the length of the result.
query_result = embeddings.embed_query("Hello_world")
print(f"length {len(query_result)}")

In [None]:
# Initialise the Pinecone client with the credentials configured earlier.
pinecone.init(api_key=PINCORE_API_KEY, environment=PINCORE_API_ENV)
index_name = "text"  # index name as configured in the Pinecone project

# Embed the text of every chunk and store the vectors in the Pinecone index.
# Only page_content is passed, so chunk metadata is not sent along.
docsearch = Pinecone.from_texts(
    [chunk.page_content for chunk in text_chunks],
    embeddings,
    index_name=index_name,
)

In [None]:
# Attach to the existing index instead of re-embedding the chunks, then run a
# sample similarity search (top 3 matches) to check that retrieval works.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
query = "what is allergies"
docs = docsearch.similarity_search(query, k=3)
print('results', docs)


In [None]:
# Prompt text for the QA chain. It contains two placeholders, {context} and
# {question}, which are filled in when the template is formatted.
# NOTE(review): the wording below has spelling/grammar slips; it is runtime
# text sent to the model, so it is deliberately left unchanged here.
prompt_template = """use this piece of informations to ansers user question.
if you don't know answer so just say i don't know , don't make answer own.
context : {context}
questions : {question}

only return helpfull answer .
Helpfull Answer: 
"""

: 

In [None]:
# Wrap the raw prompt text in a PromptTemplate and package it as the keyword
# arguments that will be forwarded to the chain.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=prompt)

In [None]:
# Llama-type model loaded through CTransformers with the generation settings
# below (up to 512 new tokens, temperature 0.8).
# NOTE(review): the model string has no file extension - confirm it matches
# the actual file under model/.
generation_config = {'max_new_tokens': 512, 'temperature': 0.8}
llm = CTransformers(
    model="model/llama-2-7b.chat.ggmlv3.q4_0",
    model_type="llama",
    config=generation_config,
)

In [None]:
# Retrieval QA chain: the retriever is asked for k=2 matches per query, the
# "stuff" chain type is used with the custom prompt, and source documents are
# returned alongside the answer.
retriever = docsearch.as_retriever(search_kwargs={'k': 2})
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question/answer loop.
# Enter "exit" or "quit", or interrupt the kernel / send EOF, to leave the
# loop so the cell can finish; blank input is ignored rather than queried.
while True:
    try:
        user_input = input("Input Prompt : ")
    except (EOFError, KeyboardInterrupt):
        # No more input available or the user interrupted: stop cleanly.
        break

    command = user_input.strip().lower()
    if not command:
        continue
    if command in {"exit", "quit"}:
        break

    result = qa({'query': user_input})
    print( "response", result['result'] )


: 