# In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings import HuggingFaceEmbeddings


# Name of the Pinecone index this script works against.
index_name = 'langchain-demo'

# Configure the Pinecone client.
# NOTE(review): PINECONE_API_KEY and PINECONE_ENV are not defined in the
# visible part of this file -- presumably set earlier; confirm.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

# Handle to the index named above.
index = pinecone.Index(index_name)



# Load the source text file (the path below is still a placeholder).
loader = TextLoader("PLACE FILE PATH HERE RAVI")
docs = loader.load()

# Split the loaded documents into chunks (chunk_size=1000); the 200-unit
# overlap between neighbouring chunks limits loss of information at the
# chunk boundaries.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs_split = text_splitter.split_documents(docs)

# Embedding model, constructed with the library defaults.
embeddings = HuggingFaceEmbeddings()

# Embed the chunks and upsert them into the Pinecone index `index_name`.
doc_db = Pinecone.from_documents(
    documents=docs_split,
    embedding=embeddings,
    index_name=index_name,
)

# Sample prompts. None of them is passed to the search below; assign one to
# `query` to use it.
queryPwC1 = "HOW CAN I IMPROVE THE EFFICIANCY OF THIS HEALTH CARE PROCESS AND MAKE SURE IT IS HIPPA COMPLIANT AND ACCURATE? "
queryPwC2 = "HOW CAN I AUTOMATICALLY FILL IN THIS HEALTH CARE FORM AND MAKE SURE IT IS HIPPA COMPLIANT AND ACCURATE? "
queryPwC3 = "WHAT IS CORRECT ANSWER TO THIS HEALTH CARE QUESTION : XXXXXXX USING PROFESSIONAL DOCUMENT SITATIONS AND MAKE SURE IT IS HIPPA COMPLIANT AND ACCURATE? "

# `query` must exist before the search runs. It was previously read here
# without having been assigned, which raises NameError on a fresh
# top-to-bottom run. The placeholder value matches the one used for the QA
# chain further down in this file.
query = "USER QUERY"

# Search the vector store for the chunks most similar to `query`; each hit is
# returned together with its similarity score.
search_docs = doc_db.similarity_search_with_score(query)

from langchain import HuggingFaceHub


# Hugging Face Hub repository of the model used to generate answers.
repo_id = "tiiuae/falcon-40b"

# LLM wrapper; temperature and max_length are forwarded as model kwargs.
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0, "max_length": 64},
)



from langchain.chains import RetrievalQA

# Question put to the chain (placeholder text).
query = "USER QUERY"

# Retrieval QA chain: the 'stuff' chain type over `llm`, with the Pinecone
# vector store acting as the retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=doc_db.as_retriever(),
)

# Run the chain on the question and keep its answer.
result = qa.run(query)


