In [21]:
import os 
from dotenv import load_dotenv

import warnings
# NOTE(review): this silences every warning for the whole session, so deprecation
# notices from the libraries used below will not be shown.
warnings.filterwarnings('ignore')

In [22]:
# Presumably loads variables from a local .env file into the process environment
# (python-dotenv); the keys are read back with os.getenv in the next cell.
load_dotenv()

True

In [23]:
# Pull the API credentials out of the environment; a missing variable yields None.
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

## Data Loading

In [24]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.document_loaders import PyPDFLoader

In [25]:
# Point the directory loader at ../Data, matching PDFs at any depth, and load them.
loader = PyPDFDirectoryLoader(
    "../Data",
    glob="**/*.pdf",
)
data = loader.load()

## Text Splitting

In [26]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [27]:
# Split the loaded documents into chunks (chunk_size=500, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
)
texts_chunks = text_splitter.split_documents(data)

In [28]:
# Sanity check: length of the first chunk's text (the splitter above was given chunk_size = 500).
len(texts_chunks[0].page_content)

46

## Creating Vector DB

In [29]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [30]:
# No key is passed explicitly; presumably the client reads OPENAI_API_KEY from the
# environment populated earlier — TODO confirm.
embeddings = OpenAIEmbeddings()

In [31]:
# vectorstore = FAISS.from_documents(texts_chunks, embeddings)
# vectorstore.save_local("faiss_index")

In [32]:
# Reuse the on-disk index when it exists; otherwise build it once from the chunks
# and persist it, so the notebook also runs from a fresh checkout.
# NOTE(review): the saved index is deserialized with pickle — only load an index
# that this notebook produced itself.
if os.path.isdir("faiss_index"):
    docsearch = FAISS.load_local("faiss_index", embeddings)
else:
    docsearch = FAISS.from_documents(texts_chunks, embeddings)
    docsearch.save_local("faiss_index")

In [76]:
# Sample question used to exercise both the similarity search and the QA chain.
query = "How much percentage of women who undergo selective reduction will go into premature labor."

In [77]:
# Query the vector store directly (k = 3) as a check of what retrieval returns.
docs = docsearch.similarity_search(query, k = 3)

In [78]:
# Display the retrieved documents (text plus source/page metadata).
docs

[Document(page_content='population in India. Aided by the efforts of DeepakChopra and the Maharishi, it has become an increasinglyaccepted alternative medical treatment in America duringthe last two decades. Chopra is an M.D. who has writtenseveral bestsellers based on Ayurvedic ideas. He alsohelped develop the Center for Mind/Body Medicine inLa Jolla, California, a major Ayurvedic center that trainsphysicians in Ayurvedic principles, produces herbalremedies, and conducts research and documentation ofits healing', metadata={'source': '../Data/Medical_book.pdf', 'page': 443}),
 Document(page_content='Msc.Psych, MRSNZ\nFRSH, FRIPHH, FAIC, FZSDAPA, DABFC, DABCIConsultant Psychotherapist in\nPrivate Practice\nLathrup Village, MI\nKapil Gupta, M.D.\nMedical WriterWinston-Salem, NC\nMaureen Haggerty\nMedical WriterAmbler, PA\nClare Hanrahan\nMedical WriterAsheville, NCThomas Scott Eagan\nStudent ResearcherUniversity of ArizonaTucson, AZ\nAltha Roberts Edgren\nMedical WriterMedical InkSt. Pau

## Prompt Template

In [79]:
# Prompt text for the QA chain. {context} and {question} are the two placeholders
# declared as input variables when the PromptTemplate is built from this string.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer: 
"""

In [80]:
from langchain.prompts import PromptTemplate
# Wrap the template string with its two placeholders, then package it as the
# keyword arguments handed to the chain constructor.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=['context', 'question'],
)
chain_type_kwargs = dict(prompt=PROMPT)

## Make a chain

In [81]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

In [82]:
# Temperature 0 keeps answers as deterministic as the API allows: this is factual,
# context-grounded QA, and the prompt asks the model not to invent answers.
llm = OpenAI(temperature = 0)

In [83]:
# Assemble the retrieval QA chain: a 'stuff' chain over a retriever built from the
# FAISS store (search_kwargs k=2), using the custom prompt and returning the
# source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [84]:
## Cite Sources
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source of each retrieved document.

    Args:
        llm_response: Mapping returned by the QA chain. ``'result'`` is required;
            ``'source_documents'`` is optional and, when present, holds objects
            exposing a ``metadata`` dict.

    Raises:
        KeyError: If ``'result'`` is missing from ``llm_response``.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    # Tolerate a chain built without return_source_documents (key absent or None).
    for source in llm_response.get('source_documents') or []:
        # Not every document carries a 'source' entry; fall back instead of raising.
        print(source.metadata.get('source', 'unknown'))

In [85]:
# Run the retrieval QA chain on the query, then print the answer with its sources.
llm_response = qa_chain(query)
process_llm_response(llm_response)

I don't know who Kushal Banda is.


Sources:
../Data/Medical_book.pdf
../Data/Medical_book.pdf
