In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from tqdm.autonotebook import tqdm




  from tqdm.autonotebook import tqdm


In [2]:
PINECONE_API_KEY = "PINECONE_API_KEY = pcsk_3UKeqN_A4GBHUpUF66wcie25FGvq52tm42ZmuWnwaBbP59C4GvTfA6YRZFvezhhra2jxfE"
PINECONE_API_ENV = "us-west-2-aws"

In [3]:
#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                            glob="*.pdf",  #only load pdf files
                            loader_cls=PyPDFLoader)
    documents = loader.load()

    return documents 

In [4]:
extracted_data = load_pdf("/Users/donghunshin/Documents/End-to-end-Medical-Chatbot-using-Llama2/data")

In [5]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20) #chunk_size: how many words LLM is analyzing at once
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [6]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 5859


In [6]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [12]:
embeddings = download_hugging_face_embeddings()

In [13]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [19]:
query_result = embeddings.embed_query("Hello")
print("Length", len(query_result))

Length 384


In [24]:
#Initializing the Pinecone
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)

index_name="medbud"

#Creating Embeddings for Each of The Text Chunks & storing
docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [26]:
#If we already have an index we can load it like this
docsearch=Pinecone.from_existing_index(index_name, embeddings)

query = "What are skin diseases?"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='tis, the condition is thought to be the result of an inherit-ed over-active immune system or a genetic defect thatcauses the skin to lose abnormally large amounts ofmoisture. The condition can be aggravated by a cycle thatdevelops in which the skin itches, the patient scratches,the condition worsens, the itching worsens, the patientscratches, etc. This cycle must be broken by relieving theitching to allow the skin time to heal. If the skin becomes', metadata={}), Document(page_content='Hair follicles —Tiny organs in the skin, each one of\nwhich grows a single hair.\nLupus erythematosus —An autoimmune disease\nthat can damage skin, joints, kidneys, and otherorgans.\nRingworm —A fungal infection of the skin, usually\nknown as tinea corporis.\nSystemic —Affecting all or most parts of the body.time, minoxidil produces satisfactory results in about one', metadata={}), Document(page_content='Hair follicles —Tiny organs in the skin, each one of\nwhich grows a si

In [27]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


In [28]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [30]:
llm=CTransformers(model="/Users/donghunshin/Documents/End-to-end-Medical-Chatbot-using-Llama2/model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [32]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [33]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Response :  Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.
Response :  
It is generally considered right to do what you want within reason, as long as it does not harm others or infringe on their rights. This means that you have the freedom to make choices and decisions for yourself, as long as they are not hurtful or unjust towards others. However, it is important to recognize that your wants and desires may not always align with the needs and expectations of others, and that it is important to consider the impact of your actions on those around you. Ultimately, the key to making decisions that are right and just is to find a balance between your own desires and the needs and rights of others.


KeyboardInterrupt: Interrupted by user