In [1]:
print("OK")

OK


In [80]:
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers

In [None]:
PINECONE_API_KEY = ""
PINECONE_API_ENV = ""

In [6]:
#Extract the data
def load_data(file_path):
    loader = DirectoryLoader(file_path, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [7]:
extracted_data = load_data("data/")

In [14]:
extracted_data[20].page_content[:500]  # Display first 500 characters of the 21st document

'Traction\nTraditional Chinese medicine\nTrager psychophysical integration\nTranscranial Doppler\nultrasonography\nTransesophageal echocardiography\nTransfusion\nTranshepatic biliary catheterization\nTransient ischemic attack\nTransposition of the great arteries\nTransurethral bladder resection\nTransvaginal ultrasound\nTransverse myelitis\nTraumatic amputations\nTravelerâ€™s diarrhea\nTremors\nTrench fever\nTrichinosis\nTrichomoniasis\nTricuspid valve insufficiency\nTricuspid valve stenosis\nTrigeminal neuralgia\nTrigg'

In [15]:
#create text chunks
def create_text_chunks(documents):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    texts = text_splitter.split_documents(documents)
    return texts

In [16]:
text_chunks = create_text_chunks(extracted_data)
print(f"Number of text chunks: {len(text_chunks)}")

Number of text chunks: 40000


In [20]:
text_chunks[2].page_content

'The GALE\nENCYCLOPEDIA of\nMEDICINE\nTHIRD EDITION\nVOLUME\n\x81\n2\nC-F\nJACQUELINE L. LONGE, PROJECT EDITOR'

In [41]:
#dowload embedding model
def get_embedding_model():
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")
    return embeddings

In [42]:
embeddings = get_embedding_model()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [43]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='all-MiniLM-L12-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [44]:
query_result = embeddings.embed_query("Hello world")
print(len(query_result))

384


In [66]:
pc = Pinecone(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV
)
index_name = "medchat"
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
    pinecone_api_key=PINECONE_API_KEY
)

  docsearch = PineconeVectorStore.from_documents(


In [64]:
import os
print(os.getenv("PINECONE_API_KEY"))

None


In [65]:
os.environ["PINECONE_API_KEY"] = "pcsk_5sy1oR_6mRmG3eL9XMVfhf6Q5oJw7LeMwhA4b8SiKFhK2NFp7jS7C6hccR1nMzmwgF999c"

In [73]:
docsearch=PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings)

query = "What are allergies"

docs = docsearch.similarity_search(query, k=3)

print("result", docs[1].page_content)

result triggered by harmless, everyday substances. This is
the condition known as allergy, and the offending
substance is called an allergen. Common inhaled
allergens include pollen,dust, and insect parts from
tiny house mites. Common food allergens include
nuts, fish, and milk.
Allergic reactions involve a special set of cells
in the immune system known as mast cells. Mast
cells serve as guards in the tissues where the body
meets the outside world: the skin, the mucous


In [71]:
prompt_template="""
You are a helpful medical assistant. 
Use the following context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer and nothing else.
Helpful Answer:
"""

In [72]:
PROMPT=PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": PROMPT}

In [92]:
llm=CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin", 
    model_type="llama", 
    config={
        "temperature": 0.8,
        "max_new_tokens": 512,
    }
)

In [93]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={"k":2}),
    chain_type_kwargs=chain_type_kwargs
)

In [94]:
while True:
    user_input=input("Ask a medical question (or type 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    result=qa({"query": user_input})
    print("\nHelpful Answer:\n", result['result'], "\n")

  result=qa({"query": user_input})



Helpful Answer:
 Acne is a skin condition that occurs when new skin cells are laid down to replace damaged cells. 

