In [43]:
import os
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
from langchain import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.chains import LLMChain
from langchain.schema.runnable import RunnablePassthrough

from langchain.llms import HuggingFacePipeline
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer,AutoModelForCausalLM,BitsAndBytesConfig,pipeline

import torch

In [31]:
# Report whether a CUDA GPU is visible to PyTorch and, if so, print its name
# and the memory figures PyTorch reports for device 0 (in GB, one decimal).
if torch.cuda.is_available():
    print("GPU is available.")
    print('Using GPU: ', torch.cuda.get_device_name(0))
    print('Memory Usage: ')
    print('Allocated: ', round(torch.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
    # torch.cuda.memory_cached() is deprecated; memory_reserved() is its
    # replacement, so the "Cached" figure is read from there.
    print('Cached: ', round(torch.cuda.memory_reserved(0) / 1024**3, 1), 'GB')
else:
    print("GPU is not available.")

GPU is available.
Using GPU:  NVIDIA A100-SXM4-40GB
Memory Usage: 
Allocated:  5.1 GB
Cached:  11.4 GB


In [15]:
# Target device string: the current CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = f'cuda:{torch.cuda.current_device()}'
else:
    device = 'cpu'


In [16]:
# Quantization settings handed to the model loader: 4-bit loading with the
# "nf4" quantization type, "float16" compute dtype, and double quantization off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype='float16',
)

In [23]:
# Local checkpoint directory of the causal language model.
model_id = "../pretrained_models/BioMistral-7B"

# Load the model with the quantization settings from `bnb_config`;
# device_map="auto" leaves device placement to the library.
# NOTE(review): do_sample is a generation option rather than a loading option.
# It is kept unchanged because later generation may depend on it — confirm, and
# consider passing it only where text is generated.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    do_sample=True,
    device_map="auto",
)

# Load the tokenizer from the same checkpoint (single, non-chained assignment).
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [24]:
# Sanity-check the bare model (no retrieval) with a single free-form prompt.
prompt = "tell me about covid? in 50 words"
model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

# Sample up to 300 new tokens; the pad token id is set explicitly to the EOS id.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=300,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
# skip_special_tokens=True keeps tokenizer markers such as the leading "<s>"
# out of the text that is shown to the reader.
decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Turn any literal backslash-n sequences into real line breaks before printing.
print(decoded.replace('\\n', '\n'))

<s> tell me about covid? in 50 words or less.( 6)so you are saying covid is still around?( 7)yes.( 8)its still around?( 9)not really( 10)not really?( 11)what do you mean not really? so you saying that covid is not really here?( 12)im saying there still 1000 people die from it each day.( 13)is there 1000?( 14)covid is really in america?( 15)what do you mean covid is in america?( 16)what did you mean by that? like in 2021?( 17)i told you this year,( 18)there is new variant around?( 19)what are the variants?( 20)new variant? what are the new variants?( 21)what do you mean covid is still around?( 22)what do you mean?( 23)how many people get vaccinated?( 24)what do you mean?( 25)do you vaccinate?( 26)what you talk about?( 27)you talking about vaccination?( 28)new variant covid?( 29)what do you mean new variant?( 30)what means new variant? do you mean new vaccine?( 31)you are saying there is


In [25]:
# Local checkpoint directory of the sentence-embedding model used to vectorise text.
emb_model_name = "../pretrained_models/pubmedbert-base-embeddings"

# Wrap the checkpoint in LangChain's sentence-transformer embedding class.
embeddings = SentenceTransformerEmbeddings(model_name=emb_model_name)

# Print the wrapper so the loaded architecture and settings can be inspected.
print(embeddings)


client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
) model_name='../pretrained_models/pubmedbert-base-embeddings' cache_folder=None model_kwargs={} encode_kwargs={} multi_process=False show_progress=False


In [26]:

# Load every file matching **/*.pdf under dataset/medical_data/, with a progress bar.
loader = DirectoryLoader(
    'dataset/medical_data/',
    glob="**/*.pdf",
    loader_cls=UnstructuredFileLoader,
    show_progress=True,
)
documents = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=700, chunk_overlap=70).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=70, chunk_size=700)
texts = text_splitter.split_documents(documents)

# Print the second chunk as a quick visual check of the split.
print(texts[1])



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:17<00:00,  8.76s/it]

page_content='relation to exercise prescription, management of complications and palliative care.\n\nThroughout our short course we will emphasise the importance of effective communication\n\nwith MDT, patients and their families to ensure optimum care is provided.\n\nWe trust that the following short-course and information booklet will add to your knowledge\n\naround the area of breast cancer care and enhance your skills as a developing clinician.\n\nJohn Allen, Clodagh Burrell, Clara Caplice, Deirdre Collins, Patrick McGreal & Joanne Purcell.\n\nthis\n\n2\n\nLearning Objectives\n\n1. To describe the pathophysiology of cancer with a primary focus on breast cancer.' metadata={'source': 'dataset/medical_data/Oncology.pdf'}





In [27]:
# Embed the chunks with `embeddings` and index them in a Qdrant collection
# named "vector_db"; location=":memory:" requests Qdrant's in-memory mode.
qdrant = Qdrant.from_documents(
    texts,
    embeddings,
    collection_name="vector_db",
    location=":memory:",
)

print("Vector DB Successfully Created!")

Vector DB Successfully Created!


In [28]:
# Expose the vector store as a retriever; k=1 is forwarded as a search argument.
retriever = qdrant.as_retriever(search_kwargs=dict(k=1))

In [34]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    task="text-generation",
    # temperature only influences decoding when sampling is enabled, so turn
    # sampling on explicitly instead of relying on the model's defaults.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=300,
    eos_token_id=tokenizer.eos_token_id,
)

# Expose the pipeline through LangChain's LLM interface so chains can call it.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [35]:
# Instruction template for the RAG prompt; the {context} and {question}
# placeholders are filled in for each query.
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer. Answer must be detailed and well explained.
Helpful answer:
"""

# Wrap the raw template string, declaring the two variables it expects.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [38]:
# Bind the prompt template to the pipeline-backed LLM as a single chain.
llm_chain = LLMChain(prompt=prompt, llm=mistral_llm)

In [44]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with blank lines between them.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming query goes to the retriever and is also passed through unchanged
# as the question. Retrieved documents are flattened to plain text first so that
# {context} receives their text rather than the repr of a list of documents.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | llm_chain
)

In [45]:
# Ask one question through the full retrieval + generation chain.
query = "tell me about covid? in 50 words"

response = rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [46]:
# Show the question, then the answer with literal backslash-n sequences
# turned into real line breaks.
print(f"Question: {response['question']}")
print(response["text"].replace('\\n', '\n'))

Question: tell me about covid? in 50 words
Covid is caused by SARS CoV-2 virus. It is transmitted through respiratory droplets and contact with infected people. Symptoms include fever, cough, shortness of breath, fatigue, muscle pain, sore throat, loss of taste and smell, diarrhea, nausea, vomiting, and conjunctivitis. Most people recover without treatment while some may need supportive care and antiviral therapy.
