In [1]:
!pip install datasets transformers



In [2]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

In [3]:
#Dataset

!pip install langchain



In [4]:
with open("train.txt", "r") as f:
    data = f.read()

In [5]:
data[:100]

'4\tCatheterization laboratory events and hospital outcome with direct angioplasty for acute myocardia'

In [6]:
from langchain.docstore.document import Document as LangchainDocument

In [7]:
raw_database = LangchainDocument(page_content=data)

In [8]:
MARKDOWN_SEPARATORS = [
    "\n#{1,6} ",
    "'''\n",
    "\n\\*\\*\\**\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",
    "",
]

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [10]:
splitter = RecursiveCharacterTextSplitter(
    separators=MARKDOWN_SEPARATORS,
    chunk_size=1000,
    chunk_overlap=100,
    )

In [11]:
processed_data = splitter.split_documents([raw_database])

In [12]:
processed_data[0]

Document(page_content='4\tCatheterization laboratory events and hospital outcome with direct angioplasty for acute myocardial infarction To assess the safety of direct infarct angioplasty without antecedent thrombolytic therapy, catheterization laboratory and hospital events were assessed in consecutively treated patients with infarctions involving the left anterior descending (n = 100 patients), right (n = 100), and circumflex (n = 50) coronary arteries. The groups of patients were similar for age (left anterior descending coronary artery, 59 years; right coronary artery, 58 years; circumflex coronary artery, 62 years), patients with multivessel disease (left anterior descending coronary artery, 55%; right coronary artery, 55%; circumflex coronary artery, 64%), and patients with initial grade 0/1 antegrade flow (left anterior descending coronary artery, 79%; right coronary artery, 84%; circumflex coronary artery, 90%). Cardiogenic shock was present in eight patients with infarction of

In [13]:
!pip install langchain_community
!pip install sentence_transformers



**Tokenizing/Vectorizing the dataset**

In [14]:
from langchain_community.embeddings import HuggingFaceEmbeddings
model_name = "thenlper/gte-large"

In [15]:
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    multi_process=True,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True}, #Set 'True' for cosine singularity
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [16]:
sample_vector = embedding_model.embed_query("Hello World")
print(len(sample_vector))

1024


In [17]:
!pip install pinecone-client



In [18]:
from pinecone import Pinecone

pc = Pinecone(api_key="3e61f584-0a90-44cc-bdac-11f8feffd8fb")
index = pc.Index("lab-rag-index")

In [19]:
index_description = index.describe_index_stats()
index_dimension = 1024

data_to_add = []

for i, entry in tqdm(enumerate(processed_data[:5])):
    text = entry.page_content
    vector = embedding_model.embed_query(text)


    data_to_add.append({
        "id": "vec_{}".format(i),
        "values": vector,
        "metadata": {"text": text}
    })

0it [00:00, ?it/s]

In [20]:
index.upsert(data_to_add, namespace="ns1")

{'upserted_count': 5}

**Loading a LLM**

In [21]:
from transformers import pipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "HuggingFaceH4/zephyr-7b-beta"
#Zephyr is a series of language models that are trained to act as helpful assistants.

In [22]:
!pip uninstall bitsandbytes -y
!pip uninstall accelerate -y
!pip install bitsandbytes
!pip install accelerate


Found existing installation: bitsandbytes 0.43.3
Uninstalling bitsandbytes-0.43.3:
  Successfully uninstalled bitsandbytes-0.43.3
Found existing installation: accelerate 0.33.0
Uninstalling accelerate-0.33.0:
  Successfully uninstalled accelerate-0.33.0
Collecting bitsandbytes
  Using cached bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Using cached bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.3
Collecting accelerate
  Using cached accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Using cached accelerate-0.33.0-py3-none-any.whl (315 kB)
Installing collected packages: accelerate
Successfully installed accelerate-0.33.0


In [23]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [24]:
llm_model = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    max_new_tokens=500
)

In [25]:
llm_model("Hey what's up?")

[{'generated_text': "Hey what's up?\n\nJASON:\n(smiling) Hey, Sarah. How's it going?\n\nSARAH:\n(smiling back) It's going well, thanks. How about you?\n\nJASON:\n(grinning) Same here. Listen, Sarah, I know we've been friends for a long time, but I've been thinking about you a lot lately. I was wondering if you'd like to go out with me sometime?\n\nSARAH:\n(surprised but happy) Really? I'd like that a lot, Jason.\n\nJASON:\n(smiling) Great! How about dinner next Friday at that new Italian place downtown?\n\nSARAH:\n(smiling back) That sounds perfect.\n\nJASON:\n(grinning) I'll see you then, Sarah.\n\nSARAH:\n(smiling) See you then, Jason.\n\n(Jason and Sarah exit the coffee shop, both looking happy and excited.)\n\nINT. ITALIAN RESTAURANT - NEXT FRIDAY\n\n(Jason and Sarah are sitting across from each other at a candlelit table, enjoying a delicious meal.)\n\nJASON:\n(smiling) This place is amazing, isn't it?\n\nSARAH:\n(smiling back) It really is. Thank you for bringing me here, Jason.\

**Prompting the model**

In [26]:
prompt = """
<|system|>
You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
Respond only to the question asked.

<|user|>
Context:
{}
---
Here is the question you need to answer.

Question: {}
<|assistant|>
"""

In [27]:
user_input = input("User: ")

vectorized_input = embedding_model.embed_query(user_input)

context = index.query(
    namespace="ns1",
    vector=vectorized_input,
    top_k=1,
    include_metadata=True
)

answer = llm_model(prompt.format(context['matches'][0]['metadata']['text'], user_input))

print("AI response: ", answer[0]['generated_text'])

User: tell me about fiberoptic bronchoscopty
AI response:  
<|system|>
You are a helpful assistant that answers on medical questions based on the real information provided from different sources and in the context.
Give the rational and well written response. If you don't have proper info in the context, answer "I don't know"
Respond only to the question asked.

<|user|>
Context:
rectosigmoid hyperplastic polyps had the same risk for additional proximal adenomas as patients with rectosigmoid adenomatous polyps.
---
Here is the question you need to answer.

Question: tell me about fiberoptic bronchoscopty
<|assistant|>
Fiberoptic bronchoscopy is a medical procedure that allows healthcare providers to examine the airways inside the lungs using a flexible tube with a small camera attached to the end. This procedure is commonly used to diagnose and monitor conditions affecting the respiratory system, such as lung cancer, infections, and airway diseases like asthma and chronic obstructive p