In [1]:
!pip install datasets requests tqdm faiss-cpu transformers tensorflow sentence-transformers textblob gensim numba



In [2]:
# Import necessary libraries
import os
import json
import faiss
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import torch
from tqdm import tqdm
import re
from gensim.utils import simple_preprocess
from numba import cuda
import gc

# Define file paths for saving data
CHUNKED_DOCS_PATH = "pubmedqa_chunked_documents.json"
INDEX_FILE_PATH = "faiss_index_pubmedqa.idx"
gpu_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.environ["CUDA_LAUNCH_BLOCKING"] = str(1)
retrieval_device_inference = "cpu"
# Clear GPU function

def clear_gpu():
  if torch.cuda.is_available():
    torch.cuda.empty_cache()  # Clear GPU memory from torch
    numba_device = cuda.get_current_device() # Clear GPU memory from tf
    numba_device.reset()
  gc.collect()
clear_gpu()

In [3]:
!env

SHELL=/bin/bash
NV_LIBCUBLAS_VERSION=12.2.5.6-1
NVIDIA_VISIBLE_DEVICES=all
COLAB_JUPYTER_TRANSPORT=ipc
NV_NVML_DEV_VERSION=12.2.140-1
NV_CUDNN_PACKAGE_NAME=libcudnn8
CGROUP_MEMORY_EVENTS=/sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events
NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.2
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1
VM_GCE_METADATA_HOST=169.254.169.253
HOSTNAME=e640242a87cf
LANGUAGE=en_US
TBE_RUNTIME_ADDR=172.28.0.1:8011
COLAB_TPU_1VM=
GCE_METADATA_TIMEOUT=3
NVIDIA_REQUIRE_CUDA=cuda>=12.2 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=5

In [4]:

# Load the PubMedQA dataset
def load_pubmedqa():
    dataset = load_dataset("pubmed_qa", "pqa_unlabeled")
    return dataset["train"]

# Text processing and chunking functions
def clean_and_tokenize(text):
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    tokens = simple_preprocess(text)
    return ' '.join(tokens)

def chunk_text(text, chunk_size=200):
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

# Process the dataset and save chunks
def process_and_save_documents(dataset):
    chunked_documents = []
    for example in dataset:
        question = example["question"]
        answer = example["long_answer"]
        for context_segment, label in zip(example["context"]["contexts"], example["context"]["labels"]):
            cleaned_context = clean_and_tokenize(context_segment)
            chunks = chunk_text(cleaned_context)
            for chunk in chunks:
                doc_entry = {
                    "question": question,
                    "context_chunk": chunk,
                    "label": label,
                    "answer": answer
                }
                chunked_documents.append(doc_entry)
    # Save processed chunks to a file for easy reloading
    with open(CHUNKED_DOCS_PATH, "w", encoding="utf-8") as f:
        json.dump(chunked_documents, f)
    return chunked_documents

clear_gpu()

# Load chunked documents if they exist
if os.path.exists(CHUNKED_DOCS_PATH):
    with open(CHUNKED_DOCS_PATH, "r", encoding="utf-8") as f:
        chunked_documents = json.load(f)
else:
    dataset = load_pubmedqa()
    chunked_documents = process_and_save_documents(dataset)


In [5]:

# Embedding model and FAISS index setup
retrieval_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
retrieval_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
retrieval_model.to(gpu_device)

# Generate embeddings for all chunks in batches and build the FAISS index
def build_faiss_index(documents, batch_size=16):
    all_embeddings = []
    for i in tqdm(range(0, len(documents), batch_size), desc="Embedding chunks"):
        batch_texts = [doc["context_chunk"] for doc in documents[i:i + batch_size]]
        inputs = retrieval_tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(gpu_device)
        with torch.no_grad():
            outputs = retrieval_model(**inputs)
            batch_embeddings = torch.mean(outputs.last_hidden_state, dim=1).cpu().numpy()
        all_embeddings.extend(batch_embeddings)
    embedding_matrix = np.array(all_embeddings).astype("float32")

    # Build and save the FAISS index
    dimension = embedding_matrix.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embedding_matrix)
    faiss.write_index(index, INDEX_FILE_PATH)
    return index

# Load FAISS index if exists, otherwise build it
if os.path.exists(INDEX_FILE_PATH):
    index = faiss.read_index(INDEX_FILE_PATH)
else:
    index = build_faiss_index(chunked_documents)

# Function to retrieve relevant documents
def get_query_embedding(query, device):
    inputs = retrieval_tokenizer(query, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        outputs = retrieval_model(**inputs)
        embedding = torch.mean(outputs.last_hidden_state, dim=1).cpu().numpy()
    return embedding

def retrieve_documents(query, device="cpu", top_k=5):
    query_embedding = get_query_embedding(query, device).astype("float32")
    distances, indices = index.search(query_embedding, top_k)
    retrieved_docs = [chunked_documents[idx] for idx in indices[0]]
    return retrieved_docs

retrieval_model.to(retrieval_device_inference)
clear_gpu()



In [6]:

# Generative model for response generation
generation_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
generation_model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
# Move the model to the GPU
generation_model.to(gpu_device)

# Function to generate a response using retrieved context
def generate_response(query, retrieved_docs, max_new_tokens=100):
    context = " ".join([doc["context_chunk"] for doc in retrieved_docs])
    input_text = f"User query: {query}\n\nContext:\n{context}\n\nAnswer:"
    inputs = generation_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(gpu_device)
    with torch.no_grad():
        outputs = generation_model.generate(inputs["input_ids"], max_new_tokens=max_new_tokens)
    response = generation_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Chatbot function combining retrieval and generation
def chatbot(query, max_tokens=100):
    retrieved_docs = retrieve_documents(query, device=retrieval_device_inference)
    response = generate_response(query, retrieved_docs, max_new_tokens=max_tokens)
    return response




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
query = "Is naturopathy as effective as conventional therapy for treatment of menopausal symptoms?"
print("User query:", query)
print("Chatbot response:", chatbot(query))


User query: Is naturopathy as effective as conventional therapy for treatment of menopausal symptoms?


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


Chatbot response: User query: Is naturopathy as effective as conventional therapy for treatment of menopausal symptoms?

Context:
in univariate analyses patients treated with naturopathy for menopausal symptoms reported higher monthly incomes versus were less likely to be smokers versus exercised more frequently and reported higher frequencies of decreased energy versus insomnia versus and hot flashes versus at baseline than those who received conventional treatment in multivariate analyses patients treated with naturopathy were approximately seven times more likely than conventionally treated patients to report improvement for insomnia odds ratio or confidence interval ci and decreased energy or ci naturopathy patients reported improvement for anxiety or ci hot flashes or ci menstrual changes or ci and vaginal dryness or ci about as frequently as patients who were treated conventionally although the use of alternative medicine in the united states is increasing no published studies ha