# Chatbot + RAG (MIMIC TextBooks Context Retrieval)

## Setup and Dependencies

This section installs the necessary libraries.

In [None]:
!pip install requests tqdm faiss-cpu transformers tensorflow sentence-transformers textblob gensim numba

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.8/255.8 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
Successfully installed faiss-cpu-1.9.0 sentence-transformers-3.2.1


## Flask Application (Integration)
This section defines the Flask application that serves as the backend for the chatbot, handling document retrieval and response generation.



In [1]:
%%writefile app.py
from flask import Flask, request, jsonify
import faiss
import numpy as np
import requests
import zipfile
from pathlib import Path
from tqdm import tqdm
import re
from gensim.utils import simple_preprocess
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import torch
import tensorflow as tf
import gc
from numba import cuda
import json
import os
import logging

# Directory to store downloaded and extracted data
DATA_DIR = Path("./mimic_textbooks")
# Define file path for the saved FAISS index
INDEX_FILE_PATH = "faiss_index.idx"
CHUNKED_DOCS_PATH = "chunked_documents.json"


def save_chunked_documents(documents, file_path=CHUNKED_DOCS_PATH):
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(documents, f)

# Function to load chunked documents
def load_chunked_documents(file_path=CHUNKED_DOCS_PATH):
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Download and extract the dataset zip file
def download_and_extract_zip(url, extract_to=DATA_DIR):
    extract_to.mkdir(parents=True, exist_ok=True)
    zip_path = extract_to / "textbooks.zip"
    response = requests.get(url, stream=True)
    with open(zip_path, "wb") as file:
        for chunk in tqdm(response.iter_content(chunk_size=1024), unit='KB'):
            if chunk:
                file.write(chunk)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_to)

# Text processing functions
def load_text_files(directory):
    texts = []
    for file_path in Path(directory).glob("*.txt"):
        with open(file_path, "r", encoding="utf-8") as file:
            texts.append(file.read())
    return texts

def clean_and_tokenize(text):
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    tokens = simple_preprocess(text)
    return ' '.join(tokens)

def chunk_text(text, chunk_size=200):
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

# Function to generate embeddings for all chunks in batches with GPU acceleration
def get_embeddings_in_batch(texts, batch_size=16):
    all_embeddings = []

    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i + batch_size]

        # Tokenize and move batch to GPU
        inputs = retrieval_tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to("cuda")

        with torch.no_grad():
            outputs = retrieval_model(**inputs)
            batch_embeddings = torch.mean(outputs.last_hidden_state, dim=1).cpu().numpy()

        all_embeddings.extend(batch_embeddings)
    return np.array(all_embeddings)

# Retrieval function
def get_query_embedding(query):
    inputs = retrieval_tokenizer(query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = retrieval_model(**inputs)
        embedding = torch.mean(outputs.last_hidden_state, dim=1).cpu().numpy()
    return embedding

def retrieve_documents(query, top_k=5):
    query_embedding = get_query_embedding(query).astype("float32")
    distances, indices = index.search(query_embedding, top_k)
    return [chunked_documents[idx] for idx in indices[0]]

# Generation function
def generate_response(query, context, gpu_device, max_new_tokens=500):
    input_text = f"User query: {query}\n\nContext:\n{context}\n\nAnswer:"
    inputs = generation_tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(gpu_device)
    with torch.no_grad():
        outputs = generation_model.generate(inputs["input_ids"], max_new_tokens=max_new_tokens)
    return generation_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Clear GPU function

def clear_gpu():
  torch.cuda.empty_cache()  # Clear GPU memory from torch
  gc.collect()
  numba_device = cuda.get_current_device() # Clear GPU memory from tf
  numba_device.reset()


# Initialize Flask app
app = Flask(__name__)

print("Clearing GPU...")
clear_gpu()

logging.basicConfig(level=logging.DEBUG)
gpu_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Embedding and FAISS index setup
retrieval_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
retrieval_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to(gpu_device)

logging.info("Retrieval model in GPU")

# Check if chunked documents exist, otherwise process and save them
if os.path.exists(CHUNKED_DOCS_PATH):
    logging.info("Loading chunked documents...")
    chunked_documents = load_chunked_documents()
else:
    logging.info("Downloading dataset...")

    # Download and extract textbooks
    dataset_url = "https://www.dropbox.com/scl/fi/54p9kkx5n93bffyx08eba/textbooks.zip?rlkey=2y2c5x8y0uncnddichn9cmd7n&st=m290nmkk&dl=1"
    download_and_extract_zip(dataset_url)

    logging.info("Creating chunks...")

    # Load, clean, and chunk documents
    documents = load_text_files(DATA_DIR / "textbooks/en")
    cleaned_documents = [clean_and_tokenize(doc) for doc in documents]
    chunked_documents = []
    for doc in cleaned_documents:
        chunked_documents.extend(chunk_text(doc))
    save_chunked_documents(chunked_documents)

# Check if FAISS index file exists, otherwise create a new one
if os.path.exists(INDEX_FILE_PATH):
    logging.info("Loading FAISS index from disk...")
    index = faiss.read_index(INDEX_FILE_PATH)
else:

    logging.info("Generating embeddings...")

    # Generate embeddings and populate FAISS index
    embeddings = get_embeddings_in_batch(chunked_documents, batch_size=128)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings.astype("float32"))
    faiss.write_index(index, INDEX_FILE_PATH)

# Clear GPU
# Move model to CPU and clear GPU memory
retrieval_model.to("cpu")
clear_gpu()

logging.info("Retrieval model in CPU, clearing GPU")

# Load the generative model on GPU
generation_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
generation_model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
generation_model.to(gpu_device)

logging.info("Generation model in GPU")

print("Initialization complete.")

# Flask endpoint
@app.route("/chat", methods=["POST"])
def chat():
    try:
        logging.info("Received request")
        data = request.get_json()
        user_query = data.get("query")
        max_tokens = data.get("max_tokens", 500)

        logging.info("Retrieving documents")
        retrieved_docs = retrieve_documents(user_query)
        retrieved_text = " ".join(retrieved_docs)

        logging.info("Generating response")
        response_text = generate_response(user_query, retrieved_text, gpu_device=gpu_device, max_new_tokens=max_tokens)

        logging.info("Sending response back")
        return jsonify({"response": response_text})
    except Exception as e:
        logging.error(f"Error occurred: {e}")
        return jsonify({"error": "An internal error occurred"}), 500

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000, debug=True)


Writing app.py


## Running the Flask Application

This cell executes the Flask application in the background using `nohup` so that it continues running even after the notebook session ends.

In [None]:
!nohup python app.py &

nohup: appending output to 'nohup.out'


## Clearing GPU Memory

This cell includes commands to clear the GPU memory, which can be helpful after loading large models or performing intensive computations.

In [None]:
from numba import cuda
import gc
import torch

torch.cuda.empty_cache()  # Clear GPU memory from torch
gc.collect()
device = cuda.get_current_device() # Clear GPU memory from tf
device.reset()

## Checking Running Processes

This command checks for processes listening on network ports, which can be useful to verify if the Flask application is running and listening on port 5000.

In [None]:
!sudo lsof -i -P -n | grep LISTEN

node         7 root   21u  IPv6  18071      0t0  TCP *:8080 (LISTEN)
kernel_ma   12 root    3u  IPv4  16839      0t0  TCP 172.28.0.12:6000 (LISTEN)
colab-fil   86 root    3u  IPv4  27706      0t0  TCP 127.0.0.1:3453 (LISTEN)
jupyter-n  131 root    7u  IPv4  18142      0t0  TCP 172.28.0.12:9000 (LISTEN)
python3   1504 root   22u  IPv4  65155      0t0  TCP 127.0.0.1:41875 (LISTEN)
python3   1549 root    3u  IPv4  60986      0t0  TCP 127.0.0.1:40727 (LISTEN)
python3   1549 root    5u  IPv4  60987      0t0  TCP 127.0.0.1:34661 (LISTEN)
python3   4760 root   40u  IPv4 151255      0t0  TCP *:5000 (LISTEN)
python3   4875 root   40u  IPv4 151255      0t0  TCP *:5000 (LISTEN)
python3   4875 root   49u  IPv4 151255      0t0  TCP *:5000 (LISTEN)


## Getting Hostname and IP Address

This cell retrieves the hostname and IP address of the Colab environment, which will be used to connect to the running Flask application.

In [None]:
import requests
from socket import gethostname, gethostbyname

hostname = gethostname()
ip = gethostbyname(hostname)

print(f"Hostname: {hostname}")
print(f"IP Address: {ip}")

Hostname: 7b5277268d83
IP Address: 172.28.0.12


## Chatbot Interaction Function

This cell defines a Python function `ask_chatbot` that sends queries to the running Flask application and retrieves the chatbot's response.

In [None]:
def ask_chatbot(query):
    response = requests.post(f"http://{ip}:5000/chat", json={"query": query, "max_tokens": 1500})
    return response.json()["response"]

## Testing the Chatbot

These cells demonstrate how to use the `ask_chatbot` function to interact with the deployed chatbot and get responses to different queries.

In [None]:
print(ask_chatbot("Which are the vessels that irrigate the leg?"))

User query: What are the causes of heart failure?

Context:
down to six principal mechanisms failure of the pump in the most common situation the cardiac muscle contracts weakly and the chambers cannot empty systolic dysfunction in some cases the muscle cannot relax sufficiently to permit ventricular filling resulting in diastolic dysfunction obstruction to flow lesions that prevent valve opening eg calcific aortic valve stenosis or cause increased ventricular chamber pressures eg systemic hypertension or aortic coarctation can overwork the myocardium which has to pump against the obstruction regurgitant flow valve pathology that allows backward flow of blood results in increased volume workload and may overwhelm the pumping capacity of the affected chambers shunted flow defects congenital or acquired that divert blood inappropriately from one chamber to another or from one vessel to another lead to pressure and volume overloads disorders of cardiac conduction uncoordinated cardiac imp

In [None]:
print(ask_chatbot("What do beta blockers do?"))

User query: What do beta blockers do?

Context:
they are an excellent source of firstline therapy especially for migraine sufferers the original formulation propranolol is highly lipid soluble and contributed to bothersome side effects such as depression sleep disturbances nightmares in the elderly and constipation in higher doses propranolol has relative lack of beta selectivity which promotes other undesirable phenomena formulations such as atenolol are water soluble are beta selective and have fewer side effects than propranolol at higher doses beta effects emerge there is no evidence to support speculation that beta selective agents may be safe for use in individuals who have asthma an advantage of watersoluble agents is longer halflife reduced dosing schedules improve compliance side effects of betablockers include an increase in triglyceride levels and decrease in highdensity lipoprotein hdl cholesterol and blunting of adrenergic release in response to hypoglycemia nsaids may dec

In [None]:
print(ask_chatbot("How do cells of the pancreas look like in the microscope?"))

User query: How do cells of the pancreas look like in the microscope?

Context:
scattered throughout the organ in cell groupings of varying size fig it is estimated that million to million islets constitute about to of the volume of the pancreas but are most numerous in the tail individual islets may contain only few cells or many hundreds of cells plate page their polygonal cells are arranged in short chapter digestive system iii liver gallbladder and pancreas pancr eas figure electron micrograph of the apical cytoplasm of several pancreatic acinar cells one pancreatic acinar cell is outlined by the dashed line nuclei of adjoining cells are evident at the bottom left and right of the electron micrograph the apical cytoplasm contains extensive rough endoplasmic reticulum rer mitochondria secretory granules and golgi profiles at the apices of these cells lumen is present into which the zymogen granules are discharged junctional complex jc is indicated near the lumen irregular cords that

In [None]:
print(ask_chatbot("How do I detect pedophilia and psychosis in a patient?"))

User query: How do I detect pedophilia and psychosis in a patient?

Context:
sexual behavior temperamental there appears to be an interaction between pedophilia and antisocial ity such that males with both traits are more likely to act out sexually with children thus males with pedophilia environmental adult males with pedophilia often report that they were sexually abused as children it is unclear however whether this correlation reects causal inuence of childhood sexual abuse on adult pedophilia genetic and physiological since pedophilia is necessary condition for pedophilic dis order any factor that increases the probability of pedophilia also increases the risk of pe dophilic disorder there is some evidence that perturbation in utero increases the probability of development of pedophilic orientation laboratory measures of sexual interest which are sometimes useful in di agnosing pedophilic disorder in males are not necessarily useful in diagnosing this disorder in females even when

In [None]:
print(ask_chatbot("How do I treat and what medication should I give to someone with bipolar disorder? Please list example medications"))

User query: How do I treat and what medication should I give to someone with bipolar disorder? Please list example medications

Context:
with mixed bipolar disorder in which depressive and manic manifestations occur within single episode of illness perhaps the most marked change in the treatment of bipolar disease is to initiate one of the approved antipsychotic medications quetiapine fluoxetine rather than lithium to bring both the depression and episodic cycling into mania under control failing this mood stabilizing drug such as lamotrigine or divalproex has been used as mentioned earlier there is scant evidence that these approaches are superior to lithium the point to be made is that the use of conventional antidepressants is currently less popular because of the risk of worsening depression to the point of suicidal state or of inducing mania other combinations have been used such as olanzapine with serotonergic antidepressant eg fluoxetine these are summarized in the review by fry

In [None]:
print(ask_chatbot("Which are the vessels that irrigate the leg?"))

User query: Which are the vessels that irrigate the leg?

Context:
its location the levator ani nerve is susceptible to injury through parturition and pelvic surgery such as during sacrospinous or iliococcygeus vaginal vault suspensions figure the ligaments and fascial support of the pelvic viscera the muscles of the urogenital diaphragm anteriorly reinforce the pelvic diaphragm and are intimately related to the vagina and the urethra they are enclosed between the inferior and superior fascia of the urogenital diaphragm the muscles include the deep transverse perineal and sphincter urethrae table blood vessels the pelvic blood vessels supply genital structures as well as the following urinary and tracts muscles of the abdominal wall pelvic oor and perineum buttocks and upper thighs fasciae other connective tissue and bones skin and other superficial structures classically vessels supplying organs are known as visceral vessels and those supplying supporting structures are called parieta