# Installing dependencies

In [None]:
# Base retrieval stack: FAISS (CPU build), LangChain and sentence-transformers.
!pip install faiss-cpu langchain sentence-transformers
!pip install -U langchain-community
# The next five lines upgrade overlapping sets of packages; the last one
# repeats every package already upgraded by the three lines before it.
!pip install --upgrade huggingface_hub
!pip install --upgrade sentence-transformers
!pip install --upgrade bitsandbytes
!pip install --upgrade transformers accelerate
!pip install --upgrade transformers bitsandbytes accelerate sentence-transformers
# Replace whatever langchain/pydantic versions were pulled in above with
# explicitly pinned ones.
# NOTE(review): a later cell's recorded error links to the pydantic 2.11 docs,
# which suggests this pydantic==1.10.13 pin was not the version in effect in
# that kernel session — confirm (a kernel restart after installing may be needed).
!pip uninstall langchain langchain-core pydantic -y
!pip install langchain==0.1.14 langchain-core==0.1.37 pydantic==1.10.13
!pip install langchain-openai==0.0.8
# Print the installed versions so the resolved environment is visible in the output.
!pip show langchain
!pip show langchain-core
!pip show pydantic

# Knowledge Graphs (diagnostic_kg) → JSON files containing diagnostic pathways and medical knowledge.


In [1]:
import json
import os
kg_path = "/kaggle/input/a5-data/mimic-iv-ext-direct-1.0.0/diagnostic_kg/Diagnosis_flowchart"

# Maps each flowchart file name to a flattened, newline-separated text version
# of that file's "knowledge" section.
knowledge_graphs = {}

# os.listdir() returns entries in arbitrary order; sorting keeps the insertion
# order of knowledge_graphs (and anything built from it) reproducible.
for file in sorted(os.listdir(kg_path)):
    if not file.endswith(".json"):
        continue

    with open(os.path.join(kg_path, file), "r", encoding="utf-8") as f:
        data = json.load(f)

    # Read here but not referenced by any later cell visible in this notebook.
    diagnostic_steps = data.get("diagnostic", {})
    knowledge_info = data.get("knowledge", {})

    # Collect the lines first and join once instead of growing a string with +=.
    knowledge_lines = []
    for step, details in knowledge_info.items():
        if isinstance(details, dict):
            # Nested entry: one "step - key: value" line per inner item.
            knowledge_lines.extend(
                f"{step} - {key}: {value}\n" for key, value in details.items()
            )
        else:
            # Flat entry: a single "step: value" line.
            knowledge_lines.append(f"{step}: {details}\n")

    knowledge_text = "".join(knowledge_lines)
    knowledge_graphs[file] = knowledge_text

# Annotated Clinical Notes (samples) → JSON files with real patient records and step-by-step diagnoses.

In [2]:
import os
import json

# Path to clinical notes
sample_path = "/kaggle/input/a5-data/mimic-iv-ext-direct-1.0.0/samples/Finished"

# Nested mapping: disease category -> subcategory -> list of note texts
clinical_notes = {}

# Every os.listdir() call is sorted: directory listing order is arbitrary, and
# the position of a note inside its list is later used to build its identifier,
# so an unsorted walk would give notes different ids from run to run.
for disease_category in sorted(os.listdir(sample_path)):
    disease_path = os.path.join(sample_path, disease_category)
    if not os.path.isdir(disease_path):  # Skip stray files next to the category folders
        continue
    clinical_notes[disease_category] = {}

    for subcategory in sorted(os.listdir(disease_path)):
        subcategory_path = os.path.join(disease_path, subcategory)
        if not os.path.isdir(subcategory_path):  # Only folders hold notes
            continue
        clinical_notes[disease_category][subcategory] = []

        for file in sorted(os.listdir(subcategory_path)):
            if not file.endswith(".json"):
                continue
            file_path = os.path.join(subcategory_path, file)

            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Flatten the record into one "key: value" line per top-level field.
            note_text = "".join(f"{key}: {value}\n" for key, value in data.items())

            clinical_notes[disease_category][subcategory].append(note_text)

# Now 'clinical_notes' contains structured data organized by disease category and subcategory


# Cleaning Text

In [3]:
import re

def clean_text(text):
    """
    Strip a piece of text down to word characters and basic punctuation.

    Steps, in order:
    - every run of consecutive newlines is collapsed to a single newline
    - every character that is not a word character, whitespace, '.', ';' or ','
      is deleted
    - leading and trailing whitespace is trimmed

    NOTE(review): the character filter also deletes ':', '/', '-', '%', '<' and
    '>', so a value such as "120/80" becomes "12080" — confirm this is intended
    for clinical text.
    """
    single_newlines = re.sub(r'\n+', '\n', text)
    allowed_chars_only = re.sub(r'[^\w\s.;,]', '', single_newlines)
    return allowed_chars_only.strip()

def clean_nested_data(data):
    """
    Walk a nested structure and run clean_text on every string found in it.

    Dictionaries are rebuilt with the same keys and cleaned values, lists are
    rebuilt with cleaned items, strings are cleaned directly, and any other
    value is handed back unchanged.
    """
    if isinstance(data, dict):
        cleaned = {}
        for key, value in data.items():
            cleaned[key] = clean_nested_data(value)
        return cleaned

    if isinstance(data, list):
        return list(map(clean_nested_data, data))

    # Leaves: only strings are cleaned; everything else passes through.
    return clean_text(data) if isinstance(data, str) else data

# Apply cleaning to both knowledge graphs and clinical notes.
# Both names are rebound to their cleaned versions, so the uncleaned text is no
# longer reachable through them after this cell has run.
knowledge_graphs = {key: clean_text(value) for key, value in knowledge_graphs.items()}  # flat dict: file name -> text
clinical_notes = clean_nested_data(clinical_notes)  # nested dict: category -> subcategory -> list of note strings


# Setting Format for Data Retrieval

In [4]:
from langchain.docstore.document import Document

# Build the retrieval corpus as a flat list of LangChain Documents.
# Start with one Document per knowledge-graph file.
documents = [
    Document(page_content=content, metadata={"source": file, "type": "knowledge_graph"})
    for file, content in knowledge_graphs.items()
]

# Then add one Document per clinical note. Each note is tagged with its disease
# category, its subcategory and a 1-based position within that subcategory.
for disease_category, subcategories in clinical_notes.items():
    for subcategory, notes in subcategories.items():
        documents.extend(
            Document(
                page_content=note,
                metadata={
                    "source": f"{disease_category}/{subcategory}/note_{idx+1}",
                    "type": "clinical_note",
                    "disease_category": disease_category,
                    "subcategory": subcategory
                }
            )
            for idx, note in enumerate(notes)
        )


# Storing Data in Vector Form

In [9]:
import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

# Verify CUDA and GPU.
# torch.cuda.get_device_name(0) raises when no CUDA device is present, so the
# device name is only queried after the availability check.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Device: none detected")

model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

if cuda_available:
    # Configure 4-bit quantization (bitsandbytes NF4 with double quantization)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True
    )
    # Load the model manually with quantization
    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="auto")
else:
    # The 4-bit path is only attempted on a GPU runtime; on CPU the model is
    # loaded without a quantization config.
    model = AutoModel.from_pretrained(model_name)

# Wrap it into SentenceTransformer
embedding_model = SentenceTransformer(model_name)
embedding_model._first_module().auto_model = model  # Swap in the model loaded above

# Initialize embeddings.
# NOTE(review): this is constructed from model_name alone; nothing here passes
# it the `model` / `embedding_model` objects built above, so it does not use
# the quantized weights.
embedding = HuggingFaceEmbeddings(model_name=model_name)

print("Model loaded successfully!")


CUDA available: True
GPU Device: Tesla T4


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  embedding = HuggingFaceEmbeddings(model_name=model_name)


Model loaded successfully!


# Converting Documents into Embeddings

In [10]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

# Embedding model used for indexing: all-MiniLM-L6-v2 from sentence-transformers
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # 384-dim model

# Rebuild the corpus from scratch as LangChain Documents.
# Knowledge graphs: one Document per file, with the file name as its source.
documents = [
    Document(page_content=content, metadata={"source": file, "type": "knowledge_graph"})
    for file, content in knowledge_graphs.items()
]

# Clinical notes: one Document per note, with the disease category as its source.
for disease, subcategories in clinical_notes.items():
    for subcategory, notes in subcategories.items():
        documents.extend(
            Document(page_content=note, metadata={"source": disease, "subcategory": subcategory, "type": "clinical_note"})
            for note in notes
        )

# Embed every document, then persist the index to the "faiss_index" folder.
faiss_index = FAISS.from_documents(documents, embedding_model)
faiss_index.save_local("faiss_index")


In [11]:
# Reload the index saved to "faiss_index" by the previous cell, using the same
# embedding model. allow_dangerous_deserialization=True opts in to loading the
# pickled part of the index; only do this for index files you created yourself.
vectorstore = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)

# Using Top-K for results

# Load the FAISS Vector Store

In [14]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store an access token in the notebook: it is saved with the file and
# shared with every copy. The token is read from the HF_TOKEN environment
# variable, falling back to an interactive prompt that does not echo input.
# NOTE(review): the token that used to be hard-coded here must be treated as
# leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)


In [None]:
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline

In [34]:
# NOTE(review): in the recorded run this cell failed with PydanticImportError
# (see the output below), and `llm` is not referenced by any later cell visible
# in this notebook — the answers further down come from `qa_pipeline`.
# Consider removing this cell or fixing the pydantic version first.
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()  # created with default settings
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline

PydanticImportError: `pydantic.errors:ConfigError` has been removed in V2.

For further information visit https://errors.pydantic.dev/2.11/u/import-error

In [35]:
# Query-time retrieval stack: the same embedding model that built the index,
# the index reloaded from disk, and a retriever over it.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = FAISS.load_local(
    "faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)
# Similarity search returning the five closest documents per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

In [41]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import torch
import bitsandbytes as bnb

# Report the runtime environment before the expensive model load
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"BitsAndBytes version: {bnb.__version__}")

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# trust_remote_code is deliberately not passed to from_pretrained below: it
# permits running Python code shipped in the model repository, and this
# checkpoint loads through the AutoModelForCausalLM class without it.
if torch.cuda.is_available():
    # GPU configuration with 4-bit quantization (NF4, double quantization,
    # float16 compute)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=quantization_config
    )
else:
    # No GPU: load the unquantized float32 weights. No quantization is
    # attempted on this path.
    print("No GPU detected. Loading model with basic configuration...")
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=torch.float32
        )
    except Exception as e:
        # Report the failure and a hint, then re-raise so the cell still fails
        # loudly instead of leaving `model` undefined.
        print(f"Error loading model: {str(e)}")
        print("Consider using a smaller model or enabling GPU runtime")
        raise

# Text-generation pipeline used by the question-answering cells below
qa_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

CUDA available: True
CUDA version: 12.1
BitsAndBytes version: 0.45.5


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [45]:
def answer_clinical_query(query):
    """
    Answer a clinical question using retrieved documents as context.

    The notebook-level `retriever` supplies the documents for `query`; their
    text is joined into a context block, combined with the question into a
    prompt ending in "Answer:", and passed to the notebook-level `qa_pipeline`.

    Args:
        query: The question text.

    Returns:
        The generated answer with surrounding whitespace removed.
    """
    docs = retriever.invoke(query)
    context = "\n".join([doc.page_content for doc in docs])
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

    response = qa_pipeline(prompt, max_new_tokens=256, num_return_sequences=1)
    full_output = response[0]['generated_text']

    # The generated text echoes the prompt followed by the continuation, so the
    # answer is whatever comes after the prompt. Cutting at the last "Answer:"
    # instead would drop the start of any answer that itself contains that
    # marker.
    if full_output.startswith(prompt):
        answer = full_output[len(prompt):]
    else:
        # Prompt not echoed verbatim: fall back to the text after the last marker.
        answer = full_output.rpartition("Answer:")[2]
    return answer.strip()

In [46]:
# Smoke test: run one question through retrieval and generation, then show the answer.
query = "what is CT scan"
response = answer_clinical_query(query)
print(response)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


A CT scan, or computed tomography scan, is a medical imaging test that uses X-rays and computer technology to create detailed images of the body. It can be used to diagnose a wide range of conditions, including pneumonia, cancer, and injuries. CT scans can be performed on various parts of the body, including the chest, abdomen, and head. They can also be used to guide medical procedures, such as biopsies and drainage of abscesses. CT scans are typically performed in a hospital or radiology clinic, and the results are interpreted by a radiologist.


In [47]:
# Show the previous cell's question and answer together (nothing is regenerated here).
print(query)
print(response)

what is CT scan
A CT scan, or computed tomography scan, is a medical imaging test that uses X-rays and computer technology to create detailed images of the body. It can be used to diagnose a wide range of conditions, including pneumonia, cancer, and injuries. CT scans can be performed on various parts of the body, including the chest, abdomen, and head. They can also be used to guide medical procedures, such as biopsies and drainage of abscesses. CT scans are typically performed in a hospital or radiology clinic, and the results are interpreted by a radiologist.


In [48]:
# Test query: a second question, printed together with its generated answer.
query = "What are the symptoms of heart failure?"
response = answer_clinical_query(query)
print(query)
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What are the symptoms of heart failure?
The symptoms of heart failure include shortness of breath, fatigue, reduced exercise tolerance, ankle swelling, and weight gain. Other symptoms may include cough, wheezing, bloated feeling, loss of appetite, confusion, depression, palpitations, dizziness, syncope, and chest pain. Symptoms may vary depending on the type and severity of heart failure.


In [97]:
# Source of the Streamlit app, written to /kaggle/working/app.py below.
# Streamlit re-executes the whole script on every user interaction, so the
# index and the language model are loaded inside st.cache_resource functions:
# they are built once per server process and reused afterwards instead of being
# reloaded for each query.
# Backslashes are doubled ("\\n") so that the generated file contains "\n".
app_code = """
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import torch
import bitsandbytes as bnb
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"


@st.cache_resource
def load_retriever():
    # Load the embedding model and the saved FAISS index once per server process
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)
    return vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})


@st.cache_resource
def load_qa_pipeline():
    # Load the tokenizer and model once per server process
    print(f"CUDA available: {torch.cuda.is_available()}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"BitsAndBytes version: {bnb.__version__}")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    if torch.cuda.is_available():
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            quantization_config=quantization_config
        )
    else:
        print("No GPU detected. Loading model with basic configuration...")
        try:
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                device_map="auto",
                torch_dtype=torch.float32
            )
        except Exception as e:
            print(f"Error loading model: {str(e)}")
            raise

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto"
    )


retriever = load_retriever()
qa_pipeline = load_qa_pipeline()


# Define the function to answer clinical queries
def answer_clinical_query(query):
    # Get the top 5 relevant documents from the retriever
    docs = retriever.invoke(query)
    context = "\\n".join([doc.page_content for doc in docs])

    # Create the prompt for the language model
    prompt = f"Context:\\n{context}\\n\\nQuestion: {query}\\nAnswer:"

    # Generate the response from the QA pipeline
    response = qa_pipeline(prompt, max_new_tokens=256, num_return_sequences=1)
    full_output = response[0]['generated_text']

    # The output echoes the prompt; keep only what follows it so an answer
    # containing "Answer:" is not cut short
    if full_output.startswith(prompt):
        answer = full_output[len(prompt):]
    else:
        answer = full_output.rpartition("Answer:")[2]
    return answer.strip()

# Streamlit app code
st.title("Clinical Query Answering System")

query = st.text_input("Enter your clinical query:")

if query:
    with st.spinner("Processing your query..."):
        response = answer_clinical_query(query)
        st.subheader("Answer:")
        st.write(response)

st.markdown("This tool uses clinical context to answer your queries.")
"""

with open("/kaggle/working/app.py", "w") as f:
    f.write(app_code)


In [98]:
# Install Streamlit, start the app in the background and expose port 8501
# through a localtunnel URL.
# NOTE(review): a password is hard-coded on the command line here, while a
# later cell fetches the tunnel password from loca.lt — confirm whether
# localtunnel actually honours the --password flag.
!pip install streamlit --quiet
!streamlit run /kaggle/working/app.py & npx localtunnel --port 8501 --password mypassword123




Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.41.193.90:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://twelve-coats-move.loca.lt
2025-04-15 21:00:32.600625: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-15 21:00:32.643005: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-15 21:00:32.655242: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attem

In [86]:
# Fetch the tunnel password from loca.lt. In the recorded run it printed the
# same IP address that Streamlit reported as its External URL.
!curl https://loca.lt/mytunnelpassword


34.41.193.90

In [103]:
# Stub version of the Streamlit app for testing the UI without loading any
# model: the retriever and the generation pipeline are replaced by functions
# that return canned text.
# NOTE: this rebinds `app_code` and overwrites /kaggle/working/app.py, replacing
# the app written by the earlier cell.
app_code = """
import streamlit as st

# Dummy placeholder setup (replace with actual retriever and qa_pipeline)
class DummyDoc:
    def __init__(self, page_content):
        self.page_content = page_content

# Simulate the retriever and qa_pipeline for testing
def retriever(query):
    # Return a list of DummyDocs to simulate retrieved documents
    return [DummyDoc("CT scan is an imaging technique that uses X-rays to create images of the body.")]

def qa_pipeline(prompt, max_new_tokens=256, num_return_sequences=1):
    # Simulate a response from the model
    return [{"generated_text": prompt + " A CT scan, or computed tomography scan, is a medical imaging test that uses X-rays and computer technology to create detailed images of the body. It can be used to diagnose a wide range of conditions, including pneumonia, cancer, and injuries. CT scans can be performed on various parts of the body, including the chest, abdomen, and head. They can also be used to guide medical procedures, such as biopsies and drainage of abscesses. CT scans are typically performed in a hospital or radiology clinic, and the results are interpreted by a radiologist."}]

def answer_clinical_query(query):
    docs = retriever(query)
    context = "\\n".join([doc.page_content for doc in docs])
    prompt = f"Context:\\n{context}\\n\\nQuestion: {query}\\nAnswer:"
    
    response = qa_pipeline(prompt, max_new_tokens=256, num_return_sequences=1)
    full_output = response[0]['generated_text']
    
    # Extract only the answer part
    answer = full_output.split("Answer:")[-1].strip()
    return answer

st.title("Clinical Query Answering System")

query = st.text_input("Enter your clinical query:")

if query:
    with st.spinner("Processing your query..."):
        response = answer_clinical_query(query)
        st.subheader("Answer:")
        st.write(response)

st.markdown("This tool uses clinical context to answer your queries.")
"""

# Write the stub app over the existing app.py
with open("/kaggle/working/app.py", "w") as f:
    f.write(app_code)


In [None]:
# Relaunch Streamlit and the tunnel; app.py now holds whatever the previous
# cell wrote. These are the same two commands as the earlier launch cell.
!pip install streamlit --quiet
!streamlit run /kaggle/working/app.py & npx localtunnel --port 8501 --password mypassword123



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.41.193.90:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://wide-rooms-take.loca.lt
