### Dependencies

In [1]:
# Install essential libraries
!pip install -q transformers huggingface_hub networkx rdflib datasets
!pip install -q bitsandbytes
!pip install -q sentence-transformers
!pip install -q --upgrade transformers
!pip install -q "qwen-vl-utils[decord]==0.0.8"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/565.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m563.2/565.1 kB[0m [31m21.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m565.1/565.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

### Datasets

In [2]:
# Step 1: Save your kaggle.json inside the notebook
import json
import os

# Create the .kaggle directory
os.makedirs('/root/.kaggle', exist_ok=True)

# Write your kaggle token
kaggle_token = {
    "username": "aranyasaha",
    "key": "e6edd39652ec8bf7a896420248e50803"
}

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(kaggle_token, file)

# Set file permissions
os.chmod('/root/.kaggle/kaggle.json', 600)

print("✅ Kaggle API token is set up!")

# Step 2: Install kaggle library
!pip install -q kaggle

# Step 3: Authenticate Kaggle API
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

print("✅ Kaggle API authenticated!")

# Step 4: Download and unzip datasets

# Download the first dataset
print("⬇️ Downloading zxzzzzzzzzzzzzzz...")
api.dataset_download_files('chapkhabo/zxzzzzzzzzzzzzzz', path='./', unzip=True)
print("✅ First dataset downloaded and unzipped!")

# Create a 'dataset' folder if it doesn't exist
if not os.path.exists('dataset'):
    os.mkdir('dataset')

# Download the second dataset inside 'dataset'
print("⬇️ Downloading selective-dermnet-for-llm into dataset/...")
api.dataset_download_files('aranyasaha/selective-dermnet-for-llm', path='./dataset', unzip=True)
print("✅ Second dataset downloaded and unzipped!")

✅ Kaggle API token is set up!
✅ Kaggle API authenticated!
⬇️ Downloading zxzzzzzzzzzzzzzz...
Dataset URL: https://www.kaggle.com/datasets/chapkhabo/zxzzzzzzzzzzzzzz
✅ First dataset downloaded and unzipped!
⬇️ Downloading selective-dermnet-for-llm into dataset/...
Dataset URL: https://www.kaggle.com/datasets/aranyasaha/selective-dermnet-for-llm
✅ Second dataset downloaded and unzipped!


In [3]:
# Make /content the working directory; later cells read the knowledge graph and the
# test images through absolute /content/... paths.
%cd /content

/content


In [4]:
import kagglehub

# Download the latest version of the DermNet-trained DINO classifier files via kagglehub.
# `path` receives whatever location kagglehub reports for the downloaded model files.
path = kagglehub.model_download("aranyasaha/dino-model-trained-on-dermnet/pyTorch/default")

# Print the location so the checkpoint path used further down can be checked against it.
print("Path to model files:", path)

Path to model files: /kaggle/input/dino-model-trained-on-dermnet/pytorch/default/1


### Import Libraries

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from rdflib import Graph, Literal, RDF, RDFS
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import load_dataset
from huggingface_hub import hf_hub_download

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### DINO Model as Auxiliary Classifier

In [7]:
# Fetch the `dinov2_vits14` model from the facebookresearch/dinov2 torch.hub repo.
# DinoVisionTransformerClassifier uses this module-level object as its backbone.
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 130MB/s]


In [8]:
class DinoVisionTransformerClassifier(nn.Module):
    """Image classifier: the shared DINOv2 backbone followed by a two-layer MLP head.

    The backbone is the module-level ``dinov2_vits14`` object (not a copy). The head
    maps 384 input features to 8 output logits through a 256-unit hidden layer.
    """

    def __init__(self):
        super().__init__()
        # The attribute names and the Sequential layout define the state_dict keys
        # ("transformer.*", "classifier.0.*", "classifier.2.*").
        self.transformer = dinov2_vits14
        head_layers = [
            nn.Linear(384, 256),
            nn.ReLU(),
            nn.Linear(256, 8),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the head's logits for the input batch ``x``."""
        backbone_out = self.transformer(x)
        # The backbone's own `norm` layer is applied to the backbone output before the head.
        normed = self.transformer.norm(backbone_out)
        return self.classifier(normed)

In [9]:
# Build the auxiliary classifier around the DINOv2 backbone loaded above.
model1 = DinoVisionTransformerClassifier()

In [10]:
# Load the fine-tuned weights from the directory kagglehub reported (`path`) instead of
# repeating its absolute location. Tensors are mapped to CPU so the checkpoint also
# loads on a host without a GPU; the model is moved to the accelerator afterwards.
checkpoint_file = os.path.join(path, 'best_model.pth')
model1.load_state_dict(torch.load(checkpoint_file, map_location='cpu', weights_only=True))

<All keys matched successfully>

In [11]:
# Move the classifier to the GPU. As the last expression of the cell, the returned
# module is also echoed, which prints its layer structure.
model1.to('cuda')

DinoVisionTransformerClassifier(
  (transformer): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, in

### Vision Language Model

In [12]:
import torch
from transformers import (
    Qwen2_5_VLForConditionalGeneration,
    Qwen2_5_VLProcessor,
    BitsAndBytesConfig
)

# Model IDs: `model_id` is the fine-tuned DermQwen checkpoint (an adapter repo, per its
# name); `or_model_id` is the original Qwen2.5-VL model whose processor is reused.
model_id = "Aranya31/DermQwen-7b-adapter"
or_model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

# BitsAndBytesConfig for int-4 quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load model with quantization and optimized memory usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `device_map` already places every module on `device`, so no trailing `.to(device)`
# is needed. Calling `.to()` on a bitsandbytes-quantized model is redundant and is
# rejected by some transformers/bitsandbytes version combinations.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    device_map={"": device},  # Map the whole model onto the single selected device
    quantization_config=bnb_config
)

# Load processor (chat template, tokenizer and image preprocessing) from the base model
processor = Qwen2_5_VLProcessor.from_pretrained(or_model_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/57.6k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.69G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/214 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/47.7M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/5.70k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

In [14]:
# Echo the model structure as the cell output.
# NOTE(review): the load cell already places the model through `device_map`, so this
# extra move to 'cuda' looks redundant — confirm it can be reduced to displaying `model`.
model.to('cuda')

Qwen2_5_VLForConditionalGeneration(
  (visual): Qwen2_5_VisionTransformerPretrainedModel(
    (patch_embed): Qwen2_5_VisionPatchEmbed(
      (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False)
    )
    (rotary_pos_emb): Qwen2_5_VisionRotaryEmbedding()
    (blocks): ModuleList(
      (0-31): 32 x Qwen2_5_VLVisionBlock(
        (norm1): Qwen2RMSNorm((1280,), eps=1e-06)
        (norm2): Qwen2RMSNorm((1280,), eps=1e-06)
        (attn): Qwen2_5_VLVisionSdpaAttention(
          (qkv): Linear4bit(in_features=1280, out_features=3840, bias=True)
          (proj): Linear4bit(in_features=1280, out_features=1280, bias=True)
        )
        (mlp): Qwen2_5_VLMLP(
          (gate_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=1280, out_features=3420, bias=True)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_featur

### Knowledge Graph Retriever

In [15]:
from sentence_transformers import SentenceTransformer, util
# Load the knowledge graph from its GraphML file
G_loaded = nx.read_graphml("/content/knowledge_graph.graphml")

# Load a retrieval model for RAG
retrieval_model = SentenceTransformer("all-MiniLM-L6-v2")

# Convert KG nodes to embeddings. All node names are encoded in one batched call
# instead of one model invocation per node; each row of the result is the embedding
# of the node at the same position in `kg_nodes`.
kg_nodes = list(G_loaded.nodes)
if kg_nodes:
    entity_embeddings = dict(zip(kg_nodes, retrieval_model.encode(kg_nodes)))
else:
    # An empty graph yields an empty lookup table rather than an encode call on nothing.
    entity_embeddings = {}

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Necessary Functions

In [16]:
import torch
from PIL import Image
from torchvision import transforms
import torch.nn.functional as F
import os
import csv
import re

# List of skin disease labels
# Class names for the auxiliary classifier. predict_skin_disease indexes this list
# with the argmax of the classifier output, so the order must match the classifier's
# output indices (presumably the label order used in training — TODO confirm).
# rag_query also searches for these names, lower-cased, in the VLM's answer.
disease_labels = [
    'actinic keratosis',
    'basal cell carcinoma',
    'dermatitis',
    'lichen planus',
    'melanoma',
    'psoriasis',
    'rosacea',
    'Seborrheic keratosis'
]

# Preprocessing function
def preprocess_image(image):
    """Turn an image into a single-item batch tensor for the classifier.

    Args:
        image: The image to preprocess. PIL images are converted to RGB first;
            any other input accepted by the torchvision transforms is passed
            through unchanged.

    Returns:
        The image resized to 336x336 and converted with ``ToTensor``, with a
        leading batch dimension of size 1 added.
    """
    # Grayscale, palette or RGBA files (e.g. PNG uploads) would otherwise give a
    # tensor whose channel count differs from the 3 channels the backbone's patch
    # embedding expects.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    preprocess = transforms.Compose([
        transforms.Resize((336, 336)),
        transforms.ToTensor(),
    ])
    return preprocess(image).unsqueeze(0)

# Function to perform inference and return the disease name and probability
def predict_skin_disease(model, image_path):
    """Classify one image file with the auxiliary classifier.

    Args:
        model: Classifier module returning one row of logits per image, with one
            logit per entry of ``disease_labels``.
        image_path: Path of the image file to classify.

    Returns:
        ``(probability, disease_name)``: the softmax probability of the top class
        and the matching entry of ``disease_labels``. Note the order: probability
        first, name second.
    """
    # Run on the device the model lives on instead of assuming CUDA; fall back
    # to the previous CUDA-if-available choice for a parameter-less model.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The context manager closes the file handle. Preprocessing runs inside the
    # block because PIL reads the pixel data lazily.
    with Image.open(image_path) as image:
        input_tensor = preprocess_image(image).to(device)

    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        probabilities = F.softmax(output, dim=1)

    predicted_index = torch.argmax(probabilities, dim=1).item()

    predicted_probability = probabilities[0, predicted_index].item()
    predicted_disease = disease_labels[predicted_index]

    return predicted_probability, predicted_disease

# Extract Assistant's response
def extract_response(text):
    """Return the stripped text after the first "ASSISTANT:" marker, or None if there is none."""
    found = re.search(r"ASSISTANT:\s*(.*)", text, re.DOTALL)
    if found is None:
        return None
    return found.group(1).strip()

def extract_after_second_assistant(text):
    """Return the stripped text following the second occurrence of "assistant".

    Later occurrences of the word stay part of the returned text. Returns None
    when "assistant" appears fewer than two times.
    """
    # Splitting at most twice leaves everything after the second marker in one piece.
    pieces = text.split('assistant', 2)
    if len(pieces) < 3:
        return None
    return pieces[2].strip()

In [17]:
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# Smoke test: ask the VLM a generic question about one test image.

# Define the conversation: a single user turn with a text part and an image placeholder
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is this?"},
            {"type": "image"},
        ],
    },
]

# Apply chat template (add_generation_prompt=True appends the assistant turn opener)
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

# Load the image
image_path = "/content/dataset/test_merged_selective_resized/test_merged_selective_resized/known_39/Psoriasis-Guttate-45.jpg"
raw_image = Image.open(image_path)

# Prepare inputs and move them to the same device type the model was loaded on
device = "cuda" if torch.cuda.is_available() else "cpu"
inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(device)

# Generate output (do_sample=False disables sampling)
output = model.generate(**inputs, max_new_tokens=200, do_sample=False)

# Decode and print response. output[0] is decoded in full, so the printed text
# contains the system/user turns followed by the assistant's answer.
response = processor.tokenizer.decode(output[0], skip_special_tokens=True)
print(response)



system
You are a helpful assistant.
user
What is this?
assistant
This is an image of psoriasis.


In [44]:
import torch
from sentence_transformers import util
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

def _generate_vlm_text(model, processor, raw_image, prompt_text, max_new_tokens):
    """Send one image+text user turn to the VLM and return the decoded sequence.

    The returned string is the whole sequence produced by ``generate`` (the
    chat-template prompt followed by the answer), decoded with special tokens skipped.
    """
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(device)

    output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
    return processor.tokenizer.decode(output[0], skip_special_tokens=True)


def rag_query(query, retrieval_model, entity_embeddings, image_path, graph, threshold, model, processor, model1):
    """Answer a question about a skin image using KG context and a disease label.

    Steps:
      1. Pick the knowledge-graph entity whose embedding is most similar to the
         query text and collect its outgoing relations.
      2. Ask the VLM for the disease name and look for a known label in its answer.
      3. Run the auxiliary classifier. Its label is used when its probability
         exceeds ``threshold``; otherwise the VLM's label is used.
      4. Ask the VLM the actual question, with the KG relations and the chosen
         disease name added to the prompt.

    Args:
        query: The user's question.
        retrieval_model: Sentence encoder exposing ``encode``.
        entity_embeddings: Mapping of KG node name to its embedding.
        image_path: Path of the image file.
        graph: Knowledge graph; ``graph[node]`` maps neighbours to edge-data dicts.
        threshold: Softmax probability above which the classifier label is trusted.
        model: The vision-language model.
        processor: The processor matching ``model``.
        model1: The auxiliary classifier passed to ``predict_skin_disease``.

    Returns:
        ``(source, additional_context, detailed_response)``. ``source`` names where the
        label came from, ``additional_context`` is the sentence stating the disease
        name, and ``detailed_response`` is the full decoded VLM output including the
        prompt text.

    Raises:
        ValueError: If ``entity_embeddings`` is empty.
    """
    if not entity_embeddings:
        raise ValueError("entity_embeddings is empty; cannot retrieve a knowledge-graph entity")

    # Encode the query
    query_embedding = retrieval_model.encode(query)

    # Find the best matching entity from the KG (compare plain floats, not 1x1 tensors)
    best_entity, _ = max(
        entity_embeddings.items(),
        key=lambda item: util.cos_sim(query_embedding, item[1]).item(),
    )

    # Retrieve relations associated with the best entity; an edge without a
    # 'relation' attribute gets a neutral label instead of raising KeyError.
    related_entities = [
        (target, data.get('relation', 'related to'))
        for target, data in graph[best_entity].items()
    ]
    relation_text = " ".join(f"{best_entity} -({relation})-> {target}" for target, relation in related_entities)

    # Load the image once, close the file handle, and force RGB so grayscale/RGBA
    # uploads are handled consistently.
    with Image.open(image_path) as opened_image:
        raw_image = opened_image.convert("RGB")

    # First pass: ask the VLM for the disease name
    vlm_answer = _generate_vlm_text(model, processor, raw_image, "What is the name of the disease?", 64)
    text_lower = vlm_answer.lower()

    # First known label mentioned in the VLM output, or None
    disease_label = next(
        (disease for disease in disease_labels if disease.lower() in text_lower),
        None,
    )

    # Predict skin disease with the auxiliary classifier
    probability, predicted_disease = predict_skin_disease(model1, image_path)

    if probability > threshold:
        additional_context = f"The name of the disease is {predicted_disease}"
        source = "Auxiliary Classifier"
    elif disease_label is not None:
        additional_context = f'The name of the disease is {disease_label}'
        source = "Vision Language Model"
    else:
        # The VLM named no known label. Use the classifier's best guess rather than
        # telling the model "The name of the disease is None".
        additional_context = f"The name of the disease is {predicted_disease}"
        source = "Auxiliary Classifier (low-confidence fallback)"

    # Second pass: the detailed answer, grounded in the KG relations and chosen label
    prompt_text = (
        f"Using knowledge about {best_entity} and its relations ({relation_text}), "
        f"answer the question in detail: {additional_context} {query}"
    )
    detailed_response = _generate_vlm_text(model, processor, raw_image, prompt_text, 1024)

    return source, additional_context, detailed_response

### Single Inference

In [54]:
import re

# Define image path, query and the classifier-confidence threshold for this run
image_path = "/content/dataset/test_merged_selective_resized/test_merged_selective_resized/known_39/Psoriasis-Guttate-45.jpg"
query = "What is the treatment of the disease?"
softmax_threshold = 0.9

# Call RAG query function. It returns the label source, the sentence naming the
# disease, and the full decoded VLM output.
source, disease_name, response = rag_query(
    query,
    retrieval_model,
    entity_embeddings,
    image_path,
    G_loaded,
    softmax_threshold,
    model,
    processor,
    model1
)

# Display output. `response` still contains the echoed prompt, so only the text
# after the second "assistant" marker is shown as the answer.
print("Label Source:", source, "\n")
print("Disease Name:", disease_name, "\n")
print("User:", query, "\n")
print("Answer:", extract_after_second_assistant(response))

Label Source: Auxiliary Classifier 

Disease Name: The name of the disease is psoriasis 

User: What is the treatment of the disease? 

Answer: The treatment of psoriasis may include the following:

  * Topical treatments — such as soaps and emollients, dithranol, coal tar, and phototherapy.
  * Immune modulators — such as calcineurin inhibitors (e.g. tacrolimus ointment), vitamin D analogs (e.g. calcipotriol cream), and retinoids.
  * Biologic agents — a range of proteins that work by targeting specific molecules involved in the immune response.

Psoriasis is a chronic inflammatory skin condition characterized by well-defined red, scaly plaques. While the diagnosis is usually straightforward, the treatment can be more complex. Treatment goals vary according to plaque location, size, and type, as well as side effects and patient preference. Mild disease may be managed with physical and topical treatments, whereas more severe disease often requires systemic therapy or phototherapy. New 

## Gradio

In [55]:
# Install Gradio, which provides the web UI built in the next cell.
!pip install gradio

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [57]:
import gradio as gr
import re

# Gradio app logic
def run_rag_pipeline(image, query, softmax_threshold):
    """Gradio callback: run the RAG pipeline on an uploaded image and a question.

    Args:
        image: Path of the uploaded image (the Image input uses ``type="filepath"``),
            or None when nothing was uploaded.
        query: The user's question.
        softmax_threshold: Classifier probability above which its label is trusted.

    Returns:
        ``(source, disease_name, query, cleaned_answer)`` for the four output textboxes.

    Raises:
        gr.Error: If no image was uploaded.
    """
    # Without an upload Gradio passes None, which would otherwise fail deep inside
    # the pipeline with an unhelpful error.
    if not image:
        raise gr.Error("Please upload a skin image before submitting.")

    # image is a path (type="filepath"), so it can be passed on directly
    image_path = image

    # Call your RAG function
    source, disease_name, response = rag_query(
        query,
        retrieval_model,
        entity_embeddings,
        image_path,
        G_loaded,
        softmax_threshold,
        model,
        processor,
        model1
    )

    # Strip the echoed prompt. If the marker-based extraction finds nothing, show
    # the raw response rather than an empty box.
    cleaned_answer = extract_after_second_assistant(response)
    if cleaned_answer is None:
        cleaned_answer = response

    return source, disease_name, query, cleaned_answer

# Build Gradio app: three inputs feed run_rag_pipeline, whose four return values
# fill the four output textboxes in order.
demo = gr.Interface(
    fn=run_rag_pipeline,
    inputs=[
        # type="filepath" hands the callback a file path rather than pixel data
        gr.Image(type="filepath", label="Upload Skin Image"),
        gr.Textbox(lines=2, placeholder="Enter your question here...", label="Query"),
        # Default matches the 0.9 threshold used in the single-inference cell
        gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="Softmax Threshold"),
    ],
    outputs=[
        gr.Textbox(label="Label Source"),
        gr.Textbox(label="Predicted Disease Name"),
        gr.Textbox(label="User Query"),
        gr.Textbox(label="Cleaned Assistant Response")
    ],
    title="Skin Disease Diagnosis with RAG",
    description="Upload an image and enter a question. The model will predict the disease and answer your query.",
)

# Start the app with default launch settings
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3f62678b76d98644e2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


