<a href="https://colab.research.google.com/github/mobarakol/PitVQA/blob/main/PitVQANet_endo18_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Download code
!git clone https://github.com/mobarakol/PitVQA.git

#Download Dataset
!mkdir /content/PitVQA/datasets
%cd /content/PitVQA/datasets
!gdown --id 1FoAEY_u0PTAlrscjEifi2om15A83wL78

# Unzipping the VQA EndoVis18 Dataset
!unzip -q EndoVis-18-VQA.zip
%cd /content/PitVQA

Cloning into 'PitVQA'...
remote: Enumerating objects: 231, done.[K
remote: Counting objects: 100% (231/231), done.[K
remote: Compressing objects: 100% (180/180), done.[K
remote: Total 231 (delta 106), reused 130 (delta 45), pack-reused 0[K
Receiving objects: 100% (231/231), 8.87 MiB | 23.96 MiB/s, done.
Resolving deltas: 100% (106/106), done.
/content/PitVQA/datasets
Downloading...
From (original): https://drive.google.com/uc?id=1FoAEY_u0PTAlrscjEifi2om15A83wL78
From (redirected): https://drive.google.com/uc?id=1FoAEY_u0PTAlrscjEifi2om15A83wL78&confirm=t&uuid=fd10a829-6f0d-436e-b5ee-caae5238f325
To: /content/PitVQA/datasets/EndoVis-18-VQA.zip
100% 2.71G/2.71G [00:18<00:00, 149MB/s]
/content/PitVQA


In [2]:
# download weights
!gdown --id 12XvyppNYBGRsSwxUn2ZGXa5oJXnk4KaV
# https://drive.google.com/file/d/12XvyppNYBGRsSwxUn2ZGXa5oJXnk4KaV/view?usp=drive_link

Downloading...
From (original): https://drive.google.com/uc?id=12XvyppNYBGRsSwxUn2ZGXa5oJXnk4KaV
From (redirected): https://drive.google.com/uc?id=12XvyppNYBGRsSwxUn2ZGXa5oJXnk4KaV&confirm=t&uuid=f0860b10-3716-4507-aab6-b4a9c60ac373
To: /content/PitVQA/PitVQA_endo18_weights.pth.tar
100% 4.07G/4.07G [00:52<00:00, 77.7MB/s]


In [3]:
!pip install -q transformers==4.36.2
!pip install -q timm==0.9.12
!pip install -q fairscale==0.4.13
!pip install -q scikit-learn==1.3.2

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m77.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.3/266.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for fairscale (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m44.3 MB/s[0m 

In [4]:
import torch
from dataloader import EndoVis18VQAGPTClassification
from utils import calc_acc, calc_precision_recall_fscore, calc_classwise_acc

from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm

import os
import sys
import random
import numpy as np


def seed_everything(seed=3407):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), stores the seed in the ``PYTHONHASHSEED``
    environment variable, and configures cuDNN for deterministic execution
    with benchmarking turned off.

    Args:
        seed: Integer seed applied to all generators (default 3407).
    """
    # Python / NumPy generators and the hash seed environment variable.
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    # PyTorch generators (CPU first, then every CUDA device).
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags: no auto-tuner, deterministic mode on.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


def validate(val_loader, model, criterion, device):
    """Run ``model`` over ``val_loader`` and report classification metrics.

    The model is switched to eval mode and evaluated under ``torch.no_grad()``.
    Per-batch labels and predictions are collected on the CPU and concatenated
    once after the loop, instead of re-concatenating a growing tensor on every
    batch.

    Args:
        val_loader: Iterable yielding ``(file_name, images, questions, labels)``
            batches.
        model: Callable invoked as ``model(image=..., question=...)`` whose
            output is treated as per-class scores along ``dim=1``.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device that images and labels are moved to.

    Returns:
        Tuple ``(acc, precision, recall, f_score)`` as produced by
        ``calc_acc`` and ``calc_precision_recall_fscore``.
    """
    model.eval()
    total_loss = 0.0
    true_batches = []
    pred_batches = []

    with torch.no_grad():
        for file_name, images, questions, labels in tqdm(val_loader):
            # label
            labels = labels.to(device)

            # model forward pass
            outputs = model(image=images.to(device), question=questions)

            # loss: still evaluated per batch as before; it is accumulated but
            # is not part of the printed or returned metrics.
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            # Predicted class = index of the highest softmax probability.
            _, predicted = torch.max(F.softmax(outputs, dim=1), 1)
            true_batches.append(labels.detach().cpu())
            pred_batches.append(predicted.detach().cpu())

    # Single concatenation per tensor. With an empty loader the helpers receive
    # None, exactly as in the previous implementation.
    label_true = torch.cat(true_batches, 0) if true_batches else None
    label_pred = torch.cat(pred_batches, 0) if pred_batches else None

    acc = calc_acc(label_true, label_pred)
    precision, recall, f_score = calc_precision_recall_fscore(label_true, label_pred)
    print(f'\ntest acc: {acc} | test precision: {precision} | test recall: {recall} | test F1: {f_score}')
    return acc, precision, recall, f_score

  _torch_pytree._register_pytree_node(


### Please rename the `model.py` file to `PitVQANet.py` before running the next cell

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 16
seed_everything(3407)

# prepare model
# NOTE(security): the checkpoint holds a pickled model object under the 'model'
# key (not just a state_dict), so it needs full unpickling, which can execute
# arbitrary code. Only load checkpoints from a source you trust.
# `weights_only=False` is passed explicitly so loading also works on PyTorch
# versions whose default for this argument is True.
model_path = 'PitVQA_endo18_weights.pth.tar'
checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
model = checkpoint['model']
model.to(device)
criterion = nn.CrossEntropyLoss().to(device)

# prepare validation set
val_seq = [1, 5, 16]
folder_head = '/content/PitVQA/datasets/EndoVis-18-VQA/seq_'
folder_tail = '/vqa/Classification/*.txt'
val_dataset = EndoVis18VQAGPTClassification(val_seq, folder_head, folder_tail)
# Cap the worker count at the number of available CPUs (at most 8, as before);
# the loader is not shuffled, so batch order does not depend on the worker count.
num_workers = min(8, os.cpu_count() or 1)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

test_acc, test_precision, test_recall, test_f_score = validate(val_loader=val_dataloader, model=model,
                                  criterion=criterion, device=device)

  _torch_pytree._register_pytree_node(


Total files: 447 | Total question: 2769


100%|██████████| 174/174 [02:11<00:00,  1.32it/s]


test acc: 0.6832791621524016 | test precision: 0.5417785409849711 | test recall: 0.48000010272744686 | test F1: 0.6210418143069747



