In [161]:
import numpy as np

from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import models
from torchvision import transforms

from medmnist import BloodMNIST

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from dataset import ALDataset
from query_strategy import MarginQS
from annotator_simulation import simulate_annotators

from PIL import Image

In [186]:
class ImagesDataset(Dataset):
    """Map-style dataset pairing image arrays with (possibly soft) label vectors.

    Parameters
    ----------
    X : indexable
        Images. ``X[idx]`` must be accepted by ``PIL.Image.fromarray``
        whenever a transform is supplied.
    y : indexable
        Labels. ``y[idx]`` is converted to a tensor of ``label_dtype``.
    transform : callable, optional
        Applied to the PIL image built from ``X[idx]``. When omitted, the
        raw ``X[idx]`` is returned untouched.
    label_dtype : torch.dtype, optional
        dtype of the returned label tensor. Defaults to ``torch.float64``,
        the value that used to be hard-coded. Pass ``torch.float32`` when
        the loss expects targets with the same dtype as float32 model outputs.
    """

    def __init__(self, X, y, transform=None, label_dtype=torch.float64):
        self.X = X
        self.y = y
        self.transform = transform
        self.label_dtype = label_dtype

    def __len__(self):
        """Return the number of samples, taken from the image container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The label keeps its fractional values (it is not cast to an integer
        type), so soft labels such as ``[0.5, 0.5]`` survive the conversion.
        """
        image = self.X[idx]

        # Compare against None explicitly so a transform object that happens
        # to be falsy (e.g. an empty container module) is still applied.
        if self.transform is not None:
            image = self.transform(Image.fromarray(image))

        # squeeze() drops any singleton dimensions from the label tensor.
        label = torch.tensor(self.y[idx], dtype=self.label_dtype).squeeze()
        return image, label

In [28]:
# Load the BloodMNIST train and test splits (downloads the archive if needed).
train_set, test_set = (
    BloodMNIST(split=split_name, download=True)
    for split_name in ("train", "test")
)

Using downloaded and verified file: /home/nico/.medmnist/bloodmnist.npz
Using downloaded and verified file: /home/nico/.medmnist/bloodmnist.npz


In [29]:
# Reference class index per training image: first column of the label array.
y_true = train_set.labels[:, 0]

# Simulate five annotators on those reference labels; keep the full result
# and pull out its 'y' entry for the active-learning dataset.
simulated = simulate_annotators(n_annotators=5, labels=y_true)
y = simulated['y']

In [78]:
# Image array of the training split, taken directly from the dataset object.
X_train = train_set.imgs

In [79]:
# Display the array dimensions as a quick check of the loaded images.
X_train.shape

(11959, 28, 28, 3)

In [80]:
# Wrap the images, the reference labels and the simulated annotations in the
# project's active-learning container.
# NOTE(review): argument roles are inferred from the variable names — confirm
# against ALDataset's constructor.
al_dataset = ALDataset(X_train, y_true, y)

In [148]:
# Seeded generator so the initial labelled pool is identical after every
# Restart & Run All (the unseeded global RNG gave a different pool each run).
rng = np.random.default_rng(42)

# 1000 distinct sample positions, each paired with one randomly drawn
# annotator id in [0, 5) — 5 matches n_annotators used in the simulation cell.
idx = rng.choice(len(al_dataset.y_DL), size=1000, replace=False)
annotators = rng.choice(np.arange(5), size=1000, replace=True)

In [149]:
# Register the sampled (sample index, annotator) pairs with the AL dataset.
# NOTE(review): presumably this reveals annotator annotators[i]'s label for
# sample idx[i] — confirm against ALDataset.update_entries.
al_dataset.update_entries(idx, annotators)

In [150]:
# Fetch the current training images and label matrix from the AL dataset.
# Note: this rebinds X_train, which until now held the full train_set.imgs array.
X_train, y_train = al_dataset.get_train_set()

In [151]:
# Display the dimensions of the training label matrix.
y_train.shape

(8666, 8)

In [152]:
# Sanity check: positions of label rows that sum to zero, i.e. rows carrying
# no label mass at all. An empty result means every row has some label.
np.where(y_train.sum(axis=1) == 0)

(array([], dtype=int64),)

In [156]:
# Peek at the first label row to see what a training target looks like.
y_train[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.33333333, 0.        , 0.66666667])

In [157]:
# Build the evaluation data from the held-out test split, not train_set, so
# evaluation does not run on images the model was trained on.
y_test = test_set.labels[:, 0]

# One-hot width follows the training label matrix so train and test targets
# have the same number of columns (a hard-coded 10 does not match it).
n_classes = y_train.shape[1]
y_test = np.eye(n_classes)[y_test]

X_test = test_set.imgs

In [187]:
# Transform-free datasets: indexing them returns the stored image unchanged,
# which is convenient for inspecting raw samples and labels.
train_dataset = ImagesDataset(X=X_train, y=y_train)
test_dataset = ImagesDataset(X=X_test, y=y_test)

In [188]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # NOTE(review): these values look like the ImageNet channel statistics
    # commonly paired with torchvision's pretrained models — confirm they are
    # the intended normalisation for this dataset.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Settings shared by both loaders, kept in one place so they cannot drift apart.
loader_kwargs = dict(batch_size=16, num_workers=2)

dataloaders = {
    # Training batches are reshuffled every epoch.
    'train': DataLoader(
        ImagesDataset(X_train, y_train, transform=transform),
        shuffle=True,
        **loader_kwargs,
    ),
    # Evaluation keeps dataset order so predictions can be matched back to
    # sample positions and results are repeatable between runs.
    'test': DataLoader(
        ImagesDataset(X_test, y_test, transform=transform),
        shuffle=False,
        **loader_kwargs,
    ),
}

In [189]:
# Pull a single batch from the training loader to inspect its contents.
# NOTE(review): the name `input` shadows Python's builtin input() for the rest
# of the notebook session; consider renaming it (e.g. `inputs`).
input, labels = next(iter(dataloaders['train']))

In [190]:
# Show the label tensor of the batch fetched above.
labels[:]

tensor([[0.0000, 0.3333, 0.0000, 0.0000, 0.0000, 0.3333, 0.3333, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000],
        [1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.5000, 0.0000, 0.0000, 0.5000, 0.0000],
        [0.0000, 0.0000, 0.5000, 0.0000, 0.0000, 0.0000, 0.5000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000],
        [0.0000, 0.6667, 0.0000, 0.3333, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.0000, 0.0000, 0.0000, 0.000

In [172]:
# Same zero-mass check as on y_train, this time on the labels stored inside
# the dataset object, to confirm the wrapper holds them unchanged.
np.where(train_dataset.y.sum(axis=1) == 0)

(array([], dtype=int64),)

In [173]:
# Fetch sample 10 through normal subscripting (which dispatches to
# __getitem__); without a transform the image comes back as the raw array.
train_dataset[10]

(array([[[201, 163, 150],
         [198, 160, 147],
         [198, 160, 147],
         ...,
         [253, 224, 210],
         [254, 225, 209],
         [253, 224, 206]],
 
        [[205, 166, 159],
         [203, 164, 157],
         [203, 164, 157],
         ...,
         [252, 223, 209],
         [253, 224, 210],
         [252, 223, 207]],
 
        [[203, 164, 165],
         [202, 163, 164],
         [204, 164, 165],
         ...,
         [248, 216, 203],
         [251, 219, 206],
         [252, 220, 205]],
 
        ...,
 
        [[255, 227, 205],
         [255, 229, 208],
         [255, 226, 206],
         ...,
         [225, 192, 183],
         [223, 190, 181],
         [220, 187, 178]],
 
        [[255, 228, 207],
         [255, 229, 208],
         [255, 226, 208],
         ...,
         [216, 183, 174],
         [217, 184, 175],
         [217, 184, 175]],
 
        [[253, 225, 201],
         [254, 226, 204],
         [254, 223, 203],
         ...,
         [216, 183, 174],
  

In [182]:
# Label tensor of sample 7 as produced by the dataset.
# NOTE(review): the saved output of this cell has a lower execution count
# (182) than the ImagesDataset cell (186), so it was produced by an earlier
# class definition — re-run to refresh it.
train_dataset[7][1]

tensor([0, 0, 0, 0, 0, 0, 0, 0])

In [185]:
# Convert the same label row directly, without going through the dataset, to
# compare against what ImagesDataset returns for sample 7.
torch.tensor(y_train[7])

tensor([0.0000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000, 0.0000],
       dtype=torch.float64)