# Estimating the epistemic uncertainty in SMLM microscopy image classification
In this notebook we will be estimating the epistemic uncertainty in SMLM microscopy classification using Monte Carlo Dropout. The table of contents is the following:
- INSERT TOC

## Data preprocessing

In [39]:
import tifffile

import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split

### Importing raw data

In [36]:
name2label = {'7art': 0, '7ary': 1, '7as5': 2, '7bho': 3, 'hemisphere': 4, 'icosahedron': 5, 'tetrahedron': 6} # Dictionary and mapping used for encoding class names to labels
vectorized_map = np.vectorize(name2label.get)

def load_train_SMLM(train_path):
  X_trainval_SMLM = []
  y_trainval_SMLM = []

  for class_folder in os.listdir(train_path):
      tif_file_train = os.path.join(train_path, class_folder)
      class_name = os.path.basename(tif_file_train)[:-4]

      if class_name[-4:] == 'SMLM':
        SMLM_image_data = tifffile.imread(tif_file_train)
        tif_file_size = len(SMLM_image_data)

        X_trainval_SMLM.append(SMLM_image_data)
        y_trainval_SMLM += tif_file_size * [class_name[:-5]]

      elif class_name[-2:] == 'EM':
        continue

      else:
        raise ValueError('Image must be SMLM image.')

  X_trainval_SMLM = np.concatenate(X_trainval_SMLM, axis=0)
  y_trainval_SMLM = np.array(y_trainval_SMLM)

  y_trainval_SMLM = vectorized_map(y_trainval_SMLM)

  return X_trainval_SMLM, y_trainval_SMLM

train_path = '/content/drive/MyDrive/FAIP_2022_data/train'
X_trainval_SMLM, y_trainval_SMLM = load_train_SMLM(train_path)

print(X_trainval_SMLM.shape)
print(y_trainval_SMLM.shape)
print(np.unique(y_trainval_SMLM))

(5040, 128, 128)
(5040,)
[0 1 2 3 4 5 6]


In [32]:
def load_test_SMLM(test_path):
  X_test_SMLM = []
  y_test_SMLM = []

  for class_folder in os.listdir(test_path):
      tif_file_test = os.path.join(test_path, class_folder)
      class_name = os.path.basename(tif_file_test)[:-4]

      if class_name[-4:] == 'SMLM':
        SMLM_image_data = tifffile.imread(tif_file_test)
        tif_file_size = len(SMLM_image_data)

        X_test_SMLM.append(SMLM_image_data)
        y_test_SMLM += tif_file_size * [class_name[:-5]]

      elif class_name[-2:] == 'EM':
        continue

      else:
        raise ValueError('Image must be SMLM image.')

  X_test_SMLM = np.concatenate(X_test_SMLM, axis=0)
  y_test_SMLM = np.array(y_test_SMLM)

  y_test_SMLM = vectorized_map(y_test_SMLM)

  return X_test_SMLM, y_test_SMLM

test_path = '/content/drive/MyDrive/FAIP_2022_data/test'
X_test_SMLM, y_test_SMLM = load_test_SMLM(test_path)

print(X_test_SMLM.shape)
print(y_test_SMLM.shape)
print(np.unique(y_test_SMLM))

(1260, 128, 128)
(1260,)
[0 1 2 3 4 5 6]


In [42]:
X_train_SMLM, X_val_SMLM, y_train_SMLM, y_val_SMLM = train_test_split(X_trainval_SMLM, y_trainval_SMLM, test_size=0.25, random_state=42)
print(X_train_SMLM.shape)
print(X_val_SMLM.shape)

(3780, 128, 128)
(1260, 128, 128)


### SMLMDataset class and creating dataloaders

In [43]:
# Calculate mean
mean = np.mean(X_train_SMLM, axis=(1, 2)) # This computes the mean for each image
overall_mean = np.mean(mean) # This computes the overall mean across all images

# Calculate standard deviation
std_dev = np.std(X_train_SMLM, axis=(1, 2)) # This computes the standard deviation for each image
overall_std_dev = np.mean(std_dev) # This computes the overall standard deviation across all images

print("Mean:", overall_mean)
print("Standard Deviation:", overall_std_dev)

Mean: 0.6123785755621693
Standard Deviation: 2.2223346004114672


In [45]:
batch_size = 32

class SMLMDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


transform = transforms.Compose([
    # transforms.Resize((64, 64)), # Resizing image
    transforms.ToTensor(), # Converting to Tensor
    transforms.Normalize((0.61237,), (2.22233,)) # Normalizing pixels
])

train_dataset_SMLM = SMLMDataset(X_train_SMLM, y_train_SMLM, transform=transform)
val_dataset_SMLM = SMLMDataset(X_val_SMLM, y_val_SMLM, transform=transform)
test_dataset_SMLM = SMLMDataset(X_val_SMLM, y_val_SMLM, transform=transform)

train_loader = DataLoader(train_dataset_SMLM, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset_SMLM, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset_SMLM, batch_size=batch_size, shuffle=False)

## Neural Network Classes