In [2]:
from src.recourse_methods.revise import Revise

In [4]:
import os

import albumentations as A
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader

from src.datasets.load_datasets import MedMNISTDataset
from src.utils.generic_utils import seed_everything
from src.utils.logger import setup_logger


In [5]:
# Notebook-wide logger; later cells report progress through this handle.
logger = setup_logger(name="main")
# NOTE(review): called without arguments, so whatever default seed the helper
# defines is used — confirm the value in src.utils.generic_utils.
seed_everything()

In [None]:
# Preprocessing: dataset configuration and loading of the three PneumoniaMNIST splits.
download = False  # forwarded to MedMNISTDataset for every split
BATCH_SIZE = 64   # mini-batch size for the DataLoaders built further down
img_size = 28     # forwarded as `size` to MedMNISTDataset; reused when reshaping images

# Dataset root. The original machine-specific location remains the default so the
# notebook behaves exactly as before; set the DATA_DIR environment variable to
# point at the data on another machine without editing this cell.
data_dir = os.environ.get(
    "DATA_DIR",
    r"D:\PycharmProjects\Counterfactual-Robustness-Benchmark\data",
)
class_encodings = {0: 'normal', 1: 'pneumonia'}

# NOTE(review): with albumentations' default max_pixel_value this presumably
# rescales pixel values to [0, 1] — confirm against the installed version.
data_transform = A.Compose([A.Normalize(mean=0.0, std=1.0)])

data_flag = "pneumoniamnist"
# All three splits share the same configuration and differ only in `split`;
# they are constructed in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    MedMNISTDataset(data_flag, split=split_name, data_dir=data_dir,
                    transform=data_transform, size=img_size, download=download)
    for split_name in ('train', 'val', 'test')
)
logger.info("Pneumonia dataset has been successfully loaded")

[2025-02-07 12:49:57|INFO] - Pnemounia dataset has been successfully loaded


In [7]:
from imblearn.under_sampling import RandomUnderSampler

# Balance the training split by randomly discarding majority-class samples.
# A float sampling_strategy is the minority/majority ratio requested after
# resampling; random_state pins which samples are kept.
rus = RandomUnderSampler(sampling_strategy=0.7, random_state=0)
# Flatten every image to a single feature vector before handing it to the sampler.
reshaped_images = train_dataset.data.imgs.reshape(train_dataset.data.imgs.shape[0], -1)

train_images_res, train_labels_res = rus.fit_resample(reshaped_images, train_dataset.data.labels)
print('Number of instances in each class after random undersampling')
# np.unique(..., return_counts=True) is available on every NumPy release,
# whereas np.unique_counts only exists from NumPy 2.0 onwards.
values, counts = np.unique(train_labels_res, return_counts=True)
# Report every class that is present instead of assuming exactly two.
print(' | '.join('Class {}: {}'.format(value, count) for value, count in zip(values, counts)))

Number of instances in each class after random undersampling
Class 0: 1214 | Class 1: 1734


In [8]:
# Restore the 2-D image layout and insert a channel axis in one step:
# (N, img_size * img_size) -> (N, 1, img_size, img_size).
train_images_res = train_images_res.reshape(
    train_images_res.shape[0], 1, img_size, img_size
)

# Give the labels a trailing axis, but only if they are still 1-D,
# so re-running this cell does not keep adding dimensions.
if train_labels_res.ndim < 2:
    train_labels_res = train_labels_res[..., np.newaxis]

train_images_res.shape, train_labels_res.shape

((2948, 1, 28, 28), (2948, 1))

In [9]:
from torch.utils.data import TensorDataset


# Wrap the resampled image and label arrays as tensors and pair them sample-by-sample.
# NOTE(review): these images come straight from `train_dataset.data.imgs` and never
# pass through `data_transform`, while `val_dataset` was built with it — verify that
# train and validation inputs end up on the same scale.
train_tensor_set = TensorDataset(
    *map(torch.Tensor, (train_images_res, train_labels_res))
)

# Both loaders use the same batch size and reshuffle on every pass.
# NOTE(review): shuffling the validation loader is kept as in the original;
# confirm it is intentional.
train_loader = DataLoader(train_tensor_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Load a classifier and VAE