In [1]:
import os
import pandas as pd
import torchvision.transforms as transforms
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
# organizing the classes in 3 folders
lung_opacity_path = 'data/Lung_Opacity'
normal_path = 'data/Normal'
viral_pneumonia_path = 'data/Viral Pneumonia/'
data_paths = [normal_path, lung_opacity_path, viral_pneumonia_path]

file_paths_list = []
labels_list = []
class_labels = ['Normal', 'Lung Opacity','Viral Pneumonia']

In [3]:
# adjusting the dataset into a pandas dataframe
for i, data_path in enumerate(data_paths):
    files = os.listdir(data_path)
    for filename in files:
        file_path = os.path.join(data_path, filename)
        file_paths_list.append(file_path)
        labels_list.append(class_labels[i])

file_paths_series = pd.Series(file_paths_list, name="filepaths")
labels_series = pd.Series(labels_list, name="labels")
data = pd.concat([file_paths_series, labels_series], axis=1)
df = pd.DataFrame(data)

In [4]:
train_df, test_df = train_test_split(df, test_size=0.25, random_state=42, stratify=df.labels)

train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42, stratify=train_df.labels)

print(f'train: {train_df.shape}, test: {test_df.shape}, validation: {val_df.shape}')

train: (2215, 2), test: (869, 2), validation: (391, 2)


In [5]:
class ImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]
        img_label = self.dataframe.iloc[idx, 1]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = ImageDataset(train_df, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)


In [6]:
def calculate_mean_std(loader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0

    for data in loader:
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_squared_sum / num_batches - mean**2)**0.5
    return mean, std

mean, std = calculate_mean_std(train_loader)
print(f'Mean: {mean}')
print(f'Std: {std}')


Mean: tensor([0.2860, 0.5660, 0.4400])
Std: tensor([0.1837, 0.2486, 0.1257])
