In [1]:
from random import seed, shuffle
import warnings
from tqdm import tqdm
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.optim as optim
import torch.nn as nn
import torch
import cv2
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np
import wandb
import pathlib
import os
import time
from torchvision.models import resnet18, resnet50
import matplotlib.pyplot as plt
from datetime import datetime
import transformers
import albumentations as A
import skimage.io
from albumentations.pytorch import ToTensorV2
warnings.filterwarnings('ignore')

In [2]:
def seed_everything(seed):
    """Seed every random number generator this notebook can draw from.

    Covers Python's built-in ``random`` module, NumPy's legacy global RNG and
    PyTorch (CPU and all CUDA devices), and switches cuDNN to deterministic
    mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # The parameter name `seed` shadows `from random import seed` at the top
    # of the file, so the module is imported locally to reach `random.seed`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed any additional GPUs, not just the current device.
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
def get_transforms(image_size=1024):
    """Build the albumentations pipeline used to prepare images.

    The pipeline resizes to a square of ``image_size`` pixels, applies
    ``A.Normalize`` with zero mean and unit std, and converts the result to a
    tensor with ``ToTensorV2``.

    Args:
        image_size: Side length, in pixels, of the resized square image.
            Defaults to 1024, the value this notebook was written with.

    Returns:
        An ``A.Compose`` object; call it as ``pipeline(image=img)['image']``.
    """
    # NOTE(review): with mean=0 / std=1 and albumentations' default
    # max_pixel_value, Normalize should reduce to a plain rescale of 8-bit
    # pixels to [0, 1] -- confirm against the installed albumentations version.
    return A.Compose([
        A.Resize(height=image_size, width=image_size),
        A.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
        ToTensorV2(),
    ])

In [4]:
class PandasDataset(Dataset):
    """Dataset of ``.tiff`` images listed in a DataFrame.

    Each row of ``df`` supplies an ``image_id`` (file stem under ``root_dir``)
    and an ``isup_grade`` label.
    """

    def __init__(self, df, root_dir, transforms=None):
        """Store the metadata frame, image directory and optional transform.

        Args:
            df: DataFrame with ``image_id`` and ``isup_grade`` columns.
            root_dir: Directory containing ``<image_id>.tiff`` files.
            transforms: Optional callable invoked as ``transforms(image=...)``
                and expected to return a mapping with an ``'image'`` entry.
        """
        self.df = df
        self.root_dir = root_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load one image and its label.

        Returns:
            Tuple ``(image, label)`` where ``label`` is a ``torch.tensor`` and
            ``image`` is the transformed image, or the raw array when no
            transform was supplied.
        """
        record = self.df.iloc[index]
        image_id, target = record.image_id, record.isup_grade
        file_path = f"{self.root_dir}/{image_id}.tiff"
        # Take the last image stored in the file (presumably the smallest
        # pyramid level of the slide -- verify against the data).
        pyramid = skimage.io.MultiImage(file_path)
        pixels = pyramid[-1]
        if self.transforms is None:
            return pixels, torch.tensor(target)
        return self.transforms(image=pixels)['image'], torch.tensor(target)

In [5]:
# Directory holding one <image_id>.tiff per training slide.
root_dir = '/kaggle/input/prostate-cancer-grade-assessment/train_images'
# Metadata table; the dataset class reads its image_id and isup_grade columns.
df = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')

In [6]:
# Build the preprocessing pipeline.
# NOTE(review): this rebinds `transforms`, which the import cell bound to the
# torchvision.transforms module; from here on the name refers to the
# albumentations pipeline and the torchvision module is no longer reachable
# under it.
transforms = get_transforms()

In [7]:
# Keep only the first 200 rows. The dataset and the pixel count further down
# are both derived from this truncated frame, so the printed mean/std describe
# this subset only, not the full training set.
df = df[:200]

In [8]:
# Wrap the metadata frame in a Dataset; `transforms` here is the albumentations
# pipeline returned by get_transforms(), not the torchvision module.
train_dataset = PandasDataset(df,root_dir,transforms)

In [9]:
# Sanity check: load the first sample and display the shape of its image tensor.
train_dataset[0][0].size()

torch.Size([3, 1024, 1024])

In [10]:
# Number of images per DataLoader batch.
BATCH_SIZE=4

In [11]:
# Batches of (image, label) pairs drawn from train_dataset.
# NOTE(review): shuffle=True is not needed for the statistics pass that
# follows -- the sums do not depend on order beyond floating-point rounding --
# and no num_workers is set, so images are loaded in the main process.
train_loader = DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle=True)

In [12]:
# Running per-channel sum and sum of squares over every pixel seen.
# Kept in float64: hundreds of millions of values are added up and the
# variance is later formed as E[x^2] - E[x]^2, which loses precision quickly
# in float32.
psum    = torch.zeros(3, dtype=torch.float64)
psum_sq = torch.zeros(3, dtype=torch.float64)

# loop through images
for images, _ in tqdm(train_loader):
    # Each batch is (images, labels); only the images are needed here.
    images = images.to(torch.float64)
    # Reduce over batch, height and width, leaving one value per channel.
    psum    += images.sum(dim=[0, 2, 3])
    psum_sq += (images ** 2).sum(dim=[0, 2, 3])

100%|██████████| 50/50 [11:56<00:00, 14.34s/it]


In [13]:
####### FINAL CALCULATIONS

# Side length of every image after resizing; must match the size used by
# get_transforms().
IMAGE_SIZE = 1024

# pixel count per channel: one IMAGE_SIZE x IMAGE_SIZE plane for each image
# that was iterated
count = len(train_dataset) * IMAGE_SIZE * IMAGE_SIZE

# mean and std (population variance via E[x^2] - E[x]^2)
total_mean = psum / count
total_var  = (psum_sq / count) - (total_mean ** 2)
# Rounding can push a near-zero variance slightly negative; clamp so that
# sqrt never yields NaN.
total_std  = torch.sqrt(total_var.clamp(min=0))

# output
print('mean: '  + str(total_mean))
print('std:  '  + str(total_std))

mean: tensor([0.9770, 0.9550, 0.9667])
std:  tensor([0.0783, 0.1387, 0.1006])
