## A simple way to calculate image statistics for the Cassava dataset.

The image size used in this notebook is **512 x 512**.

You can also use the merged dataset ([Discussion](http://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/200201)).

I am not sure whether the LB score will improve after replacing the ImageNet mean & std with the mean & std recalculated on the Cassava dataset.

I am still doing some experiments on local CV.

In [None]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import sys
import math
import random
import shutil
from tqdm.auto import tqdm
import cv2
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import warnings 
warnings.filterwarnings('ignore')

In [None]:
# Side length, in pixels, that every image is resized to (height and width).
IMAGE_SIZE = 512
# Number of colour channels the per-channel statistics are accumulated over.
N_CHANNELS = 3

In [None]:
## Original competition data. CassavaDataset reads the `image_id` and `label`
## columns of this table.
train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

## Alternative: merged data from the previous competition (switch TRAIN_PATH
## to the merged image directory as well when enabling this).
# train= pd.read_csv('../input/cassava-leaf-disease-merged/merged.csv')

In [None]:
# Directory that CassavaDataset joins with each `image_id` to locate an image.
TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'
# Image directory matching the merged CSV alternative.
# TRAIN_PATH = '../input/cassava-leaf-disease-merged/train'
# Test image directory; not referenced by the cells visible in this notebook.
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

In [None]:
# refer: https://www.kaggle.com/yasufuminakama/cassava-resnext50-32x4d-starter-training

## dataset

class CassavaDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the Cassava images.

    Each image is read from ``TRAIN_PATH`` with OpenCV, converted from BGR to
    RGB and divided by 255. When a transform is supplied it is called as
    ``transform(image=image)`` and the ``'image'`` entry of its result is
    returned in place of the raw array.

    Args:
        df: Table with an ``image_id`` column (file names relative to
            ``TRAIN_PATH``) and a ``label`` column.
        transform: Optional callable using the albumentations calling
            convention. ``None`` returns the scaled array unchanged.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.labels = df['label'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the sample in the table.

        Returns:
            Tuple ``(image, label)`` where ``label`` is an int64 tensor.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        file_name = self.file_names[idx]
        file_path = f'{TRAIN_PATH}/{file_name}'
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the path rather than deep inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image / 255
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label
    
## Transforms
def get_transforms():
    """Build the preprocessing pipeline used for the statistics pass.

    Returns:
        An albumentations ``Compose`` that resizes the image to
        ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and then converts it to a tensor.
    """
    steps = [
        A.Resize(IMAGE_SIZE, IMAGE_SIZE),
        ToTensorV2(),
    ]
    return A.Compose(steps)

In [None]:
## Preview the first sample exactly as the dataset loads it (no transform)
train_dataset = CassavaDataset(train)  # transform defaults to None

for i in range(1):
    image, label = train_dataset[i]
    title = f'label: {label}'
    plt.imshow(image)
    plt.title(title)
    plt.show()

In [None]:
## display image with transform
train_dataset = CassavaDataset(train, transform=get_transforms())

for i in range(1):
    image, label = train_dataset[i]
    # ToTensorV2 returns a channel-first (C, H, W) tensor, while imshow expects
    # (H, W, C). Indexing image[0] would plot only the first channel as a
    # colormapped heatmap, so move the channel axis last to show the RGB image.
    plt.imshow(image.permute(1, 2, 0).numpy())
    plt.title(f'label: {label}')
    plt.show()

In [None]:
## Compute the per-channel mean and std over every pixel of the training set.
train_dataset = CassavaDataset(train, transform=get_transforms())
train_loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=False,
    # os.cpu_count() may return None, which DataLoader rejects; fall back to
    # loading in the main process.
    num_workers=os.cpu_count() or 0,
    pin_memory=True,
    drop_last=False,
)

# Accumulate sum and sum of squares per channel. Averaging per-image standard
# deviations ignores the variation between images and is not the dataset
# standard deviation. float64 keeps rounding error small over the whole
# dataset.
channel_sum = torch.zeros(N_CHANNELS, dtype=torch.float64)
channel_sq_sum = torch.zeros(N_CHANNELS, dtype=torch.float64)
n_pixels = 0  # pixels seen per channel
print('==> Computing mean and std..')
for inputs, _labels in tqdm(train_loader):
    # inputs is indexed as (batch, channel, height, width); reduce over
    # everything except the channel axis.
    inputs = inputs.double()
    channel_sum += inputs.sum(dim=(0, 2, 3))
    channel_sq_sum += inputs.pow(2).sum(dim=(0, 2, 3))
    n_pixels += inputs.shape[0] * inputs.shape[2] * inputs.shape[3]

mean = channel_sum / n_pixels
# Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative values caused
# by floating-point rounding before taking the square root.
std = (channel_sq_sum / n_pixels - mean ** 2).clamp(min=0).sqrt()
mean, std = mean.float(), std.float()
print(f'mean={mean}, std={std}')