# Introduction
Whenever you have to decide which transformations to apply to image data, you should keep the structure of your dataset in mind. 
 
I will demonstrate some `torchvision.transforms` transformations on image data to provide the necessary knowledge about what the transformations actually do. 
Take a look at the [docs](https://pytorch.org/vision/0.8/transforms.html) to get some further information.
Furthermore, I will create a custom Dataset and loader for image data and perform some file handling.

In [None]:
import numpy as np
import pandas as pd
import pandas as pd
import torch
from torchvision import transforms
import torch.nn as nn
from torch.utils.data import DataLoader, SequentialSampler
from transformers import AdamW
import matplotlib.pyplot as plt
# Seed NumPy's global RNG; presumably this is what makes the later
# `data.sample(frac=1)` shuffle (called without random_state) repeatable.
np.random.seed(111)

# Label table; the code below reads its "file" and "label" columns.
data = pd.read_csv("../input/litter-on-forest-floor/labels.csv")

This data format is suitable for `ImageFolder`.
However, instead of using that, I prefer using a custom dataset for image data.
Thus, I need to change the way the data is stored:

In [None]:
import os
import shutil

def encode(observation):
    """
    Encodes one row of the .csv file in place and returns it.

    The label "clean" becomes 0 and every other label becomes 1. The leading
    characters of the file entry are cut off: 6 for clean rows, 7 otherwise
    (presumably the "clean/" and "litter/" folder prefixes).

    :param observation: a mapping with the keys "label" and "file"
    :return: the same (modified) observation
    """
    is_clean = observation["label"] == "clean"
    prefix_length = 6 if is_clean else 7
    observation["label"] = 0 if is_clean else 1
    observation["file"] = observation["file"][prefix_length:]
    return observation

# Encode labels / strip folder prefixes row by row.
data = data.apply(encode, axis=1)
# Shuffle all rows, then renumber the index so that it runs from 0 again.
data = data.sample(frac=1)  # shuffle
data.index = range(len(data))
# Fixed-size split: the first 150 shuffled rows for training, the last 16 for testing.
train_data = data.head(150)
# Only bind `test_data` here: the previous chained assignment
# (`test_data = data = ...`) also overwrote `data` with the 16-row tail.
test_data = data.tail(16)

# Flatten the two class folders: every image is copied into one shared directory.
clean_path = "../input/litter-on-forest-floor/clean"
clean_files = os.listdir(clean_path)
litter_path = "../input/litter-on-forest-floor/litter"
litter_files = os.listdir(litter_path)
new_path = "img"

# Create the target directory on the first run only.
if not os.path.exists(new_path):
    os.makedirs(new_path)

# Litter images are copied first, clean images second (same order as before).
for source_dir, file_names in ((litter_path, litter_files), (clean_path, clean_files)):
    for file in file_names:
        shutil.copy(f"{source_dir}/{file}", new_path)

Let's create the custom dataset for image data:

In [None]:
from torch.utils.data import Dataset
from PIL import Image

class CustomDataset(Dataset):
    """
    A custom Image Dataset that performs transformations on the images contained in it and shifts them to
    a given device.
    """

    def __init__(self, data, transform_pipe, x_name, y_name, device, img_dir="img"):
        """
        Constructor.

        :param pd.DataFrame data: A DataFrame containing one column of image file names and another column of image labels.
        :param transform_pipe: a callable (e.g. transforms.Compose) applied to every opened image; its result must
            support `.to(device)`
        :param str x_name: name of the image column
        :param str y_name: name of the label column
        :param str device: name of the device that has to be used
        :param str img_dir: directory the file names in `x_name` are relative to (defaults to "img")
        """
        self.data = data
        self.transform_pipe = transform_pipe
        self.x_name = x_name
        self.y_name = y_name
        self.device = device
        self.img_dir = img_dir

    def __len__(self):
        """
        Returns the number of observations in the whole dataset

        :return: the length of the dataset
        """
        return len(self.data)

    def __getitem__(self, i):
        """
        Is used by DataLoaders to draw the observation at index i in the dataset.

        :param int i: positional index of an observation
        :return: a list containing the image-data and the label of one observation
        """
        img_path = self.img_dir + "/" + self.data[self.x_name].iloc[i]
        # Close the file handle as soon as the transformed copy exists.
        with Image.open(img_path) as img:
            x = self.transform_pipe(img).to(self.device)
        # Positional lookup, consistent with the image path above; a label-based
        # lookup fails for frames whose index does not start at 0 (e.g. a tail split).
        label = self.data[self.y_name].iloc[i]
        y = torch.tensor(label, dtype=torch.float).to(self.device)
        return [x, y]

Let's create a function that creates a DataLoader:

In [None]:
def create_loader(data, transform_pipe, batch_size, x_name, y_name, device):
    """
    Wraps the given data in a CustomDataset and returns a DataLoader that
    walks through it in order (SequentialSampler) in batches of `batch_size`.
    """
    image_dataset = CustomDataset(
        data=data,
        transform_pipe=transform_pipe,
        x_name=x_name,
        y_name=y_name,
        device=device,
    )
    in_order = SequentialSampler(image_dataset)
    return DataLoader(image_dataset, batch_size=batch_size, sampler=in_order)

In [None]:
def display_image(transform_pipe, title, verbose=0):
    """
    Plots the first image of `train_data` after sending it through `transform_pipe`.

    :param transform_pipe: the transformations to apply before plotting
    :param str title: title of the plot
    :param int verbose: if greater than 0, the tensor sizes of the intermediate steps are printed
    """
    single_image_loader = create_loader(
        data=train_data,
        transform_pipe=transform_pipe,
        batch_size=1,
        x_name="file",
        y_name="label",
        device="cpu",
    )
    first_batch = next(iter(single_image_loader))
    image_batch = first_batch[0]
    # Drop the size-1 dimensions (the batch dimension, since batch_size is 1).
    squeezed = image_batch.squeeze()

    if verbose > 0:
        print("tensor size:", image_batch.size())
        print("size after applying squeeeze:", squeezed.size())
        print("size after applying permutate:", squeezed.permute(1, 2, 0).size())

    plt.title(title)
    # Move the first dimension to the end before handing the image to imshow.
    plt.imshow(squeezed.permute(1, 2, 0))

Let's display what our DataLoader is actually doing without performing any further transformations:

In [None]:
# Baseline: ToTensor is the only transformation, so the plot shows the image as the loader delivers it.
transform_pipe = transforms.Compose([transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Identity mapping", verbose=1)

# Transformation Visualization
Let's finally investigate the impact of some [transforms.transformations](https://pytorch.org/vision/0.8/transforms.html)!

# CenterCrop

In [None]:
# An integer size requests a square crop (1024 x 1024) around the image center.
transform_pipe = transforms.Compose([transforms.CenterCrop(size=1024), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Symmetric CenterCrop", verbose=1)

In [None]:
# A two-element size is (height, width): here 1024 x 512.
transform_pipe = transforms.Compose([transforms.CenterCrop(size=[1024, 256*2]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Non-symmetric CenterCrop", verbose=1)

# ColorJitter
The ColorJitter randomly adds some noise to the data (one form of [Data Augmentation](https://en.wikipedia.org/wiki/Data_augmentation)). 

To display the randomness, I will perform each transformation twice given the same values! You will see the differences.

In [None]:
# brightness=[0, 10] is the [min, max] range the brightness factor is sampled from.
transform_pipe = transforms.Compose([transforms.ColorJitter(brightness=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for brightness; first time", verbose=1)

In [None]:
# Same brightness configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.ColorJitter(brightness=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for brightness; second time", verbose=1)

In [None]:
# contrast=[0, 10] is the [min, max] range the contrast factor is sampled from.
transform_pipe = transforms.Compose([transforms.ColorJitter(contrast=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for contrast; first time", verbose=1)

In [None]:
# Same contrast configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.ColorJitter(contrast=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for contrast; second time", verbose=1)

In [None]:
# saturation=[0, 10] is the [min, max] range the saturation factor is sampled from.
transform_pipe = transforms.Compose([transforms.ColorJitter(saturation=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for saturation; first time", verbose=1)

In [None]:
# Same saturation configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.ColorJitter(saturation=[0, 10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for saturation; second time", verbose=1)

In [None]:
# hue=[-0.5, 0.5] is the widest [min, max] range ColorJitter accepts for the hue shift.
transform_pipe = transforms.Compose([transforms.ColorJitter(hue=[-0.5, 0.5]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for hue; first time", verbose=1)

In [None]:
# Same hue configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.ColorJitter(hue=[-0.5, 0.5]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="ColorJitter for hue; second time", verbose=1)

# Resize

In [None]:
# A (height, width) pair forces the exact output size, regardless of the original aspect ratio.
transform_pipe = transforms.Compose([transforms.Resize(size=[256, 256]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Resize to smaller size, symmetric", verbose=1)

In [None]:
# Target size is 4096 x 1024 (height x width).
transform_pipe = transforms.Compose([transforms.Resize(size=[2**12, 2**10]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Resize to larger size, non-symmetric", verbose=1)

In [None]:
# An integer size rescales the shorter edge to 256 and keeps the aspect ratio.
transform_pipe = transforms.Compose([transforms.Resize(size=256), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Resize smaller axis and the other one accordingly", verbose=1)

# Pad
Default `padding_mode` is `constant`, which pads using some constant value

In [None]:
# A single int pads all four borders by 256 pixels; no fill is given, so the default fill value is used.
transform_pipe = transforms.Compose([transforms.Pad(padding=256), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Pad", verbose=1)

In [None]:
# Four values are the (left, top, right, bottom) paddings; fill=64 sets the constant border value.
transform_pipe = transforms.Compose([transforms.Pad(padding=[0, 256, 1024, 562], fill=64), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Pad, custom fill value", verbose=1)

In [None]:
# "reflect" mirrors the image at the border without repeating the edge pixels.
transform_pipe = transforms.Compose([transforms.Pad(padding=256, padding_mode="reflect"), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Reflected Padding", verbose=1)

In [None]:
# "edge" repeats the outermost pixel values into the padded area.
transform_pipe = transforms.Compose([transforms.Pad(padding=256, padding_mode="edge"), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Edge Padding", verbose=1)

In [None]:
# "symmetric" mirrors the image at the border and repeats the edge pixels as well.
transform_pipe = transforms.Compose([transforms.Pad(padding=256, padding_mode="symmetric"), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="Symmetric Padding", verbose=1)

note: The only difference between symmetric padding and reflected padding is that symmetric padding reflects the edge pixels as well.

# RandomAffine

In [None]:
# degrees=[0, 120] is the [min, max] range the rotation angle is drawn from.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=[0,120]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine rotation; first time", verbose=1)

In [None]:
# Same rotation configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=[0,120]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine rotation; second time", verbose=1)

In [None]:
# degrees=0 disables rotation; translate=[0.9, 0.1] bounds the random shift to 90% of the width and 10% of the height.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, translate=[0.9, 0.1]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine translation, first time", verbose=1)

In [None]:
# Same translation configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, translate=[0.9, 0.1]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine translation, second time", verbose=1)

In [None]:
# degrees=0 disables rotation; scale=[0.1, 0.9] is the interval the scaling factor is sampled from.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, scale=[0.1, 0.9]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine scaling, first time", verbose=1)

In [None]:
# Same scaling configuration again; only the random draw differs.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, scale=[0.1, 0.9]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine scaling, second time", verbose=1)

In [None]:
# degrees=0 disables rotation; shear=[0, 64] is the range (in degrees) the shear angle is sampled from.
# The plot title now names the shear demo instead of repeating the translation title.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, shear=[0, 64]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine shear, first time", verbose=1)

In [None]:
# Same shear configuration again; only the random draw differs.
# The plot title now names the shear demo instead of repeating the translation title.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=0, shear=[0, 64]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine shear, second time", verbose=1)

Combined:

In [None]:
# Rotation, translation, scaling and shear are all enabled at once, each with a moderate range.
# The plot title now says so instead of repeating the translation title.
transform_pipe = transforms.Compose([transforms.RandomAffine(degrees=[0, 15], translate=[0.1, 0.1], scale=[0.8, 1], shear=[0, 32]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomAffine combined", verbose=1)

# RandomCrop
Difference to CenterCrop: doesn't necessarily crop at the center!

In [None]:
# Cut a 1024 x 1024 window at a random position.
transform_pipe = transforms.Compose([transforms.RandomCrop(size=[1024, 1024]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomCrop, first time", verbose=1)

In [None]:
# Same crop configuration again; only the random position differs.
transform_pipe = transforms.Compose([transforms.RandomCrop(size=[1024, 1024]), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomCrop, second time", verbose=1)

In [None]:
# pad_if_needed=True pads the image (constant mode) whenever it is smaller than the requested 4096 x 4096 crop.
transform_pipe = transforms.Compose([transforms.RandomCrop(size=[1024*4, 1024*4], pad_if_needed=True, padding_mode="constant"), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomCrop padded, first time", verbose=1)

In [None]:
# Same padded-crop configuration again; only the random position differs.
transform_pipe = transforms.Compose([transforms.RandomCrop(size=[1024*4, 1024*4], pad_if_needed=True, padding_mode="constant"), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomCrop padded, second time", verbose=1)

# RandomHorizontalFlip
Here I chose `p=1` so that the (normally random) flip is guaranteed to be displayed. In a real context, a moderate value would make more sense. `RandomVerticalFlip` works analogously.

In [None]:
# p=1 makes the flip happen on every call, so the effect is always visible.
# This is the only flip demo, so the title no longer says "second time".
transform_pipe = transforms.Compose([transforms.RandomHorizontalFlip(p=1), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomHorizontalFlip", verbose=1)

# RandomPerspective
Here I chose `p=1` so that the (normally random) distortion is guaranteed to be displayed. In a real context, a moderate value would make more sense.

In [None]:
# distortion_scale=0.2 requests a mild distortion; p=1 applies it on every call.
transform_pipe = transforms.Compose([transforms.RandomPerspective(distortion_scale=0.2, p=1), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomPerspective, small distortion", verbose=1)

In [None]:
# distortion_scale=0.8 requests a strong distortion; p=1 applies it on every call.
transform_pipe = transforms.Compose([transforms.RandomPerspective(distortion_scale=0.8, p=1), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="RandomPerspective, large distortion", verbose=1)

# GaussianBlur
Vary the kernel size to change the affected field, and sigma to change the strength of the blur effect.

In [None]:
# A 33 x 33 kernel with a very large sigma; the title now spells the transform name correctly.
transform_pipe = transforms.Compose([transforms.GaussianBlur(kernel_size=33, sigma=100000), transforms.ToTensor()])
display_image(transform_pipe=transform_pipe, title="GaussianBlur", verbose=1)

# Normalize
note: can only be performed on `Tensors` $\rightarrow$ perform `ToTensor` before applying this transformation!

Normalization might vastly improve the performance of your model. 
`Normalize` works on each channel individually. Since the given dataset contains `RGB` images, we have to provide a mean and a std for each of the 3 channels. You might need to calculate the mean and the std of your dataset before applying this function:

In [None]:
def find_mean_std(data):
    """
    Calculates the averaged mean and std for all channels
    of a given image dataset.

    Every image is loaded on its own (batch size 1), its per-channel mean and
    std are computed, and these per-image values are averaged over all images.
    Note that the returned std is therefore the average of the per-image stds,
    not the std over all pixels of the dataset.

    :param pd.DataFrame data: a DataFrame with the columns "file" and "label"
    :return: a dict {"mean": [...], "std": [...]} with one float per channel
    :raises ValueError: if the dataset contains no images
    """
    loader = create_loader(data=data,
                           transform_pipe=transforms.ToTensor(),
                           batch_size=1,
                           x_name="file",
                           y_name="label",
                           device="cpu")

    # Running per-channel sums; created on the first image so that the number
    # of channels does not have to be hard-coded.
    mean_sum = None
    std_sum = None

    for batch in loader:
        images = batch[0]
        # Reduce over the last two dimensions; index 0 picks the only image
        # of the batch. Accumulate in float64, like plain Python floats would.
        channel_means = torch.mean(input=images, dim=[2, 3])[0].double()
        channel_stds = torch.std(input=images, dim=[2, 3])[0].double()

        if mean_sum is None:
            mean_sum = torch.zeros_like(channel_means)
            std_sum = torch.zeros_like(channel_stds)

        mean_sum += channel_means
        std_sum += channel_stds

    if mean_sum is None:
        raise ValueError("cannot compute mean and std of an empty dataset")

    n_images = len(loader)
    return {"mean": (mean_sum / n_images).tolist(), "std": (std_sum / n_images).tolist()}

# Per-channel mean/std of the training split, used as the Normalize arguments further down.
stats = find_mean_std(data=train_data)
print(stats)

If you are too lazy to calculate them on your own, using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225] is a valid option if you have regular images of objects/nature. Those are the values of the `ImageNet` dataset. 

In [None]:
# Normalize works on tensors, so it comes after ToTensor; mean/std are the values computed from the training split.
transform_pipe = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=stats["mean"], std=stats["std"])])
display_image(transform_pipe=transform_pipe, title="Normalize", verbose=1)