In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pydicom
import os
import albumentations as A
import torch
# Tabular training data that accompanies the CT scans.
train = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    gpu = torch.device("cuda:0")
else:
    gpu = torch.device("cpu")

# Bare last expression so the notebook displays the selected device.
gpu

# OSIC: augmentations tutorial w/ albumentations 

In any good deep learning pipeline, it is **always** essential to be able to augment your data, because augmentation helps your model generalize well. As such, here is a tutorial on data augmentations (especially those for images) with the albumentations library, developed by notable Kagglers such as Dr. Vladimir Iglovikov and Eugene Khvedchenya.

The albumentations documentation describes image augmentation as follows:
> Image augmentation is a process of creating new training examples from the existing ones. To make a new sample, you slightly change the original image. For instance, you could make a new image a little brighter; you could cut a piece from the original image; you could make a new image by mirroring the original one, etc.



<hr>

### TOC:

+ <a href="#blur">Blur-based Augmentations </a><br>
+ <a href="#dropout">Dropout Augmentations </a><br>
+ <a href="#cbt">Crop-based Augmentations </a><br>
+ <a href="#rt">Rotation-based Augmentations</a><br>
+ <a href="#snbt">Scaling and normalization-based transforms</a>
+ <a href="#ns">Noise-based transforms</a>

In [None]:
def show_images(images, read_region=(1780,1950)):
    """Display DICOM slices of one training patient in a three-column grid.

    Parameters
    ----------
    images : iterable of str
        File stems (without the ``.dcm`` extension) of the slices to load from
        ``train/ID00007637202177411956430``.
    read_region : tuple, optional
        Not used by this function; kept so existing calls keep working.

    Notes
    -----
    The number of rows is derived from ``len(images)`` so the function works
    for any number of slices (nine slices still give the original 3x3 grid
    with a 16x18 inch figure). Grid cells without an image are blanked.
    """
    images = list(images)
    n_cols = 3
    # Ceiling division; keep at least one row so an empty list still renders.
    n_rows = max(1, -(-len(images) // n_cols))
    f, ax = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows), squeeze=False)

    for i, name in enumerate(images):
        image_path = os.path.join('../input/osic-pulmonary-fibrosis-progression', "train/ID00007637202177411956430", name + '.dcm')
        # Separate name for the parsed file so the loop variable is not clobbered.
        dicom = pydicom.dcmread(image_path)
        ax[i // n_cols, i % n_cols].imshow(dicom.pixel_array)

    # Hide ticks/frames everywhere, including cells that received no image.
    for cell in ax.ravel():
        cell.axis('off')

    plt.show()
# File stems '1' through '9': the nine slices shown in the preview grid.
images = [str(slice_number) for slice_number in range(1, 10)]
show_images(images)


These are a few of our sample images present in the dataset. Let us now commence our albumentations tutorial, with simple blur-based augmentations.

<h1 id="blur">Blur-based augs</h1>

This is a simple set of:
+ Blur
+ GaussianBlur
+ MedianBlur
+ MotionBlur

In [None]:
import cv2
def show_images(images, aug_dict, read_region=(1780,1950)):
    """Plot a grid comparing augmentations on DICOM slices of one patient.

    Each row corresponds to one entry of ``aug_dict``: the first column shows
    the entry's key as a text label and every following column shows one slice
    from ``images`` after the augmentation has been applied.

    Parameters
    ----------
    images : iterable of str
        File stems (without ``.dcm``) of the slices to load from
        ``train/ID00007637202177411956430``; one grid column per stem.
    aug_dict : dict
        Maps a row label to a callable that is invoked as ``aug(image=...)``
        and returns a mapping with an ``'image'`` entry, or to ``None`` to
        show the slice unchanged.
    read_region : tuple, optional
        Not used by this function; kept so existing calls keep working.

    Notes
    -----
    The grid size is taken from the arguments (not from a global), the grid is
    drawn exactly once, and the slices actually listed in ``images`` are the
    ones displayed.
    """
    images = list(images)
    if not aug_dict:
        # plt.subplots cannot build a grid with zero rows.
        return

    n_rows = len(aug_dict)
    n_cols = len(images) + 1  # one extra leading column for the row label
    f, axes = plt.subplots(n_rows, n_cols, figsize=(20, 2 * n_rows), squeeze=False)

    # Read every slice once instead of once per augmentation row.
    slices = []
    for name in images:
        image_path = '../input/osic-pulmonary-fibrosis-progression/' + "train/ID00007637202177411956430/" + str(name) + '.dcm'
        slices.append(pydicom.dcmread(image_path).pixel_array)

    for row, (key, aug) in enumerate(aug_dict.items()):
        label_ax = axes[row, 0]
        label_ax.text(0.5, 0.5, key, horizontalalignment='center', verticalalignment='center', fontsize=15)
        label_ax.axis('off')

        for col, pixels in enumerate(slices, start=1):
            image = pixels
            if aug is not None:
                # Hand the transform a copy so the cached slice stays pristine
                # for the remaining rows.
                image = aug(image=pixels.copy())['image']
            axes[row, col].imshow(image, cmap='bone')

    plt.tight_layout()
    plt.show()
    plt.close(f)
# Slice file stems handed to the plotting helper.
images = '1 19 13 11 5'.split()

# Row label -> transform; None marks the untouched reference row.
# p=1.0 forces each transform to be applied on every call.
augs = dict(
    Original=None,
    Blur=A.Blur(p=1.0),
    MedianBlur=A.MedianBlur(blur_limit=5, p=1.0),
    GaussianBlur=A.GaussianBlur(p=1.0),
    MotionBlur=A.MotionBlur(p=1.0),
)
show_images(images, augs)


<h1 id="dropout">Dropout augmentations</h1>

Dropout augmentations we're going to be applying here (with their description from the [albumentations docs](https://albumentations.readthedocs.io/en/latest/api/augmentations.html)) are:

+ GridDropout
> GridDropout, drops out rectangular regions of an image and the corresponding mask in a grid fashion.
+ ChannelDropout
> Randomly Drop Channels in the input Image.


In [None]:
# Row label -> transform; None marks the untouched reference row.
augs = dict(
    Original=None,
    GridDropout=A.GridDropout(p=1.0),
    # NOTE(review): ChannelDropout is left disabled; presumably because the
    # slices are single-channel -- confirm before re-enabling.
    # ChannelDropout=A.ChannelDropout(p=1.0),
)
show_images(images, augs)

<h1 id="cbt">Crop-based augmentations</h1>

These augmentations are primarily oriented towards cropping an image in its many forms and sizes.

In [None]:
# Row label -> crop transform; None marks the untouched reference row.
augs = dict([
    ('Original', None),
    ('RandomCrop', A.RandomCrop(height=64, width=64, p=1.0)),
    ('CenterCrop', A.CenterCrop(height=64, width=64, p=1.0)),
    ('RandomSizedCrop', A.RandomSizedCrop((90, 100), 64, 64)),
])
show_images(images, augs)

<h1 id="rt">Rotation-based transforms</h1>

These are primarily oriented towards rotation of the image.

In [None]:
# Row label -> rotation transform; None marks the untouched reference row.
# Every transform gets p=1.0 so that each displayed slice is really
# transformed. Rotate previously relied on the library's default probability,
# which let some panels in its row show the unrotated slice.
augs = {
    'Original': None,
    'RandomRotate90': A.RandomRotate90(p=1.0),
    'ShiftScaleRotate': A.ShiftScaleRotate(p=1.0),
    'Rotate': A.Rotate(p=1.0),
}
show_images(images, augs)

<h1 id="snbt">Scaling and normalization-based transforms</h1>

These principally revolve around transforms which scale the image, normalize it, invert pixel values etc.

In [None]:
def _to_uint8(pixels):
    """Min-max rescale an array to the 0-255 range and return it as uint8."""
    pixels = np.asarray(pixels, dtype=np.float64)
    if pixels.size == 0:
        return pixels.astype(np.uint8)
    lowest, highest = pixels.min(), pixels.max()
    if highest == lowest:
        # Constant image: avoid dividing by zero.
        return np.zeros(pixels.shape, dtype=np.uint8)
    return np.round((pixels - lowest) / (highest - lowest) * 255.0).astype(np.uint8)


def show_imagesunit8(images, aug_dict, read_region=(1780,1950)):
    """Plot an augmentation comparison grid using 8-bit versions of the slices.

    Each row corresponds to one entry of ``aug_dict``: the first column shows
    the entry's key as a text label and every following column shows one slice
    from ``images``, converted to ``uint8`` and then augmented.

    Parameters
    ----------
    images : iterable of str
        File stems (without ``.dcm``) of the slices to load from
        ``train/ID00007637202177411956430``; one grid column per stem.
    aug_dict : dict
        Maps a row label to a callable that is invoked as ``aug(image=...)``
        and returns a mapping with an ``'image'`` entry, or to ``None`` to
        show the slice unchanged.
    read_region : tuple, optional
        Not used by this function; kept so existing calls keep working.

    Notes
    -----
    The slices are min-max rescaled to 0-255 before the cast. A bare
    ``astype("uint8")`` wraps out-of-range values modulo 256 and scrambles the
    image. This is a simple full-range rescale; an intensity window may be
    preferable for clinical viewing.
    The grid size is taken from the arguments (not from a global), the grid is
    drawn exactly once, and the slices listed in ``images`` are the ones shown.
    """
    images = list(images)
    if not aug_dict:
        # plt.subplots cannot build a grid with zero rows.
        return

    n_rows = len(aug_dict)
    n_cols = len(images) + 1  # one extra leading column for the row label
    f, axes = plt.subplots(n_rows, n_cols, figsize=(20, 2 * n_rows), squeeze=False)

    # Read and convert every slice once instead of once per augmentation row.
    slices = []
    for name in images:
        image_path = '../input/osic-pulmonary-fibrosis-progression/' + "train/ID00007637202177411956430/" + str(name) + '.dcm'
        slices.append(_to_uint8(pydicom.dcmread(image_path).pixel_array))

    for row, (key, aug) in enumerate(aug_dict.items()):
        label_ax = axes[row, 0]
        label_ax.text(0.5, 0.5, key, horizontalalignment='center', verticalalignment='center', fontsize=15)
        label_ax.axis('off')

        for col, pixels in enumerate(slices, start=1):
            image = pixels
            if aug is not None:
                # Hand the transform a copy so the cached slice stays pristine
                # for the remaining rows.
                image = aug(image=pixels.copy())['image']
            axes[row, col].imshow(image, cmap='bone')

    plt.tight_layout()
    plt.show()
    plt.close(f)

# Row label -> transform; None marks the untouched reference row.
# Every transform gets p=1.0 so that each displayed slice is really
# transformed. Equalize and Downscale previously relied on the library's
# default probability, which let some panels show the unmodified slice.
augs = {
    'Original': None,
    'Solarize': A.Solarize(p=1.0),
    'Posterize': A.Posterize(p=1.0),
    'Equalize': A.Equalize(p=1.0),
    'Downscale': A.Downscale(p=1.0),
}
show_imagesunit8(images, augs)

<h1 id="ns">Noise based transforms</h1>

These primarily are meant to add some sort of artificial noise to our image.

In [None]:
# Row label -> transform; None marks the untouched reference row.
augs = dict(
    Original=None,
    GaussNoise=A.GaussNoise(p=1.0),
)
show_imagesunit8(images, augs)

# Further Resources

This is the end of my not-so-pedagogical notebook which takes a surface level scan of these augmentations to introduce them to people who aren't familiar.

Some other resources for learning about this are:
+ https://albumentations.readthedocs.io/en/latest/api/augmentations.html <br>
**These are the official docs for albumentations, please read.** <br>
+ https://github.com/albumentations-team/albumentations <br>
**This is the GitHub for albumentations.** <br>
+ https://www.kaggle.com/shonenkov/nlp-albumentations <br>
**Perhaps the most creative use of any sort of albumentations I have ever seen.** <br>
+ https://albumentations.ai/ <br>
**The official albumentations website.** <br>
