# Library

### Install albumentations for data augmentation

In [None]:
!pip install albumentations

In [None]:
from PIL import Image
import cv2
import numpy as np
import time
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
import albumentations
import albumentations.pytorch
from matplotlib import pyplot as plt
import pandas as pd
import os
import shutil

# Data augmentation test

References:

- https://github.com/albumentations-team/albumentations_examples/blob/master/notebooks/example.ipynb
- https://hoya012.github.io/blog/albumentation_tutorial/

### Read the image from the disk and convert it from the BGR color space to the RGB color space

In [None]:
# Load one training image to try the individual augmentations on.
sample_image_path = '../../data/.train/.task153/data/train/11.tif'
image = cv2.imread(sample_image_path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded, so fail early with a readable message.
if image is None:
    raise FileNotFoundError('Could not read image: {}'.format(sample_image_path))
# OpenCV loads pixels in BGR order; convert to RGB for display with matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)

### Horizontal Flip

In [None]:
# p=1 means the horizontal flip is applied on every call.
transform = albumentations.HorizontalFlip(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### Rotation

In [None]:
# p=1 means the 90-degree rotation transform is applied on every call.
transform = albumentations.RandomRotate90(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### Vertical Flip

In [None]:
# p=1 means the vertical flip is applied on every call.
transform = albumentations.VerticalFlip(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### Blur

In [None]:
# p=1 means the motion blur is applied on every call.
transform = albumentations.MotionBlur(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### Distortion

In [None]:
# p=1 means the optical distortion is applied on every call.
transform = albumentations.OpticalDistortion(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### Noise

In [None]:
# p=1 means the Gaussian noise is applied on every call.
transform = albumentations.GaussNoise(p=1)
augmented = transform(image=image)
# The transformed picture is stored under the 'image' key of the result.
augmented_image = augmented['image']
plt.imshow(augmented_image)

### RandomBrightness

In [None]:
# albumentations.RandomBrightness is deprecated and missing from recent
# releases. RandomBrightnessContrast with contrast_limit=0 changes brightness
# only, which is the same effect. p=1 applies it on every call.
transform = albumentations.RandomBrightnessContrast(contrast_limit=0, p=1)
augmented_image = transform(image=image)['image']
plt.imshow(augmented_image)

### Show test images

In [None]:
# Display test images 0.tif .. 9.tif side by side in a single row.
fig, ax = plt.subplots(1, 10, figsize=(25, 25))
for i, panel in enumerate(ax):
    path = '../../data/.train/.task153/data/test/' + str(i) + '.tif'
    img = plt.imread(path)
    panel.imshow(img)
    # Hide ticks and frame so only the picture is visible.
    panel.axis('off')

# Data Augmentation

### Read train.csv as data frame

In [None]:
# Load the training label table and preview its first five rows.
train_csv_path = '../../data/.train/.task153/data/train/train.csv'
train_df = pd.read_csv(filepath_or_buffer=train_csv_path)
train_df.head(5)

In [None]:
# Plain Python list of the values in the 'file_name' column.
file_name_list = train_df['file_name'].tolist()
file_name_list[:3]

In [None]:
# Parse the integer index from each file name (the text before the first '.').
img_idx_list = []
for name in file_name_list:
    stem = name.split('.')[0]
    img_idx_list.append(int(stem))
img_idx_list[:5], len(img_idx_list)

In [None]:
# Show the last rows of the label table.
train_df.tail()

In [None]:
# Number of rows in the label table.
len(train_df)

In [None]:
# Last five image indices parsed from the file names.
img_idx_list[-5:]

### Copy train.csv to my workspace

In [None]:
# Write the label table into the workspace directory used by the later cells.
# On a fresh workspace the directory may not exist yet and to_csv will not
# create it, so make sure it is there first.
workspace_train_dir = "/home/workspace/user-workspace/leeejihyun/data/train/"
os.makedirs(workspace_train_dir, exist_ok=True)
train_df.to_csv(workspace_train_dir + "train.csv", index=False)

### Copy train images to my workspace

In [None]:
# Copy every image listed in train.csv from the shared data folder to the workspace.
src_dir = '../../data/.train/.task153/data/train/'
dst_dir = "/home/workspace/user-workspace/leeejihyun/data/train/"
for file_name in file_name_list:
    shutil.copy(src_dir + file_name, dst_dir + file_name)

In [None]:
# Augmentation pipeline applied to every training image:
#   1. resize to 68x68, then take a random 66x66 crop;
#   2. exactly one geometric transform (flip or 90-degree rotation);
#   3. exactly one photometric transform (blur, distortion, noise or brightness).
# Each OneOf has p=1, so one member of each group is always applied.
transform = albumentations.Compose([
    albumentations.Resize(68, 68),
    albumentations.RandomCrop(66, 66),
    albumentations.OneOf([
        albumentations.HorizontalFlip(p=1),
        albumentations.RandomRotate90(p=1),
        albumentations.VerticalFlip(p=1),
    ], p=1),
    albumentations.OneOf([
        albumentations.MotionBlur(p=1),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
        # RandomBrightness is deprecated and missing from recent albumentations
        # releases; RandomBrightnessContrast with contrast_limit=0 changes
        # brightness only, which is the same effect.
        albumentations.RandomBrightnessContrast(contrast_limit=0, p=1),
    ], p=1)
])

### Generate 4 augmented copies per image

In [None]:
# Number of augmented copies written for each original image.
num_samples = 4
train_dir = '/home/workspace/user-workspace/leeejihyun/data/train/'

for img_idx in img_idx_list:
    src_path = '{}{}.tif'.format(train_dir, img_idx)
    img = cv2.imread(src_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(src_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    for aug_img_idx in range(num_samples):
        aug_img = transform(image=img)['image']
        # cv2.imwrite expects BGR channel order. Without converting back, the
        # augmented files would have red and blue swapped relative to the
        # original images they sit next to.
        aug_img = cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR)
        dst_path = '{}{}_aug{}.tif'.format(train_dir, img_idx, aug_img_idx)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(dst_path, aug_img):
            raise IOError('Could not write image: {}'.format(dst_path))

### Check the number of files to see if they are augmented correctly

Expected file count: 14073 (original images) + 14073 × 4 (augmented images) + 1 (train.csv) = 70366

In [None]:
# Count the entries in the workspace train directory.
file_names = os.listdir('/home/workspace/user-workspace/leeejihyun/data/train/')
num_files = sum(1 for _ in file_names)
print(num_files)

### Rewrite train.csv according to the augmented data

In [None]:
# Build the augmented label table from the in-memory train_df instead of
# parsing the CSV text by hand:
#   * a plain line.split(',') breaks on quoted fields that contain commas and
#     relies on every row ending with a newline;
#   * re-reading the workspace train.csv makes this cell fail when it is re-run
#     after the file has already been rewritten with the *_aug*.tif rows.
augmented_rows = []
for row in train_df.to_dict('records'):
    # Keep the original row, immediately followed by its augmented copies.
    augmented_rows.append(row)
    img_idx = int(row['file_name'].split('.')[0])
    for aug_img_idx in range(num_samples):
        new_file_name = '{}_aug{}.tif'.format(img_idx, aug_img_idx)
        # An augmented copy shares every column of its source row except the file name.
        augmented_rows.append({**row, 'file_name': new_file_name})

train_aug_df = pd.DataFrame(augmented_rows, columns=train_df.columns)

# `lines` is kept as a list of CSV text chunks (header first, line endings
# included) so it can be written back to disk unchanged, chunk by chunk.
lines = train_aug_df.to_csv(index=False).splitlines(keepends=True)

# Preview only the header, the first original row and its augmented copies
# rather than dumping every line into the notebook output.
lines[:num_samples + 2]

In [None]:
# Overwrite the workspace train.csv with the augmented lines; each entry
# already carries its own line ending, so they are written back to back.
with open('/home/workspace/user-workspace/leeejihyun/data/train/train.csv', 'w') as fw:
    fw.writelines(lines)