In [None]:
import numpy as np
import pandas as pd
import cv2
import os

In [None]:
# Maps each integer label id (0-27) to its human-readable name.
# The position of a name in the list below is its label id.
label_names = dict(enumerate([
    "Nucleoplasm",                    # 0
    "Nuclear membrane",               # 1
    "Nucleoli",                       # 2
    "Nucleoli fibrillar center",      # 3
    "Nuclear speckles",               # 4
    "Nuclear bodies",                 # 5
    "Endoplasmic reticulum",          # 6
    "Golgi apparatus",                # 7
    "Peroxisomes",                    # 8
    "Endosomes",                      # 9
    "Lysosomes",                      # 10
    "Intermediate filaments",         # 11
    "Actin filaments",                # 12
    "Focal adhesion sites",           # 13
    "Microtubules",                   # 14
    "Microtubule ends",               # 15
    "Cytokinetic bridge",             # 16
    "Mitotic spindle",                # 17
    "Microtubule organizing center",  # 18
    "Centrosome",                     # 19
    "Lipid droplets",                 # 20
    "Plasma membrane",                # 21
    "Cell junctions",                 # 22
    "Mitochondria",                   # 23
    "Aggresome",                      # 24
    "Cytosol",                        # 25
    "Cytoplasmic bodies",             # 26
    "Rods & rings",                   # 27
]))

In [None]:
import shutil

# Label table for the training images; later cells read its "Id" and "Target" columns.
train_labels = pd.read_csv("./data/full_dev_train.csv")

# Directory that receives the augmented images. It is wiped and recreated on
# every run so that stale files from a previous run never mix with new ones.
save_path = './data/aug_train'
if os.path.exists(save_path):
    shutil.rmtree(save_path)
os.makedirs(save_path)

In [None]:
# Number of rows in the label table.
len(train_labels)

In [None]:
def do_shift_scale_rotate(image, dx=0, dy=0, scale=1, angle=0):
    """Rotate and scale ``image`` about its centre, then shift it.

    The four image corners are moved to their transformed positions and the
    image is warped with the perspective matrix that maps the original corners
    onto the moved ones. Pixels that fall outside the source are filled by
    reflection (``cv2.BORDER_REFLECT_101``).

    Args:
        image: array whose first two axes are (height, width).
        dx: shift along the width axis, in pixels.
        dy: shift along the height axis, in pixels.
        scale: isotropic scale factor applied about the image centre.
        angle: rotation angle in degrees.

    Returns:
        The warped image, with the same width and height as the input.
    """
    height, width = image.shape[:2]

    # numpy trig is used instead of the ``math`` module so that this function
    # only depends on names imported at the top of the notebook.
    theta = angle / 180 * np.pi
    cc = np.cos(theta) * scale
    ss = np.sin(theta) * scale
    rotate_matrix = np.array([[cc, -ss], [ss, cc]])

    centre = np.array([width / 2, height / 2])
    # Corners of the source image ...
    box0 = np.array([[0, 0], [width, 0], [width, height], [0, height]], np.float32)
    # ... and where each corner lands after rotate/scale about the centre plus the shift.
    box1 = np.dot(box0 - centre, rotate_matrix.T) + centre + np.array([dx, dy])
    box1 = box1.astype(np.float32)  # getPerspectiveTransform expects float32 points

    mat = cv2.getPerspectiveTransform(box0, box1)
    # borderValue is only consulted for BORDER_CONSTANT; it is kept so the
    # border mode can be switched without touching the call.
    return cv2.warpPerspective(image, mat, (width, height), flags=cv2.INTER_LINEAR,
                               borderMode=cv2.BORDER_REFLECT_101, borderValue=(0, 0, 0))


# https://www.kaggle.com/ori226/data-augmentation-with-elastic-deformations
# https://github.com/letmaik/lensfunpy/blob/master/lensfunpy/util.py
def do_elastic_transform(image, grid=32, distort=0.2):
    """Warp ``image`` with a randomly stretched, axis-aligned sampling grid.

    Each axis is cut into cells of ``grid`` pixels. Every full cell is mapped
    onto a source span of ``grid * (1 + u)`` pixels, with ``u`` drawn uniformly
    from ``[-distort, distort)``. The resulting per-axis source coordinates are
    combined into a remap table and applied with bilinear interpolation and
    reflected borders.

    Args:
        image: array whose first two axes are (height, width).
        grid: cell size in pixels (truncated to an int).
        distort: maximum relative stretch/shrink of a cell.

    Returns:
        The remapped image, with the same height and width as the input.
    """
    step = int(grid)

    def _warped_axis(length):
        # Source coordinate for every destination pixel along one axis.
        coords = np.zeros(length, np.float32)
        previous = 0
        for start in range(0, length, step):
            stop = start + step
            if stop > length:
                # Trailing partial cell: pin its end to the image border.
                stop = length
                current = length
            else:
                current = previous + step * (1 + np.random.uniform(-distort, distort))
            coords[start:stop] = np.linspace(previous, current, stop - start)
            previous = current
        return coords

    rows, cols = image.shape[:2]
    # The x axis is generated before the y axis so the random draws happen in a fixed order.
    source_x = _warped_axis(cols)
    source_y = _warped_axis(rows)

    map_x, map_y = np.meshgrid(source_x, source_y)
    return cv2.remap(image, map_x.astype(np.float32), map_y.astype(np.float32),
                     interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101,
                     borderValue=(0, 0, 0,))

###### img = do_shift_scale_rotate(img, dx=0, dy=0, scale=1, angle=np.random.uniform(0, 15))  # 10
###### img = do_elastic_transform(img, grid=10, distort=np.random.uniform(0, 0.15))  # 0.10

In [None]:
def do_flip_transpose(image, type=0):
    """Apply one fixed flip/transpose combination chosen by ``type``.

    ``type`` values 0-6 select one of seven combinations of axis swaps and
    ``cv2.flip`` calls; any other value returns the image unchanged. The
    transposing cases swap the first two axes with ``transpose(1, 0, 2)``, so
    they require a three-dimensional array.

    Args:
        image: image array (three-dimensional for the transposing cases).
        type: integer selector of the transform.

    Returns:
        The transformed image.
    """
    if type == 0:  # rotate90
        return cv2.flip(image.transpose(1, 0, 2), 1)

    if type == 1:  # rotate180
        return cv2.flip(image, -1)

    if type == 2:  # rotate270
        return cv2.flip(image.transpose(1, 0, 2), 0)

    if type == 3:  # flip left-right
        return cv2.flip(image, 1)

    if type == 4:  # flip up-down
        return cv2.flip(image, 0)

    if type == 5:  # left-right flip, swap axes, left-right flip
        mirrored = cv2.flip(image, 1)
        return cv2.flip(mirrored.transpose(1, 0, 2), 1)

    if type == 6:  # up-down flip, swap axes, left-right flip
        mirrored = cv2.flip(image, 0)
        return cv2.flip(mirrored.transpose(1, 0, 2), 1)

    # Any other selector leaves the image untouched.
    return image

In [None]:
def fill_targets(row):
    """Set the indicator column of every label listed in ``row.Target`` to 1.

    ``row.Target`` is a whitespace-separated string of integer label ids. Each
    id is translated to its name through ``label_names`` and the entry of that
    name in ``row`` is set to 1.

    Args:
        row: a pandas Series with a ``Target`` entry and one entry per label name.

    Returns:
        The same ``row`` object, modified in place.
    """
    # The builtin int replaces the removed ``np.int`` alias; split() without an
    # argument also tolerates repeated or trailing whitespace.
    for label_id in row.Target.split():
        row.loc[label_names[int(label_id)]] = 1
    return row

# Add one indicator column per label, initialised to 0 ...
for label in label_names.values():
    train_labels[label] = 0

# ... then mark, row by row, the labels listed in each image's Target string.
train_labels = train_labels.apply(fill_targets, axis=1)
train_labels.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Number of images carrying each label, most frequent label first.
label_columns = train_labels.drop(columns=["Id", "Target"])
target_counts = label_columns.sum(axis=0).sort_values(ascending=False)

# Horizontal bar chart of the counts, kept in the same descending order.
plt.figure(figsize=(15, 15))
sns.barplot(x=target_counts.values, y=target_counts.index.values, order=target_counts.index)

In [None]:
# Display the per-label image counts as numbers (sorted in descending order when computed).
target_counts

In [None]:
def augmentor(img_path, img_id, save_path, times):
    """Create one randomly augmented copy of a four-channel image and save it.

    The four greyscale channel files ``<img_id>_<color>.png`` (red, green,
    blue, yellow) are read from ``img_path`` and stacked into one
    (height, width, 4) array. The stack is then rotated by a random angle in
    [0, 1) degrees, elastically distorted, and put through a random
    flip/transpose. Each channel is written to ``save_path`` as
    ``<img_id>_<times>_<color>.png``.

    Args:
        img_path: directory containing the source channel images.
        img_id: id of the source image (file-name stem without the colour suffix).
        save_path: directory that receives the augmented channel images.
        times: index of this augmented copy; becomes part of the new id.

    Returns:
        The id of the augmented image, ``<img_id>_<times>``.

    Raises:
        FileNotFoundError: if a source channel image cannot be read.
        IOError: if an augmented channel image cannot be written.
    """
    colors = ['red', 'green', 'blue', 'yellow']

    channels = []
    for color in colors:
        source_file = os.path.join(img_path, img_id + '_' + color + '.png')
        channel = cv2.imread(source_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if channel is None:
            raise FileNotFoundError('could not read channel image: ' + source_file)
        channels.append(channel)
    rgba = np.stack(channels).transpose(1, 2, 0)

    # The order of the random draws below is kept fixed so that a seeded run
    # stays reproducible.
    rgba = do_shift_scale_rotate(rgba, dx=0, dy=0, scale=1, angle=np.random.uniform(0, 1))
    rgba = do_elastic_transform(rgba, grid=10, distort=np.random.uniform(0, 0.1))
    rgba = do_flip_transpose(rgba, type=np.random.choice(7))

    new_id = img_id + '_' + str(times)
    for channel_index, color in enumerate(colors):
        target_file = os.path.join(save_path, new_id + '_' + color + '.png')
        # cv2.imwrite returns False when the file could not be written.
        if not cv2.imwrite(target_file, rgba[:, :, channel_index]):
            raise IOError('could not write channel image: ' + target_file)
    return new_id

In [None]:
import math

# Ids of all augmented images, in the order they were generated.
total_id = []
img_path = './data/full_train/'

# Labels whose images are oversampled by augmentation.
rare_labels = ['Mitotic spindle', 'Cytoplasmic bodies', 'Aggresome', 'Lipid droplets', 'Endosomes',
               'Lysosomes', 'Peroxisomes', 'Rods & rings', 'Microtubule ends']

# (exclusive upper bound on the number of images with the label, augmented copies per image).
# The first matching row wins; labels with 1000 or more images are not augmented.
# A count that sits exactly on a bound (100, 300, 400, 800) falls into the next
# row instead of being silently skipped.
copies_by_count = [(100, 15), (300, 10), (400, 7), (800, 5), (1000, 3)]

for name in rare_labels:
    # Boolean-mask selection always yields a list of ids, including when zero
    # or exactly one image carries the label.
    img_id_list = train_labels.loc[train_labels[name] == 1, 'Id'].tolist()
    lens = len(img_id_list)
    print(lens)

    n_copies = next((copies for bound, copies in copies_by_count if lens < bound), 0)
    # Outer loop over the copy index keeps the generation order: all images for
    # copy 0, then all images for copy 1, and so on.
    for times in range(n_copies):
        for id_ in img_id_list:
            total_id.append(augmentor(img_path, id_, save_path, times=times))

print(len(total_id))

In [None]:
import matplotlib.pyplot as plt

colors = ['red','green','blue','yellow']
flags = cv2.IMREAD_GRAYSCALE

# Id of the original image to inspect. Leave empty to use the source image of
# the first augmented id in total_id.
img_id = ''
if not img_id:
    if not total_id:
        raise ValueError('no augmented images available: set img_id or run the augmentation cell first')
    # Augmented ids have the form "<original id>_<copy index>".
    img_id = total_id[0].rsplit('_', 1)[0]

# Original channels, stacked to (height, width, 4).
rgba1 = np.stack([cv2.imread(os.path.join(img_path, img_id + '_' + color + '.png'), flags)
                  for color in colors]).transpose(1, 2, 0)
# Channels of augmented copy 0 of the same image.
rgba2 = np.stack([cv2.imread(os.path.join(save_path, img_id + '_' + str(0) + '_' + color + '.png'), flags)
                  for color in colors]).transpose(1, 2, 0)

# Top row: original channels; bottom row: augmented channels.
fig, axs = plt.subplots(2, 4, figsize=(20, 20))
cmaps = ['Reds', 'Greens', 'Blues', 'Oranges']
for row, (row_name, stack) in enumerate([('original', rgba1), ('augmented', rgba2)]):
    for channel, cmap in enumerate(cmaps):
        axs[row, channel].imshow(stack[:, :, channel], cmap=cmap)
        axs[row, channel].set_title(row_name + ' ' + colors[channel])

In [None]:
# Re-index the label table by image Id so that rows can be looked up by Id.
labels = train_labels.set_index('Id')
labels.index  # not displayed: a notebook cell only shows its last expression
# Spot-check the row of one hard-coded image Id.
labels.loc['00070df0-bbc3-11e8-b2bc-ac1f6b6435d0']

In [None]:
# Debug aid: for each augmented id (at most the first 25001) print the id, its
# copy-index suffix, and the id with that suffix sliced off.
for position, aug_id in enumerate(total_id):
    if position > 25000:
        break

    suffix = aug_id.split('_')[-1]
    # "_<n>" is three characters long for two-digit copy indices, two otherwise.
    trim = 3 if int(suffix) > 9 else 2
    print(aug_id)
    print(suffix)
    print(aug_id[:-trim])

In [None]:
# Persist the augmented image ids as a single-column CSV without the row index.
total = pd.DataFrame(total_id)
total.to_csv('./data/total.csv', index=False)

In [None]:
# Keep one row per image Id. DataFrame.drop_duplicates() ignores the index and
# compares column values only, so it would drop every image that shares its
# Target with an earlier image; de-duplicating on the index keeps all Ids.
labels = labels[~labels.index.duplicated(keep='first')]

In [None]:
# Target string of every original image, keyed by Id. Built directly from
# train_labels, one row per Id, so each lookup below returns a single string.
id_to_target = train_labels.drop_duplicates(subset='Id').set_index('Id')['Target']

# One [augmented id, Target] pair per augmented image. An augmented id has the
# form "<original id>_<copy index>", so stripping the last "_"-separated field
# recovers the original id for copy indices of any length.
# NOTE(review): total_id may contain the same augmented id more than once when
# an image carries several of the augmented labels - confirm whether repeated
# rows are wanted in the output.
new_target = [[aug_id, id_to_target.loc[aug_id.rsplit('_', 1)[0]]] for aug_id in total_id]

In [None]:
# Write the (Id, Target) table of the augmented images, without the row index.
save_target = './data'
aug_csv_path = os.path.join(save_target, 'aug_train.csv')
submit = pd.DataFrame(new_target, columns=['Id', 'Target']).astype(str)
submit.to_csv(aug_csv_path, index=None)

In [None]:
# List the files currently present in the augmented-image directory.
os.listdir('./data/aug_train')

In [None]:
import shutil
from tqdm import tqdm_notebook

# Copy every augmented image file into the main training-image directory,
# showing a progress bar while doing so.
# NOTE(review): recent tqdm releases flag tqdm_notebook as deprecated in favour
# of tqdm.notebook.tqdm - confirm against the installed version.
aug_dir = './data/aug_train'
for file_name in tqdm_notebook(os.listdir(aug_dir)):
    shutil.copyfile(aug_dir + '/' + file_name, './data/full_train/' + file_name)