Generate new images using CutMix (a beginner's guide for other beginners)

Import needed packages

In [None]:
import os
import random
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from PIL import Image

import tensorflow as tf


Original paper : https://arxiv.org/pdf/1905.04899.pdf

CutMix augmentation strategy: patches are cut and pasted among training images, and the ground-truth labels are mixed in proportion to the area of the patches.

In [None]:
def _load_image_array(source):
    """Return a fresh, writable numpy array with the pixels of *source*.

    *source* is either an already-loaded ``numpy.ndarray`` (it is copied, so
    the caller's array is never modified) or anything ``Image.open`` accepts
    (a file path or a file object).
    """
    if isinstance(source, np.ndarray):
        return source.copy()
    # The context manager closes the image file once the pixels are copied.
    with Image.open(source) as handle:
        return np.array(handle)


def generate_images(imgPath1, imgPath2, label1, label2, crop_size=256):
    """Build two CutMix samples by swapping a random square patch between two images.

    A ``crop_size`` x ``crop_size`` patch is picked at a random position in
    each image; the patch of the second image is pasted into the first image
    and vice versa. Each label is then mixed in proportion to the fraction of
    the image area that was replaced.

    Args:
        imgPath1: Path (or file object) of the first image, or an
            already-loaded image array.
        imgPath2: Path (or file object) of the second image, or an
            already-loaded image array.
        label1: Label of the first image (a number or numpy array, e.g. a
            one-hot vector).
        label2: Label of the second image, same kind as ``label1``.
        crop_size: Side length in pixels of the square patch to swap.

    Returns:
        A tuple ``(img1, img2, newlabel1, newlabel2)``: the two mixed images
        as numpy arrays and their mixed labels.

    Raises:
        ValueError: If ``crop_size`` is negative or larger than the smallest
            side of either image, or if the two images do not have the same
            channel layout.
    """
    img1 = _load_image_array(imgPath1)
    img2 = _load_image_array(imgPath2)

    # shape[:2] (rather than shape[:-1]) also works for single-channel images,
    # which are loaded as 2-D arrays.
    img1_height, img1_width = img1.shape[:2]
    img2_height, img2_width = img2.shape[:2]

    if img1.shape[2:] != img2.shape[2:]:
        raise ValueError(
            "both images must have the same channel layout, got shapes "
            f"{img1.shape} and {img2.shape}"
        )

    smallest_side = min(img1_height, img1_width, img2_height, img2_width)
    if not 0 <= crop_size <= smallest_side:
        raise ValueError(
            f"crop_size must be between 0 and {smallest_side} "
            f"(the smallest image side), got {crop_size}"
        )

    # Top-left corner of the patch in each image (x is the row, y the column).
    x1 = random.randint(0, img1_height - crop_size)
    y1 = random.randint(0, img1_width - crop_size)
    x2 = random.randint(0, img2_height - crop_size)
    y2 = random.randint(0, img2_width - crop_size)

    # Copy both patches before pasting so neither paste overwrites pixels the
    # other one still needs.
    patch1 = img1[x1:x1 + crop_size, y1:y1 + crop_size].copy()
    patch2 = img2[x2:x2 + crop_size, y2:y2 + crop_size].copy()

    img1[x1:x1 + crop_size, y1:y1 + crop_size] = patch2
    img2[x2:x2 + crop_size, y2:y2 + crop_size] = patch1

    # alpha = fraction of the image that now comes from the other image.
    patch_area = crop_size * crop_size
    alpha1 = patch_area / (img1_height * img1_width)
    alpha2 = patch_area / (img2_height * img2_width)

    newlabel1 = (1 - alpha1) * label1 + alpha1 * label2
    newlabel2 = alpha2 * label1 + (1 - alpha2) * label2

    return img1, img2, newlabel1, newlabel2

In [None]:
# Two sample training images to mix together.
imgpath1 = "../input/cassava-leaf-disease-classification/train_images/1000015157.jpg"
imgpath2 = "../input/cassava-leaf-disease-classification/train_images/1002394761.jpg"

# Placeholder one-hot labels, used only for illustration; they are not the
# real classes of the two images above.
l1 = np.array([0, 0, 0, 0, 1])
l2 = np.array([0, 0, 0, 1, 0])

img1, img2, l1, l2 = generate_images(imgpath1, imgpath2, label1=l1, label2=l2)

# Show both mixed images stacked vertically.
plt.figure(figsize=(20, 20))
for row, mixed in enumerate((img1, img2), start=1):
    plt.subplot(2, 1, row)
    plt.imshow(mixed)


In [None]:
# Print the two mixed labels, one per line.
print("New labels are: ")
for mixed_label in (l1, l2):
    print(mixed_label)

***Or we can apply Cutout to images***

In [None]:
def cutout(imagePath, num_holes, hole_size, rescale_to = 512):
    """Apply Cutout to an image and return a random square crop of the result.

    ``num_holes`` square regions of ``hole_size`` x ``hole_size`` pixels are
    set to zero at random positions, then a random ``rescale_to`` x
    ``rescale_to`` patch of the image is returned.

    Args:
        imagePath: Path (or file object) of the image, or an already-loaded
            image array (it is copied, so the caller's array is not modified).
        num_holes: Number of square holes to cut.
        hole_size: Side length of each hole in pixels.
        rescale_to: Side length in pixels of the square crop that is returned.

    Returns:
        The cropped image as a numpy array.

    Raises:
        ValueError: If ``hole_size`` (when at least one hole is requested) or
            ``rescale_to`` is negative or larger than the smallest image side.
    """
    # Use the function's own argument; the name `imgPath` is not a parameter
    # of this function.
    if isinstance(imagePath, np.ndarray):
        img = imagePath.copy()
    else:
        # The context manager closes the image file once the pixels are copied.
        with Image.open(imagePath) as handle:
            img = np.array(handle)

    # shape[:2] (rather than shape[:-1]) also works for single-channel images,
    # which are loaded as 2-D arrays.
    img_height, img_width = img.shape[:2]
    smallest_side = min(img_height, img_width)

    if num_holes > 0 and not 0 <= hole_size <= smallest_side:
        raise ValueError(
            f"hole_size must be between 0 and {smallest_side} "
            f"(the smallest image side), got {hole_size}"
        )
    if not 0 <= rescale_to <= smallest_side:
        raise ValueError(
            f"rescale_to must be between 0 and {smallest_side} "
            f"(the smallest image side), got {rescale_to}"
        )

    for _ in range(num_holes):
        x = random.randint(0, img_height - hole_size)
        y = random.randint(0, img_width - hole_size)
        img[x:x + hole_size, y:y + hole_size] = 0

    # random crop a patch of rescale_to x rescale_to
    x = random.randint(0, img_height - rescale_to)
    y = random.randint(0, img_width - rescale_to)
    return img[x:x + rescale_to, y:y + rescale_to]
    
    
    

In [None]:
# Cutout example: 30 small holes of 20 x 20 pixels.
imgPath = "../input/cassava-leaf-disease-classification/train_images/1000015157.jpg"
cutoutImg = cutout(imgPath, num_holes=30, hole_size=20)
plt.imshow(cutoutImg)

In [None]:
# Cutout example: 5 large holes of 80 x 80 pixels.
cutoutImg = cutout(imgPath, num_holes=5, hole_size=80)
plt.imshow(cutoutImg)

In [None]:
IMAGES = "../input/cassava-leaf-disease-classification/train_images/"

# Load the training table, add the full path of every image, then shuffle
# all rows (sample(frac=1) returns every row in random order).
df = (
    pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
    .assign(path=lambda frame: IMAGES + frame['image_id'])
    .sample(frac=1)
)
df.head()

**Get the labels**

In [None]:
# One-hot encode the integer class ids of the `label` column; row i of
# `labels` corresponds to the i-th row of `df` by position.
labels = tf.keras.utils.to_categorical(df['label'])

Make a new directory to save the generated images in

In [None]:
# Create the output folder. Unlike the `!mkdir` shell escape, this is plain
# Python and does not fail when the cell is re-run and the folder exists.
os.makedirs("CutMixImages", exist_ok=True)

Let's generate some data

In [None]:
newlabels = []
newpaths = []
image_count = 0  # running number used in the saved file names

# generate just 10 images in this case (5 pairs, two mixed images per pair)
for i in range(0, 10, 2):
    # `df` was shuffled with sample(frac=1), so its index labels no longer
    # match row positions. `labels` is positional, so the image paths must be
    # looked up by position (.iloc) as well; df['path'][i] would fetch the row
    # whose index *label* is i and pair it with the wrong label.
    path1 = df['path'].iloc[i]
    path2 = df['path'].iloc[i + 1]

    mixed1, mixed2, mixed_label1, mixed_label2 = generate_images(
        path1, path2, labels[i], labels[i + 1]
    )

    # Save both mixed images; file names and labels are appended in the same
    # order so that newpaths[k] always belongs to newlabels[k].
    for mixed_img, mixed_label in ((mixed1, mixed_label1), (mixed2, mixed_label2)):
        image_count += 1
        file_name = f"image{image_count}.jpg"
        Image.fromarray(np.uint8(mixed_img)).convert('RGB').save("./CutMixImages/" + file_name)
        newpaths.append(file_name)
        newlabels.append(mixed_label)
    

**Visualize the new generated images**

In [None]:
# Show two of the saved CutMix images stacked vertically.
plt.figure(figsize=(20, 20))
saved_files = ("./CutMixImages/image2.jpg", "./CutMixImages/image1.jpg")
for row, saved_file in enumerate(saved_files, start=1):
    image = Image.open(saved_file)
    plt.subplot(2, 1, row)
    plt.imshow(image)

In [None]:
# Empty table for the generated samples: the file name plus five label columns.
column_names = ["path", "l1", "l2", "l3", "l4", "l5"]
generated_data = pd.DataFrame(columns=column_names)

In [None]:
# File names first, then one column per position of the mixed label vectors.
generated_data['path'] = newpaths
for position, column in enumerate(("l1", "l2", "l3", "l4", "l5")):
    generated_data[column] = [label[position] for label in newlabels]

generated_data.head()

In [None]:
import shutil

# Zip the folder the images were saved to. The relative path matches the
# "./CutMixImages/" location used when saving, so the cell also works when
# the working directory is not /kaggle/working.
shutil.make_archive("CutMixImages", 'zip', "./CutMixImages/")


Download the zip archive and the CSV file (the CSV is written by the last cell below)

In [None]:
from IPython.display import FileLink
# Build a link object for the zip archive in the working directory; as the
# last expression of the cell it is what the notebook displays.
FileLink('./CutMixImages.zip')

In [None]:
# Write the generated file names and mixed labels to disk, without the
# DataFrame index column.
generated_data.to_csv("newdata.csv", index=False)