In [1]:
import os
import random
from scipy import ndarray
import pandas as pd

In [2]:
# image processing library
import skimage as sk
from skimage import transform
from skimage import util
from skimage import io
import numpy as np

In [3]:
def random_rotation(image_array: ndarray):
    """Rotate the image by a random angle.

    The angle is drawn uniformly from [-25, 25] and handed to
    ``sk.transform.rotate``, which interprets it in degrees.
    """
    angle_in_degrees = random.uniform(-25, 25)
    rotated = sk.transform.rotate(image_array, angle_in_degrees)
    return rotated

def random_noise(image_array: ndarray):
    """Return a noisy copy of the image.

    Delegates to ``sk.util.random_noise`` with its default settings.
    """
    noisy = sk.util.random_noise(image_array)
    return noisy

def horizontal_flip(image_array: ndarray):
    """Mirror the image left-to-right.

    No skimage call is needed: reversing the second axis (the columns) of the
    pixel array is enough. The result is a reversed view of the input.
    """
    mirrored = image_array[:, ::-1]
    return mirrored

def gray_scale(image_array: ndarray):
    """Convert the image to grayscale via ``sk.color.rgb2gray``."""
    gray = sk.color.rgb2gray(image_array)
    return gray

def color_inversion(image_array: ndarray):
    """Return the inverted image, as computed by ``sk.util.invert``."""
    inverted = sk.util.invert(image_array)
    return inverted

def rescale_intensity(image_array: ndarray):
    """Stretch the image contrast over a randomly chosen percentile window.

    Two percentiles are drawn uniformly from [0.2, 99.8); the pixel values at
    those percentiles become the ``in_range`` passed to
    ``sk.exposure.rescale_intensity``.

    Returns:
        The rescaled image, or ``image_array`` unchanged when the window is
        empty (both percentiles map to the same pixel value, e.g. a flat image).
    """
    # The two draws are independent, so order them explicitly: without this
    # the lower bound ends up above the upper bound about half of the time,
    # which hands an inverted in_range to skimage.
    low_pct, high_pct = np.sort(np.random.uniform(low=0.2, high=99.8, size=2))
    v_min, v_max = np.percentile(image_array, (low_pct, high_pct))
    if v_min == v_max:
        # Zero-width window: rescaling would divide by zero, so leave the
        # image as it is.
        return image_array
    return sk.exposure.rescale_intensity(image_array, in_range=(v_min, v_max))

def gamma_adjust(image_array: ndarray):
    """Apply ``sk.exposure.adjust_gamma`` with random parameters.

    ``gamma`` is drawn uniformly from [0, 1] and ``gain`` from [0.5, 1];
    gamma is drawn first.
    """
    random_gamma = random.uniform(0, 1)
    random_gain = random.uniform(.5, 1)
    return sk.exposure.adjust_gamma(image_array, gamma=random_gamma, gain=random_gain)

def log_adjust(image_array: ndarray):
    """Apply ``sk.exposure.adjust_log`` with its default parameters."""
    adjusted = sk.exposure.adjust_log(image_array)
    return adjusted

def sigmoid_adjust(image_array: ndarray):
    """Apply ``sk.exposure.adjust_sigmoid`` with its default parameters."""
    adjusted = sk.exposure.adjust_sigmoid(image_array)
    return adjusted


In [4]:
# Registry of the augmentation functions defined earlier, keyed by name.
# generate() draws its random transformations from this mapping.
# NOTE(review): gray_scale is defined but not registered here - presumably
# on purpose; confirm.
available_transformations = dict(
    [
        ('rotate', random_rotation),
        ('noise', random_noise),
        ('horizontal_flip', horizontal_flip),
        ('color_inversion', color_inversion),
        ('rescale_intensity', rescale_intensity),
        ('gamma_adjust', gamma_adjust),
        ('log_adjust', log_adjust),
        ('sigmoid_adjust', sigmoid_adjust),
    ]
)

# Index used in the name of the next generated file (Img-<numm>.jpg).
# NOTE(review): 13001 presumably follows the last existing image id - confirm.
numm = 13001

# Folder holding the source images referenced by the training CSV.
folder_path = 'train'




In [5]:
def generate(images, num_files_desired, num, output_dir="new"):
    """Write ``num_files_desired`` augmented images and return the next free index.

    For every output file a source path is picked at random from ``images``
    and read. A random number of transformations (between 1 and the number of
    entries in ``available_transformations``) is then applied one after
    another, each working on the result of the previous one. The final image
    is saved as ``<output_dir>/Img-<num>.jpg``.

    Args:
        images: sequence of source image paths to sample from (with
            replacement); must be non-empty when files are requested.
        num_files_desired: number of files to write.
        num: index used in the first output file name; incremented after
            every file.
        output_dir: directory receiving the generated files. Defaults to
            ``"new"`` and is created if it does not exist.

    Returns:
        The index following the last file written (``num`` unchanged when no
        file was requested).
    """
    # imsave does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    transformations = list(available_transformations.values())

    for _ in range(num_files_desired):
        # random image from the folder, read as an array of pixels
        source_path = random.choice(images)
        transformed_image = sk.io.imread(source_path)

        # random number of transformations to apply
        num_transformations_to_apply = random.randint(1, len(transformations))
        for _ in range(num_transformations_to_apply):
            # Feed each transformation the output of the previous one so that
            # all of them contribute to the result, not only the last one.
            transform = random.choice(transformations)
            transformed_image = transform(transformed_image)

        new_file_path = '%s/Img-%s.jpg' % (output_dir, num)
        num += 1
        # Several transformations (e.g. rotate, random_noise) return float
        # images, while JPEG stores 8-bit samples: convert explicitly before
        # writing the image to disk.
        io.imsave(new_file_path, sk.util.img_as_ubyte(transformed_image))
    return num

In [6]:
def image_path(key, dataframe):
    """Return the paths of all images labelled ``key``.

    Every row of ``dataframe`` is unpacked as ``(file, animal)``, so the frame
    must have exactly two columns in that order. Each matching file name is
    prefixed with the module-level ``folder_path`` and a ``/``.
    """
    matching_paths = []
    for row in dataframe.values:
        file, animal = row
        if animal == key:
            matching_paths.append(folder_path + '/' + file)
    return matching_paths


In [7]:
# Load the training index (one row per image with its label) and the category
# lookup table, then tally how many training images each label has.
train = pd.read_csv("train.csv")
category = pd.read_csv("category_list.csv")
count = train.loc[:, "Animal"].value_counts()

In [8]:
# Preview the first rows of the category lookup table (plain-text rendering).
print(category.head())

   category    animal
0         0  antelope
1         1       bat
2         2    beaver
3         3    bobcat
4         4   buffalo


In [9]:
# Preview the first rows of the training index (plain-text rendering).
print(train.head())

    Image_id           Animal
0  Img-1.jpg     hippopotamus
1  Img-2.jpg         squirrel
2  Img-3.jpg     grizzly+bear
3  Img-4.jpg               ox
4  Img-5.jpg  german+shepherd


In [None]:
# Top up every under-represented class with generated images until it has
# TARGET_IMAGES_PER_CLASS of them.
TARGET_IMAGES_PER_CLASS = 1000

for _, animal in category.values:
    # value_counts() only lists labels that occur in the training data, so a
    # category without any training image must fall back to 0 rather than
    # raise a KeyError.
    c = count.get(animal, 0)
    print(c)
    if c == 0:
        # Nothing to augment from: generate() needs at least one source image.
        continue
    if c < TARGET_IMAGES_PER_CLASS:
        images = image_path(animal, train)
        # generate() returns the next free file index; carry it over so file
        # names stay unique across classes.
        numm = generate(images, TARGET_IMAGES_PER_CLASS - c, numm)