In [22]:
#preparing the train directory for keras

import pandas as pd
import shutil
import os
import sys

# Load the training labels. Later cells unpack each row of `labels.values`
# as (image id, class name) and read the class column as `labels.breed`.
labels = pd.read_csv('train.csv')

In [23]:
# Prepare the training directory for Keras: copy every image listed in
# `labels` into a sub-folder of `train_sep/` named after its class.

train_dir = 'train/'          # original train data with images
train_sep_dir = 'train_sep/'  # new directory with one sub-folder per class

# exist_ok=True keeps the cell safe to re-run.
os.makedirs(train_sep_dir, exist_ok=True)

# NOTE: the unpacking below assumes `labels` has exactly two columns,
# (image id without extension, class name).
for filename, class_name in labels.values:
    # Create the class sub-directory on first use.
    class_dir = os.path.join(train_sep_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)

    src_path = os.path.join(train_dir, filename + '.jpg')
    dst_path = os.path.join(class_dir, filename + '.jpg')
    try:
        shutil.copy(src_path, dst_path)
    except OSError as e:
        # Best effort: report the file (and the reason) and keep going.
        print('Unable to copy file {} to {}: {}'
              .format(src_path, dst_path, e))
    except Exception as e:
        # `Exception` rather than a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) can still stop a long-running copy.
        print('When try copy file {} to {}, unexpected error: {!r}'
              .format(src_path, dst_path, e))


In [24]:
# Build a {class name: number of labelled images} dictionary, ordered from
# the most to the least frequent class.
res_dct = {breed: count for breed, count in labels.breed.value_counts().items()}
print(res_dct)

{'thamnophis-sirtalis': 509, 'storeria-dekayi': 412, 'pantherophis-obsoletus': 366, 'crotalus-atrox': 354, 'pituophis-catenifer': 298, 'nerodia-sipedon': 259, 'agkistrodon-contortrix': 206, 'lampropeltis-triangulum': 197, 'diadophis-punctatus': 184, 'pantherophis-alleghaniensis': 172, 'nerodia-erythrogaster': 171, 'lampropeltis-californiae': 166, 'opheodrys-aestivus': 152, 'agkistrodon-piscivorus': 147, 'crotalus-horridus': 142, 'coluber-constrictor': 129, 'heterodon-platirhinos': 118, 'thamnophis-proximus': 112, 'nerodia-rhombifer': 104, 'thamnophis-radix': 100, 'pantherophis-guttatus': 100, 'haldea-striatula': 98, 'pantherophis-spiloides': 92, 'rhinocheilus-lecontei': 91, 'storeria-occipitomaculata': 87, 'crotalus-viridis': 83, 'masticophis-flagellum': 83, 'thamnophis-marcianus': 82, 'crotalus-ruber': 77, 'nerodia-fasciata': 74, 'pantherophis-vulpinus': 73, 'natrix-natrix': 72, 'thamnophis-elegans': 69, 'pantherophis-emoryi': 66, 'crotalus-scutulatus': 63}


In [25]:
# Load the libraries used for image augmentation
import cv2
import numpy as np
from skimage import io 
from skimage.transform import rotate, AffineTransform, warp
import matplotlib.pyplot as plt
import random
from skimage import img_as_ubyte
import os
from skimage.util import random_noise

In [26]:
# Define one function per augmentation operation
def anticlockwise_rotation(image):
    """Rotate `image` by a random whole angle in [0, 180] degrees.

    The positive angle is passed straight to skimage's `rotate`, which
    turns the image counter-clockwise.
    """
    degrees = random.randint(0, 180)
    rotated = rotate(image, degrees)
    return rotated

def clockwise_rotation(image):
    """Rotate `image` by a random whole angle in [0, 180] degrees.

    The angle is negated before being passed to skimage's `rotate`, so
    the image turns the opposite way to `anticlockwise_rotation`.
    """
    degrees = random.randint(0, 180)
    rotated = rotate(image, -degrees)
    return rotated

def h_flip(image):
    """Return `image` mirrored left-to-right (column order reversed)."""
    mirrored = np.fliplr(image)
    return mirrored

def v_flip(image):
    """Return `image` mirrored top-to-bottom (row order reversed)."""
    mirrored = np.flipud(image)
    return mirrored

def add_noise(image):
    """Return `image` with random noise added.

    Delegates to skimage's `random_noise` with its default settings.
    """
    noisy = random_noise(image)
    return noisy

def blur_image(image):
    """Return `image` smoothed with a 9x9 Gaussian kernel.

    A sigma of 0 is passed, leaving OpenCV to derive the standard
    deviation from the kernel size.
    """
    kernel_size = (9, 9)
    sigma_x = 0
    return cv2.GaussianBlur(image, kernel_size, sigma_x)

# Warp shifting is generally not recommended because it distorts the image,
# but it can be useful in some use cases, such as classifying blurred vs.
# non-blurred images.
def warp_shift(image):
    """Return `image` warped by a fixed affine translation of (0, 40).

    `mode="wrap"` is passed to skimage's `warp`, so the area uncovered
    by the shift is filled by wrapping the image around.
    """
    # Choose the x, y translation values to suit your data.
    shift = AffineTransform(translation=(0, 40))
    return warp(image, shift, mode="wrap")

In [29]:
# Registry of the available augmentations: a human-readable label mapped
# to the function that performs it. Insertion order is kept as listed.
transformations = dict([
    ('rotate anticlockwise', anticlockwise_rotation),
    ('rotate clockwise', clockwise_rotation),
    ('horizontal flip', h_flip),
    ('vertical flip', v_flip),
    ('warp shift', warp_shift),
    ('adding noise', add_noise),
    ('blurring image', blur_image),
])

In [31]:
# Create new augmented images so that every class ends up with as many
# images as the largest class.

# Balance against the largest class instead of a hard-coded count.
target_per_class = max(res_dct.values()) if res_dct else 0
# random.choice needs a sequence; build the list of names once.
transformation_names = list(transformations)

for keys in res_dct:
    images_path = "./train_sep/" + keys     # path to original images
    augmented_path = "./train_sep/" + keys  # augmented images are stored next to the originals

    os.makedirs(augmented_path, exist_ok=True)

    # Paths of every image currently in the class folder (the sampling pool).
    images = [os.path.join(images_path, im) for im in os.listdir(images_path)]

    images_to_generate = target_per_class - int(res_dct[keys])
    if images_to_generate > 0 and not images:
        # random.choice would raise IndexError on an empty pool.
        print('No source images found in {}; skipping augmentation'.format(images_path))
        continue

    for i in range(1, images_to_generate + 1):
        image = random.choice(images)
        original_image = io.imread(image)

        # Apply a random number (1..len(transformations)) of randomly chosen
        # transformations, each one on top of the previous result so that
        # all of them contribute to the saved image.
        transformed_image = original_image
        transformation_count = random.randint(1, len(transformations))
        for _ in range(transformation_count):
            key = random.choice(transformation_names)
            transformed_image = transformations[key](transformed_image)

        new_image_path = "%s/%s_%s.jpg" % (augmented_path, keys, i)
        # Convert to unsigned bytes in [0, 255]; several transformations
        # return floating-point images.
        transformed_image = img_as_ubyte(transformed_image)

        # skimage loads colour images as RGB while cv2.imwrite expects BGR.
        # Grayscale (2-D) images need no channel swap.
        if transformed_image.ndim == 3:
            if transformed_image.shape[2] == 3:
                transformed_image = cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR)
            elif transformed_image.shape[2] == 4:
                transformed_image = cv2.cvtColor(transformed_image, cv2.COLOR_RGBA2BGR)

        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(new_image_path, transformed_image):
            print('Failed to write augmented image {}'.format(new_image_path))


In [35]:
# Create the new label file `train_new.csv`, listing every image found in the
# class sub-folders of `train_sep/` together with its class.
# NOTE: `image_id` here is the full file name as returned by os.listdir
# (including the extension).
import os
import pandas as pd

BASE_DIR = './train_sep'
rows = []
for folder in os.listdir(BASE_DIR):
    folder_path = os.path.join(BASE_DIR, folder)
    # Skip stray files and hidden entries (e.g. .DS_Store,
    # .ipynb_checkpoints): they are not classes, and listing a plain file
    # would raise NotADirectoryError.
    if folder.startswith('.') or not os.path.isdir(folder_path):
        continue
    for images in os.listdir(folder_path):
        rows.append([images, folder])

df = pd.DataFrame(rows, columns=["image_id", "breed"])
print(df)
# index=False: do not write the row index as an extra column.
df.to_csv('train_new.csv', index=False)

                            image_id                  breed
0                     85090e4cc9.jpg      crotalus-horridus
1          crotalus-horridus_327.jpg      crotalus-horridus
2          crotalus-horridus_309.jpg      crotalus-horridus
3                     952eeb79e2.jpg      crotalus-horridus
4                     0a55e138dd.jpg      crotalus-horridus
...                              ...                    ...
17810  pantherophis-vulpinus_387.jpg  pantherophis-vulpinus
17811  pantherophis-vulpinus_392.jpg  pantherophis-vulpinus
17812  pantherophis-vulpinus_430.jpg  pantherophis-vulpinus
17813  pantherophis-vulpinus_159.jpg  pantherophis-vulpinus
17814  pantherophis-vulpinus_396.jpg  pantherophis-vulpinus

[17815 rows x 2 columns]


In [33]:
# Count how many rows (images) each breed has in the rebuilt table.
df.breed.value_counts()

lampropeltis-triangulum        509
crotalus-atrox                 509
thamnophis-radix               509
storeria-occipitomaculata      509
natrix-natrix                  509
pantherophis-guttatus          509
nerodia-rhombifer              509
agkistrodon-piscivorus         509
lampropeltis-californiae       509
rhinocheilus-lecontei          509
opheodrys-aestivus             509
crotalus-viridis               509
pantherophis-vulpinus          509
crotalus-ruber                 509
haldea-striatula               509
pantherophis-spiloides         509
diadophis-punctatus            509
pituophis-catenifer            509
thamnophis-proximus            509
crotalus-horridus              509
nerodia-fasciata               509
masticophis-flagellum          509
nerodia-erythrogaster          509
coluber-constrictor            509
pantherophis-alleghaniensis    509
thamnophis-elegans             509
crotalus-scutulatus            509
thamnophis-sirtalis            509
nerodia-sipedon     

In [34]:
# Display the rebuilt image_id / breed table.
df

Unnamed: 0,image_id,breed
0,85090e4cc9.jpg,crotalus-horridus
1,crotalus-horridus_327.jpg,crotalus-horridus
2,crotalus-horridus_309.jpg,crotalus-horridus
3,952eeb79e2.jpg,crotalus-horridus
4,0a55e138dd.jpg,crotalus-horridus
...,...,...
17810,pantherophis-vulpinus_387.jpg,pantherophis-vulpinus
17811,pantherophis-vulpinus_392.jpg,pantherophis-vulpinus
17812,pantherophis-vulpinus_430.jpg,pantherophis-vulpinus
17813,pantherophis-vulpinus_159.jpg,pantherophis-vulpinus


In [39]:
# Copy every image from the class sub-folders of train_sep/ into the single flat directory train_new/
import os
import shutil
import pandas as pd

Source_DIR = './train_sep/'
Target_DIR = './train_new/'

# shutil.copy does not create missing directories, so make sure the
# destination exists; exist_ok=True keeps the cell safe to re-run.
os.makedirs(Target_DIR, exist_ok=True)

for folder in os.listdir(Source_DIR):
    folder_path = os.path.join(Source_DIR, folder)
    # Skip stray files and hidden entries (e.g. .DS_Store,
    # .ipynb_checkpoints); only class sub-folders hold images.
    if folder.startswith('.') or not os.path.isdir(folder_path):
        continue
    for images in os.listdir(folder_path):
        # Files keep their name; only the class sub-folder level is dropped.
        shutil.copy(os.path.join(folder_path, images),
                    os.path.join(Target_DIR, images))
