In [49]:
import os
import cv2
import glob
import h5py
import shutil
import imgaug as aug
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import imgaug.augmenters as iaa
from os import listdir, makedirs, getcwd, remove
from os.path import isfile, join, abspath, exists, isdir, expanduser
from pathlib import Path
from skimage.io import imread
from skimage.transform import resize
from keras.models import Sequential, Model, load_model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten,Activation
from keras.models import Sequential
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image
import PIL
import scipy as sp 
import scipy.ndimage as spi
%matplotlib inline

# Grab seaborn's current colour palette; `color` is not referenced by any cell visible here.
color = sns.color_palette()
%matplotlib inline
%config InlineBackend.figure_format="svg"

import tensorflow as tf

In [2]:
# Set the seed for hash based operations in python
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here probably only affects child processes started later — confirm.
os.environ['PYTHONHASHSEED'] = '0'

# Single seed value shared by every library seeded below
seed=1234

# Set the numpy seed
np.random.seed(seed)

# Set the random seed in tensorflow at graph level
# NOTE(review): tf.set_random_seed looks like the TensorFlow 1.x spelling — confirm
# against the installed TensorFlow version before upgrading.
tf.set_random_seed(seed)

# Make the augmentation sequence deterministic
aug.seed(seed)

In [3]:
# Locations of the dataset, given relative to the notebook's working directory.
# training_data / validation_data are later listed with os.listdir and passed to
# flow_from_directory; labels_path is the text file parsed into the label table.
training_data = Path('training') 
validation_data = Path('validation') 
labels_path = Path('monkey_labels.txt')

In [4]:
# Container for the parsed label rows (starts empty)
monkey_labels = []

# Read the file
# Surrounding whitespace is stripped, the text is split into lines, and the first
# line is dropped by the [1:] slice (presumably a column-header row — confirm
# against monkey_labels.txt).
lines = labels_path.read_text().strip().splitlines()[1:]

In [5]:
lines

['n0   , alouatta_palliata\t , mantled_howler                , 131          , 26',
 'n1   , erythrocebus_patas\t , patas_monkey                  , 139          , 28',
 'n2   , cacajao_calvus\t     , bald_uakari                   , 137          , 27',
 'n3   , macaca_fuscata\t     , japanese_macaque              , 152          , 30',
 'n4   , cebuella_pygmea\t     , pygmy_marmoset                , 131          , 26',
 'n5   , cebus_capucinus\t     , white_headed_capuchin         , 141          , 28',
 'n6   , mico_argentatus\t     , silvery_marmoset              , 132          , 26',
 'n7   , saimiri_sciureus\t     , common_squirrel_monkey        , 142          , 28',
 'n8   , aotus_nigriceps\t     , black_headed_night_monkey     , 133          , 27',
 'n9   , trachypithecus_johnii , nilgiri_langur                , 132          , 26']

In [6]:
def _parse_label_row(row):
    """Turn one comma-separated row of the labels file into a tuple.

    Every field is stripped of surrounding spaces, tabs and line endings; the
    4th and 5th fields (image counts) are converted to ``int``.

    Args:
        row: One raw text line, e.g. ``'n0 , alouatta_palliata , mantled_howler , 131 , 26'``.

    Returns:
        Tuple of the stripped fields with positions 3 and 4 as integers.

    Raises:
        ValueError: If a count field is not an integer.
        IndexError: If the row has fewer than five comma-separated fields.
    """
    fields = [field.strip(' \n\t\r') for field in row.split(',')]
    fields[3], fields[4] = int(fields[3]), int(fields[4])
    return tuple(fields)


# Rebuild the list from scratch instead of appending to a global: the cell can
# then be re-run without duplicating rows, and it still works after
# `monkey_labels` has been rebound to a DataFrame by a later cell.
# Blank lines are skipped so stray empty lines in the file do not crash parsing.
monkey_labels = [_parse_label_row(row) for row in lines if row.strip()]

In [7]:
# Turn the parsed rows into a table with one named column per field.
monkey_labels = pd.DataFrame(
    monkey_labels,
    columns=[
        'Label',
        'Latin Name',
        'Common Name',
        'Train Images',
        'Validation Images',
    ],
)


In [29]:
monkey_labels.columns

Index(['Label', 'Latin Name', 'Common Name', 'Train Images',
       'Validation Images'],
      dtype='object')

In [34]:
# Two-column lookup table: label code and common name, both whitespace-trimmed.
labels = pd.DataFrame({
    "id": monkey_labels["Label"].str.strip(),
    "name": monkey_labels["Common Name"].str.strip(),
})
labels

Unnamed: 0,id,name
0,n0,mantled_howler
1,n1,patas_monkey
2,n2,bald_uakari
3,n3,japanese_macaque
4,n4,pygmy_marmoset
5,n5,white_headed_capuchin
6,n6,silvery_marmoset
7,n7,common_squirrel_monkey
8,n8,black_headed_night_monkey
9,n9,nilgiri_langur


In [36]:
# Create a dictionary to map the labels to integers
m_id= labels["id"]
m_id

0    n0
1    n1
2    n2
3    n3
4    n4
5    n5
6    n6
7    n7
8    n8
9    n9
Name: id, dtype: object

In [37]:
def resize_images_in_place(root, size=(250, 250)):
    """Resize every ``*.jpg`` one directory level below ``root``, overwriting the file.

    Args:
        root: Directory whose immediate sub-directories hold the images.
        size: Target ``(width, height)`` in pixels, in the order ``cv2.resize`` expects.

    Returns:
        Number of files that were rewritten.
    """
    width, height = size
    rewritten = 0
    for entry in os.listdir(root):
        for f in glob.glob(os.path.join(root, entry, "*.jpg")):
            oriimg = cv2.imread(f, cv2.IMREAD_COLOR)
            if oriimg is None:
                # cv2.imread reports an unreadable file by returning None;
                # skip it instead of crashing on the missing array.
                print("Skipping unreadable image: {}".format(f))
                continue
            if oriimg.shape[:2] == (height, width):
                # Already the target size: leave the file alone so re-running
                # the cell does not re-encode (and further degrade) the JPEG.
                continue
            cv2.imwrite(f, cv2.resize(oriimg, (width, height)))
            rewritten += 1
    return rewritten


# The images are modified on disk, for both the training and validation trees.
for data_dir in (training_data, validation_data):
    resize_images_in_place(data_dir)
        
        

In [51]:
# This function prepares a random batch from the dataset
def load_batch(dataset_df, batch_size = 25):
    """Return up to ``batch_size`` randomly chosen rows of ``dataset_df``.

    Rows are picked without replacement using NumPy's global random state, so
    the result is reproducible after ``np.random.seed``.

    Args:
        dataset_df: DataFrame to sample from; any index type is accepted.
        batch_size: Maximum number of rows to return. If the frame has fewer
            rows, all of them are returned in random order.

    Returns:
        DataFrame holding the selected rows, with their original index labels.
    """
    # The permutation yields row *positions*, so select with .iloc; the previous
    # label-based .loc lookup only worked for a default 0..n-1 index.
    positions = np.random.permutation(len(dataset_df))[:batch_size]
    batch_df = dataset_df.iloc[positions, :]
    return batch_df

In [None]:
# This function plots sample images in specified size and in defined grid
def plot_batch(images_df, grid_width, grid_height, im_scale_x, im_scale_y):
    """Show a grid of sample images, each titled with its (truncated) label.

    The image for a row is looked up as ``<id>.jpg`` inside any sub-directory of
    the notebook-level ``training_data`` folder.

    Args:
        images_df: DataFrame with one row per image. The ``'id'`` column is the
            file name without the ``.jpg`` extension and ``'breed'`` is the text
            used as the subplot title (first 10 characters).
        grid_width: Number of subplot rows.
        grid_height: Number of subplot columns.
        im_scale_x: Number of rows each image is resized to before display.
        im_scale_y: Number of columns each image is resized to before display.

    Raises:
        FileNotFoundError: If no matching ``.jpg`` exists for a row's id.
    """
    # squeeze=False keeps `ax` two-dimensional even for 1 x N or N x 1 grids.
    f, ax = plt.subplots(grid_width, grid_height, squeeze=False)
    f.set_size_inches(12, 12)

    img_idx = 0
    for i in range(grid_width):
        for j in range(grid_height):
            ax[i][j].axis('off')
            if img_idx >= len(images_df):
                # Fewer rows than grid cells: leave the remaining cells blank.
                continue
            row = images_df.iloc[img_idx]
            img_idx += 1

            # Search every class sub-folder for the file; the earlier code
            # called an undefined `rand` and omitted the path separator.
            pattern = os.path.join(training_data, '*', str(row['id']) + '.jpg')
            matches = glob.glob(pattern)
            if not matches:
                raise FileNotFoundError("No image matches {}".format(pattern))

            ax[i][j].set_title(row['breed'][:10])
            # skimage's imread/resize replace scipy.ndimage.imread and
            # scipy.misc.imresize, which are no longer shipped with SciPy.
            ax[i][j].imshow(resize(imread(matches[0]), (im_scale_x, im_scale_y)))

    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0.25)

# load dataset and visualize sample data
# NOTE(review): LABEL_PATH is not defined in any cell visible here, and this cell
# has no execution count (In [None]) — confirm where the per-image CSV comes from
# before relying on this cell.
dataset_df = pd.read_csv(LABEL_PATH)
# Draw a random batch of 36 rows and show it as a 6 x 6 grid of 64 x 64 thumbnails.
batch_df = load_batch(dataset_df, batch_size=36)
plot_batch(batch_df, grid_width=6, grid_height=6,
           im_scale_x=64, im_scale_y=64)

In [38]:
# Image size (in pixels) requested from the generators and used for the model's
# input shape; matches the 250 x 250 size the files were resized to on disk.
image_width=250
image_height=250
# Number of images per batch yielded by the generators
batch_size= 16


In [41]:
# Training-time generator: pixel values are multiplied by 1/255, and rotation,
# width/height shift, shear, zoom and horizontal-flip augmentation are enabled.
# NOTE(review): `preprocess_input` is imported for VGG16 but not used here —
# confirm that plain 1/255 rescaling is the intended preprocessing for the
# ImageNet-pretrained weights loaded further down.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

In [42]:
# Validation generator: same 1/255 rescaling as training, but no augmentation options.
validation_datagen = ImageDataGenerator(rescale=1./255)

In [45]:
# Stream augmented training batches from the class sub-folders of `training_data`.
# Labels are one-hot encoded (class_mode="categorical").
train_generator = train_datagen.flow_from_directory(
    training_data,
    # Keras documents target_size as (height, width); the two are equal here,
    # but the order matters as soon as non-square images are used.
    target_size=(image_height, image_width),
    batch_size=batch_size,
    # Shuffle so each batch mixes classes instead of following folder order.
    shuffle=True,
    class_mode="categorical",
)

Found 1096 images belonging to 10 classes.


In [46]:
# Stream validation batches from the class sub-folders of `validation_data`.
# Labels are one-hot encoded (class_mode="categorical").
validation_generator = validation_datagen.flow_from_directory(
    validation_data,
    # Keras documents target_size as (height, width); the two are equal here,
    # but the order matters as soon as non-square images are used.
    target_size=(image_height, image_width),
    batch_size=batch_size,
    # Keep a fixed order: validation metrics do not depend on ordering, and
    # predictions then line up with `validation_generator.classes` (needed for
    # e.g. a confusion matrix).
    shuffle=False,
    class_mode="categorical",
)

Found 272 images belonging to 10 classes.


In [47]:
# Take the image counts from the generators instead of hard-coding them: the
# previous literal (1097) disagreed with the 1096 training images that
# flow_from_directory reported.
training_samples = train_generator.samples
validation_samples = validation_generator.samples
# Number of full batches in one pass over the training set
total_steps = training_samples // batch_size

In [48]:
# VGG16 loaded with ImageNet weights and without its fully connected top
# (include_top=False); pooling="max" is requested for the output of the
# convolutional part.
model = VGG16(
    weights='imagenet',
    include_top=False,
    # Image tensors are laid out as (height, width, channels) with the default
    # channels-last format; both sizes are equal here, but the order matters for
    # non-square inputs.
    input_shape=(image_height, image_width, 3),
    pooling="max",
)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
