In [1]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import matplotlib.image as mplimg
from matplotlib.pyplot import imshow

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from keras import layers
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D, Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, MaxPool2D
from keras.models import Model
from keras.models import load_model

import keras.backend as K
from keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import gc

from keras.callbacks import ReduceLROnPlateau
from PIL import Image

import keras

from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier


from keras.callbacks import EarlyStopping


np.random.seed(2)


import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn.utils import shuffle


from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, save_img


Using TensorFlow backend.


## Pre-processing data / agumment data

In [2]:
# Reading our train data
train_df = pd.read_csv("./train.csv")

### Collecting top 10 whales that are not new_whale, setting to new_df

In [3]:
# collecting whales that are not new_whale
whale_df = train_df[train_df.Id != 'new_whale']
# getting top ten whales
top_ten = whale_df["Id"].value_counts().head(10)
print(top_ten)

# making a new df with top 10
columns = ['Image', 'Id']
new_df = pd.DataFrame(columns=columns)

for i in range(len(train_df['Id'])):
    if train_df['Id'].loc[i] in top_ten:
        new_df.loc[i] = (train_df['Image'].loc[i], train_df['Id'].loc[i])
new_df[:2]

w_23a388d    73
w_9b5109b    65
w_9c506f6    62
w_0369a5c    61
w_700ebb4    57
w_3de579a    54
w_564a34b    51
w_fd3e556    50
w_88e4537    49
w_2b069ba    48
Name: Id, dtype: int64


Unnamed: 0,Image,Id
26,004e8ad5b.jpg,w_3de579a
32,00570db6b.jpg,w_9c506f6


### Splitting df so we can have normal inputs for images.

In [4]:
X_train, X_test, y_train, y_test = train_test_split(new_df['Image'], new_df['Id'], test_size=0.20)

print("Shape of X_train: ", X_train.shape[0])
print("Shape of X_test: ", X_test.shape[0])
print("Shape of y_train: ", y_train.shape[0])
print("Shape of y_test: ", y_test.shape[0])
print(X_train[:2])
print(y_train[:2])

Shape of X_train:  456
Shape of X_test:  114
Shape of y_train:  456
Shape of y_test:  114
25035    fc5c296f8.jpg
19678    c669a3706.jpg
Name: Image, dtype: object
25035    w_0369a5c
19678    w_700ebb4
Name: Id, dtype: object


Our image data generator ( agumentation )

In [None]:
gen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.15,
                        zoom_range=0.1, channel_shift_range=10,
                        horizontal_flip=True)


Processing every image from data set and resizing / agumentating

In [None]:
def processImages(data, dataset):
    i = 0
    for fig in data['Image']:
        img = image.load_img("./input/"+dataset+"/"+fig, target_size=(224, 224, 3))
        img_arr = image.img_to_array(img)
        image.save_img('./processed/224_224_0_'+fig, img_arr)
        temp_image = np.expand_dims(mpimg.imread('./processed/224_224_0_'+fig), 0)
        aug_iter = gen.flow(temp_image)
        aug_images = [next(aug_iter)[0].astype(np.uint8) for i in range(8)]
        for x in range(len(aug_images)):
            image.save_img('./processed/224_224_'+str(x+1)+'_'+fig, aug_images[x])
        if (i%100 == 0):
            print("Processing image: ", i+1, ", ", fig)
        i += 1
        

In [None]:
processImages(new_df, 'train')

In [5]:
def prepareImages(data, m, type_of_data):
    
    print("Preparing images")
    
    if type_of_data == 'training':
        X = np.zeros((m*6, 224, 224, 3))
        count = 0
        for fig in data:
            #load images into images of size 100x100x3
            for i in range(6):
                img = image.load_img('./processed/224_224_' + str(i) + '_' + fig, target_size=(224, 224, 3))
                x = image.img_to_array(img)
                x = preprocess_input(x)
                X[count] = x
                count += 1
                if (count%100 == 0):
                    print("Processing image: ", count+1, ", ", fig)
    elif type_of_data == 'testing':
        X = np.zeros((m, 224, 224, 3))
        count = 0
        for fig in data:
            img = image.load_img('./processed/224_224_0_' + fig, target_size=(224, 224, 3))
            x = image.img_to_array(img)
            x = preprocess_input(x)
            X[count] = x
            count += 1
            if (count%50 == 0):
                print("Processing image: ", count+1, ", ", fig)    
    return X

def prepare_labels(y):
    values = np.array(y)
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)
    # print(integer_encoded)

    onehot_encoder = OneHotEncoder(sparse=False)
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
    # print(onehot_encoded)

    y = onehot_encoded
    # print(y.shape)
    return y, label_encoder

In [6]:
X_train_processed = prepareImages(X_train, X_train.shape[0], 'training')
X_test_processed = prepareImages(X_test, X_test.shape[0], 'testing')
X_train_processed /= 255
X_test_processed /= 255

# X = prepareImages(new_df['Image'], new_df['Image'].shape[0], 'training')
# X /= 255

Preparing images
Processing image:  101 ,  36ef0f133.jpg
Processing image:  201 ,  aa5dc4076.jpg
Processing image:  301 ,  ba50a7085.jpg
Processing image:  401 ,  9f6cc357a.jpg
Processing image:  501 ,  258679fcf.jpg
Processing image:  601 ,  e6bfde781.jpg
Processing image:  701 ,  d4dd371d1.jpg
Processing image:  801 ,  becb0e860.jpg
Processing image:  901 ,  5336ac99b.jpg
Processing image:  1001 ,  54f1d85a8.jpg
Processing image:  1101 ,  c79ce1999.jpg
Processing image:  1201 ,  317a126f4.jpg
Processing image:  1301 ,  c4e291ada.jpg
Processing image:  1401 ,  24100a9fc.jpg
Processing image:  1501 ,  188b04c45.jpg
Processing image:  1601 ,  b3a043eaf.jpg
Processing image:  1701 ,  13a7495d5.jpg
Processing image:  1801 ,  471e75940.jpg
Processing image:  1901 ,  89afd1952.jpg
Processing image:  2001 ,  462152c16.jpg
Processing image:  2101 ,  a7e25cad5.jpg
Processing image:  2201 ,  c5cc80b0c.jpg
Processing image:  2301 ,  beb52d9f4.jpg
Processing image:  2401 ,  1eef410a0.jpg
Processi

In [7]:
print('X_train Image count: ', X_train_processed.shape)
print('X_test Image count: ', X_test_processed.shape)
# print('X_test Image count: ', X.shape)

X_train Image count:  (2736, 224, 224, 3)
X_test Image count:  (114, 224, 224, 3)


In [8]:
# Setting up our y.
new_y_array = []
for x in y_train:
    for y in range(6):
        new_y_array.append(x)

y_train_processed, label_encorder = prepare_labels(new_y_array)
y_test_processed, label_encorder = prepare_labels(y_test)

# y, label_encorder = prepare_labels(new_y_array)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [9]:
print('Y train processed shape: ', y_train_processed.shape)
print('Y test processed shape: ', y_test_processed.shape)
# print("Y shape: ", y.shape)

Y train processed shape:  (2736, 10)
Y test processed shape:  (114, 10)


In [10]:
# X_train_processed_s, y_train_processed_s = shuffle(X_train_processed, y_train_processed, random_state=0)
# X_test_processed_s, y_test_processed_s = shuffle(X_test_processed, y_test_processed, random_state=0)

X_train_p_n, X_test_p_n, y_train_p_n, y_test_p_n = train_test_split(X_train_processed, y_train_processed, test_size=0)

# print("X_train_p_n shape: ", X_train_p_n.shape)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

# Importing and using Pre-Trained Models

### VGG_16

In [11]:
vgg16_model = keras.applications.vgg16.VGG16()

Instructions for updating:
Colocations handled automatically by placer.


In [12]:
vgg16_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [13]:
model_vgg_16 = Sequential()
# model.add()
for layer in vgg16_model.layers[:-1]:
    model_vgg_16.add(layer)
    
for layer in model_vgg_16.layers:
    layer.trainable = False
    
model_vgg_16.add(Dense(10, activation='softmax'))

model_vgg_16.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
__________

In [14]:
model_vgg_16.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [15]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')

history = model_vgg_16.fit(X_train_p_n, y_train_p_n, batch_size=64, epochs=125, verbose = 1, callbacks=[early_stopping])

Instructions for updating:
Use tf.cast instead.
Epoch 1/125




Epoch 2/125
Epoch 3/125
Epoch 4/125
Epoch 5/125
Epoch 6/125
Epoch 7/125
Epoch 8/125
Epoch 9/125
Epoch 10/125
Epoch 11/125
Epoch 12/125
Epoch 13/125
Epoch 14/125
Epoch 15/125
Epoch 16/125
Epoch 17/125
Epoch 18/125
Epoch 19/125
Epoch 20/125
Epoch 21/125
Epoch 22/125
Epoch 23/125
Epoch 24/125
Epoch 25/125
Epoch 26/125
Epoch 27/125
Epoch 28/125
Epoch 29/125
Epoch 30/125
Epoch 31/125
Epoch 32/125
Epoch 33/125
Epoch 34/125
Epoch 35/125
Epoch 36/125
Epoch 37/125
Epoch 38/125
Epoch 39/125
Epoch 40/125
Epoch 41/125
Epoch 42/125
Epoch 43/125
Epoch 44/125
Epoch 45/125
Epoch 46/125
Epoch 47/125
Epoch 48/125
Epoch 49/125
Epoch 50/125
Epoch 51/125
Epoch 52/125
Epoch 53/125
Epoch 54/125
Epoch 55/125
Epoch 56/125
Epoch 57/125
Epoch 58/125
Epoch 59/125
Epoch 60/125
Epoch 61/125
Epoch 62/125
Epoch 63/125
Epoch 64/125
Epoch 65/125
Epoch 66/125
Epoch 67/125
Epoch 68/125
Epoch 69/125
Epoch 70/125
Epoch 71/125
Epoch 72/125
Epoch 73/125
Epoch 74/125
Epoch 75/125
Epoch 76/125
Epoch 77/125
Epoch 78/125
Epoch 7

Epoch 84/125
Epoch 85/125
Epoch 86/125
Epoch 87/125
Epoch 88/125
Epoch 89/125
Epoch 90/125
Epoch 91/125
Epoch 92/125
Epoch 93/125
Epoch 94/125
Epoch 95/125
Epoch 96/125
 512/2736 [====>.........................] - ETA: 24s - loss: 0.1783 - acc: 0.9629

KeyboardInterrupt: 

In [16]:
score_vgg16 = model_vgg_16.evaluate(X_test_processed, y_test_processed, verbose=1)
print('The accuracy is :', score_vgg16[1])

The accuracy is : 0.7719298224700125


### Saving model

In [20]:
model_vgg_16.save("vgg_16_agumented_help.h5")