In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
import os
import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator
import random

# Load Image
## First get familiar with Keras API
Keras provides the load_img() function for loading an image from file as a PIL image object.
img_to_array() and array_to_img() let you convert between PIL image and numpy array

In [None]:
# Demo: read a single training image from disk with the Keras load_img() helper
# (imported in the first cell of the notebook).
img = load_img('/kaggle/input/dogs-vs-cats-redux-kernels-edition/train/cat.1.jpg')
# Report the object's type, then its file format, colour mode and size.
for detail in (type(img), img.format, img.mode, img.size):
    print(detail)
# Render the picture inline with matplotlib.
plt.imshow(img)

In [None]:
# Example: round-trip an image through a NumPy array with the Keras helpers
# img_to_array() / array_to_img() (both imported in the first cell).

# PIL image -> NumPy array
img_array = img_to_array(img)
print(img_array.dtype)
print(img_array.shape)

# NumPy array -> PIL image. The conversion is actually performed here so that
# the printed type describes the converted object rather than the original `img`.
img_pil = array_to_img(img_array)
print(type(img_pil))

# Author's observation for this picture: H:W 280:300, with 3 RGB colour channels.

## OK, this works. Now let's load all the images

In [None]:
# List every entry in the training directory. The target label is embedded in
# each filename, so it is parsed out of the names in the following cell.
filenames = os.listdir("/kaggle/input/dogs-vs-cats-redux-kernels-edition/train/")

In [None]:
# Split the directory listing by the class prefix of each filename (the part
# before the first '.'). Both classes are matched explicitly so that an entry
# that is neither a dog nor a cat is not silently labelled as a cat.
dog_filenames = [name for name in filenames if name.split('.')[0] == 'dog']
cat_filenames = [name for name in filenames if name.split('.')[0] == 'cat']

# Labels are kept as strings: the author found that 0/1 integer labels raised an
# error in ImageDataGenerator.flow_from_dataframe() when class_mode='binary'.
dog_df = pd.DataFrame({'filename': dog_filenames, 'label': 'dog'})
cat_df = pd.DataFrame({'filename': cat_filenames, 'label': 'cat'})

# DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0, so use
# pd.concat(); ignore_index=True gives the combined frame a unique row index
# instead of repeating each per-class 0..n-1 index.
all_df = pd.concat([cat_df, dog_df], ignore_index=True)
all_df.head()

In [None]:
# Count the images per class to check how balanced the labelled data is.
all_df.label.value_counts()
# Author's observation: the dataset is perfectly balanced.

# Feature Scaling
1.we need to make every pics the same size

2.It is a best practice with neural networks to scale down the feature range for quicker convergence. 
Since these are RGB features, each element (pixel value) ranges from 0 to 255, so we can simply rescale by dividing by 255.

3.We also need image augmentation, which basically means rotating, flipping, and otherwise transforming the original images to create more training images. This adds variation and helps the model overfit less.
All of this can be achieved with the Keras ImageDataGenerator class.
Tutorial: [https://medium.com/@vijayabhaskar96/tutorial-on-keras-flow-from-dataframe-1fd4493d237c]
It also helps you load the data from the directory into memory in batches.

In [None]:
# First, hold out 20% of the labelled data as our own testing set.
# The images in the '~/test1/' folder do not have labels, so they cannot be used
# for evaluation; only the best-performing model is applied to them, to produce
# the submission.
from sklearn.model_selection import train_test_split
# random_state is fixed so the split is the same on every run.
train_df, test_df = train_test_split(all_df, test_size=0.2, random_state = 42)

In [None]:
# (rows, columns) of the held-out split
test_df.shape


In [None]:
# (rows, columns) of the training split
train_df.shape

In [None]:
image_size = 224 # VGG nets take 224 x 224 inputs, so images are resized to this size to allow later adoption of VGG 

train_datagen=ImageDataGenerator(
    rescale=1./255., # scaling: multiply every pixel value by 1/255
    rotation_range = 15, shear_range = 0.1, zoom_range = 0.1, 
    horizontal_flip = True, width_shift_range=0.1,height_shift_range=0.1) # random augmentation gives more variants to the training dataset

train_generator=train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/kaggle/input/dogs-vs-cats-redux-kernels-edition/train/", # directory that contains the image files listed in train_df
    x_col="filename", # the column that contains the file name
    y_col="label", # the column that contains the class label
    batch_size=32, # the generator loads the data in batches of this size
    seed=42, # for reproducibility
    shuffle=True,
    class_mode="binary", 
    target_size=(image_size,image_size)) # the dimensions to which every image is resized: image_size x image_size, i.e. 224 x 224

In [None]:
# Pull a single batch from the training generator and report the shape of the
# image tensor and of the label vector.
data_batch, labels_batch = next(iter(train_generator))
print('data batch shape: ', data_batch.shape)
print('labels batch shape: ', labels_batch.shape)

In [None]:
# Also build a generator for the held-out split. It only rescales the pixel
# values; no augmentation is configured here. test_datagen is reused later for
# the unlabeled prediction images.
test_datagen = ImageDataGenerator(rescale=1./255)

# The held-out files live in the same directory as the training files; only the
# rows of test_df decide which of them are read.
test_generator = test_datagen.flow_from_dataframe(
        dataframe=test_df,
        directory='/kaggle/input/dogs-vs-cats-redux-kernels-edition/train/',
        x_col="filename",
        y_col="label",
        target_size=(image_size, image_size),
        batch_size=32,
        class_mode='binary')

# Model1 - Simple CNN Model
Just to get ourselves familiar with the idea of a CNN, we are not going to throw a fancy model at the problem here.
We simply use a Sequential model with 3 convolution (filter) & pooling layers, followed by a fully connected layer.
We are not tuning any hyperparameters yet; we will do that later.

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras import optimizers
# Shape of one model input: image_size x image_size pixels with 3 colour channels.
input_shape = (image_size, image_size, 3)

In [None]:
# Build the baseline model: three convolution + max-pooling stages followed by
# a dense classifier head with one sigmoid output for the binary cat/dog label.
# The optimizer class is imported by name here because the name `optimizers`
# is rebound to a plain list of strings further down this notebook, which would
# break this cell if it were re-run afterwards.
from keras.optimizers import RMSprop

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1),
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(1, activation='sigmoid')) 
# Same optimizer settings as before (RMSprop, learning rate 1e-3, decay 1e-6),
# spelled with the class name and the `learning_rate` keyword that the
# create_CNN helpers in this notebook also use.
model.compile(RMSprop(learning_rate=1e-3, decay=1e-6),loss="binary_crossentropy",metrics=["accuracy"])

Difference between fit() and fit_generator()

If your data does not fit into RAM, or if you are using image augmentation as I do here, you should use fit_generator.
https://www.pyimagesearch.com/2018/12/24/how-to-use-keras-fit-and-fit_generator-a-hands-on-tutorial/

Here we might have some confusion, the validation_data means --  data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. 
Early stopping requires that a validation dataset is evaluated during training. Here I keep validation so that we can do early stopping or later we can draw the epoch versus loss line plot. 

In [None]:
# fit the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size # number of training instances // batch size = full batches per epoch
# NOTE(review): an earlier note here read "25000 // 32 = 781", but train_generator is built from the 80% split (train_df), so this is presumably 20000 // 32 = 625 — confirm against the generator's "Found N images" output
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN, 
                    # Total number of steps (batches of samples) to yield from generator 
                    # before declaring one epoch finished and starting the next epoch.
                    validation_data=test_generator,
                    validation_steps=STEP_SIZE_TEST,
                    epochs=3
)

Very interestingly, we can see that for each epoch the accuracy on test is higher than the accuracy on train... wow.
accuracy on train over 3 epochs - 0.7756
accuracy on test over 3 epochs - 0.7903

In [None]:
# Save the baseline model to the working directory.
model.save("./model1_simpleCNN.hdf5")
# Author's note: we only got an accuracy of 0.7175 — not great, but we have a baseline model.

In [None]:
# Display a link to the saved model file so it can be downloaded from Kaggle
# to a laptop.
from IPython.display import FileLink
FileLink(r'model1_simpleCNN.hdf5')

# Model2 - Simple CNN but with tuned hyperparameters 
Let's try to use random search to tune the hyperparameters.
In general you want to tune the learning rate first; I will also try adding dropout / batch normalization and different optimizers in this one. For simplicity, only 3 epochs are used.

In [None]:
import keras
def create_CNN(activation = 'relu', dropout_flag = False, batchnorm_flag = False, 
               dropout_rate = 0.5, learning_rate = 1e-4, optimizer = 'rmsprop'):
    """Build and compile the three-stage CNN used in the hyperparameter search.

    Each stage is Conv2D -> (optional BatchNormalization) -> MaxPooling2D ->
    (optional Dropout), with 32, 64 and 128 filters respectively. The stages
    are followed by Flatten, a 1000-unit dense layer and a single sigmoid
    output unit. The model is compiled with binary cross-entropy loss and the
    accuracy metric. The input shape is read from the notebook-level
    `input_shape` variable.

    Args:
        activation: activation of the convolutional and hidden dense layers.
        dropout_flag: if true, add a Dropout layer after every pooling layer.
        batchnorm_flag: if true, add BatchNormalization after every convolution.
        dropout_rate: rate of the Dropout layers (only used with dropout_flag).
        learning_rate: learning rate passed to the optimizer.
        optimizer: one of 'sgd', 'adam', 'rmsprop' or 'adagrad'.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: if `optimizer` is not one of the supported names.
    """
    # Fail fast on an unknown optimizer name, before any layer is built.
    # Previously this case only printed a message and then crashed on the
    # unassigned optimizer variable when compiling.
    valid_optimizers = ('sgd', 'adam', 'rmsprop', 'adagrad')
    if optimizer not in valid_optimizers:
        raise ValueError('Not a valid optimizer: %r (expected one of %s)'
                         % (optimizer, ', '.join(valid_optimizers)))

    model = Sequential()

    # Stage 1: 32 filters; this first layer also declares the input shape.
    model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1),
                     activation = activation,
                     input_shape=input_shape))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Stage 2: 64 filters.
    model.add(Conv2D(64, (3, 3), activation=activation))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Stage 3: 128 filters.
    model.add(Conv2D(128, (3,3), activation=activation))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(1000, activation=activation))
    model.add(Dense(1, activation='sigmoid')) 

    # Every optimizer takes the `learning_rate` keyword, matching the spelling
    # already used for Adam, RMSprop and Adagrad.
    if optimizer == 'sgd':
        optimize_instance = keras.optimizers.SGD(learning_rate = learning_rate)
    elif optimizer == 'adam':
        optimize_instance = keras.optimizers.Adam(learning_rate = learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=False)
    elif optimizer == 'rmsprop':
        optimize_instance = keras.optimizers.RMSprop(learning_rate= learning_rate, rho=0.9, decay = 1e-6)
    else:  # 'adagrad' (the name was validated above)
        optimize_instance = keras.optimizers.Adagrad(learning_rate= learning_rate)
    model.compile(optimize_instance,loss="binary_crossentropy",metrics=["accuracy"])
    return model

In [None]:
# Candidate values for the hyperparameter searches below.
activations = ['relu', 'tanh', 'sigmoid', 'linear']
# NOTE: this rebinds the name `optimizers`, which an earlier cell imported as the
# keras optimizers module; after this cell runs, `optimizers` is a plain list.
optimizers = ['adam', 'rmsprop', 'adagrad']
# The SGD optimizer is left out because its accuracy over 3 epochs was around 57%,
# barely better than a random guess.
learning_rates = [1e-5, 1e-4,1e-3]
dropoutrates = [0.5, 0.25]
initializers = ['glorot_uniform','glorot_normal','he_normal', 'he_uniform']

Because I didn't find a GridSearchCV or RandomizedSearchCV that can be used with fit_generator, I have to define my own pseudo-random search. I am not going to do any cross-validation, as that would be too expensive; instead I just randomly sample elements from the param_grid and build candidate combinations from them.

In [None]:
def sample_params(param_grid, n): # n = how many sample combination you want
    """Draw `n` random hyperparameter combinations from `param_grid`.

    Args:
        param_grid: mapping from parameter name to a non-empty sequence of
            candidate values.
        n: number of combinations to draw.

    Returns:
        A list of `n` dicts. Each dict maps every key of `param_grid` to ONE
        value picked at random from that key's candidates, so a dict can be
        passed on directly as keyword arguments (e.g. create_CNN(**combo)).
        Combinations are drawn independently, so duplicates are possible.
    """
    # random.choice returns the value itself; random.sample(v, 1) would wrap it
    # in a one-element list, which is not usable as a keyword-argument value.
    return [
        {name: random.choice(candidates) for name, candidates in param_grid.items()}
        for _ in range(n)
    ]

However, trying too many combinations might exhaust the Kaggle kernel. Instead, I will do a grid search over the optimizer and learning_rate first. This time we will also train a CNN that has dropout and batch normalization layers.
We will use 3 epochs and look at the validation accuracy to decide on further hyperparameter tuning.

In [None]:
# Grid for the first search: every optimizer x learning-rate pair, with batch
# normalization and dropout always switched on. The keys are the keyword
# argument names of create_CNN().
param_grid = dict(
#            activation = activations, 
            optimizer = optimizers, 
            learning_rate = learning_rates,
            batchnorm_flag = [True],
            dropout_flag = [True],
#            dropout_rate = dropoutrates
            
)
import itertools as it
# Cartesian product of all value lists -> one keyword-argument dict per combination.
param_combinations = [dict(zip(param_grid,v)) for v in it.product(*param_grid.values())]
#len(param_combinations) # 3 optimizers x 3 learning rates = 9 combinations

In [None]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size # number of training instances // batch size = full batches per epoch
# NOTE(review): an earlier note here read "25000 // 32 = 781", but train_generator is built from the 80% split, so this is presumably 20000 // 32 = 625 — confirm
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# iterate over each combination of hyperparameters; a fresh model is built and
# trained for 3 epochs per combination
count = 1
for i in param_combinations:
    print('--------train CNN ',count, ' -----------')
    print('HyperParameters: ', i)
    model = create_CNN(**i)
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN, 
                        # Total number of steps (batches of samples) to yield from generator 
                        # before declaring one epoch finished and starting the next epoch.
                        validation_data=test_generator,
                        validation_steps=STEP_SIZE_TEST,
                        epochs=3
    )
    count +=1
# Now you just wait and see..

(accuracy on train, accuracy on test) over 3 epochs
<table>
    <tr>
        <th>optimizer</th>
        <th>lr = 1e-05</th>
        <th>lr = 1e-04</th>
        <th>lr = 1e-03</th>
    </tr>
    <tr>        
        <th>adam</th>
        <th>0.6463, 0.5684</th>
        <th>0.7219, 0.7152</th>
        <th>0.6641, 0.5775</th>
    </tr>
    <tr>        
        <th>rmsprop</th>
        <th>0.6485, 0.5272</th>
        <th>0.6955, 0.6860</th>
        <th>0.7447, 0.7160</th>
    </tr>
    <tr>        
        <th>adagrad</th>
        <th>0.5757, 0.5300</th>
        <th>0.6266, 0.5894</th>
        <th>0.6641, 0.4996</th>
    </tr>        
</table>

Learning rate is a big factor: we can see that learning rates of 0.001 and 0.0001 work better than 0.00001 for all 3 optimizers.
However, the best optimizer is still rmsprop. Even though we only have 3 epochs, we can see that lr=0.001 & rmsprop has the highest accuracy on train among all combinations - 0.7447. The corresponding test accuracy is 0.7160, which is a good sign: the gap between the accuracy on train and on test is not big, so we can increase the number of epochs to get better results.

# Model3 - fine tune model 1, add 1 fc layer
However, none of the combinations in the model2 section is better than model1 (the model without dropout and batch normalization layers). OK, fine. Let's go back to model1; this time we add 1 dense layer and a dropout layer.
We also test different weight initializers and dropout rates.

In [None]:
import keras
def create_CNN3(activation = 'relu', dropout_flag = False, batchnorm_flag = False, 
               dropout_rate = 0.5, learning_rate = 1e-4, optimizer = 'rmsprop', initializer = 'glorot_uniform'):
    """Build and compile the CNN variant with a second dense layer.

    The convolutional part is three stages of Conv2D -> (optional
    BatchNormalization) -> MaxPooling2D -> (optional Dropout) with 32, 64 and
    128 filters. The head is Flatten -> Dense(1000) -> Dropout -> Dense(1000)
    -> Dense(1, sigmoid); the Dropout layer in the head is always added,
    independently of `dropout_flag`. The model is compiled with binary
    cross-entropy loss and the accuracy metric. The input shape is read from
    the notebook-level `input_shape` variable.

    Args:
        activation: activation of the convolutional and hidden dense layers.
        dropout_flag: if true, add a Dropout layer after every pooling layer.
        batchnorm_flag: if true, add BatchNormalization after every convolution.
        dropout_rate: rate used by every Dropout layer.
        learning_rate: learning rate passed to the optimizer.
        optimizer: one of 'sgd', 'adam', 'rmsprop' or 'adagrad'.
        initializer: kernel initializer of the convolutional and hidden dense layers.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: if `optimizer` is not one of the supported names.
    """
    # Fail fast on an unknown optimizer name, before any layer is built.
    # Previously this case only printed a message and then crashed on the
    # unassigned optimizer variable when compiling.
    valid_optimizers = ('sgd', 'adam', 'rmsprop', 'adagrad')
    if optimizer not in valid_optimizers:
        raise ValueError('Not a valid optimizer: %r (expected one of %s)'
                         % (optimizer, ', '.join(valid_optimizers)))

    model = Sequential()

    # Stage 1: 32 filters; this first layer also declares the input shape.
    model.add(Conv2D(32, kernel_size=(3, 3), strides=(1, 1),
                     activation = activation,
                     input_shape=input_shape,
                     kernel_initializer = initializer))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Stage 2: 64 filters.
    model.add(Conv2D(64, (3, 3), activation=activation, kernel_initializer = initializer))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Stage 3: 128 filters.
    model.add(Conv2D(128, (3,3), activation=activation, kernel_initializer = initializer))
    if batchnorm_flag:
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    if dropout_flag:
        model.add(Dropout(dropout_rate))

    # Classifier head: two hidden dense layers with dropout in between.
    model.add(Flatten())
    model.add(Dense(1000, activation=activation, kernel_initializer = initializer))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1000, activation = activation, kernel_initializer = initializer))
    
    model.add(Dense(1, activation='sigmoid')) 

    # Every optimizer takes the `learning_rate` keyword, matching the spelling
    # already used for Adam, RMSprop and Adagrad.
    if optimizer == 'sgd':
        optimize_instance = keras.optimizers.SGD(learning_rate = learning_rate)
    elif optimizer == 'adam':
        optimize_instance = keras.optimizers.Adam(learning_rate = learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=False)
    elif optimizer == 'rmsprop':
        optimize_instance = keras.optimizers.RMSprop(learning_rate= learning_rate, rho=0.9, decay = 1e-6)
    else:  # 'adagrad' (the name was validated above)
        optimize_instance = keras.optimizers.Adagrad(learning_rate= learning_rate)
    model.compile(optimize_instance,loss="binary_crossentropy",metrics=["accuracy"])
    return model

In [None]:
# Grid for the second search: every dropout-rate x initializer pair. All other
# create_CNN3() arguments keep their defaults.
param_grid = dict(
#            activation = activations, 
#            optimizer = optimizers, 
#            learning_rate = learning_rates,
#            batchnorm_flag = [True],
#            dropout_flag = [True],
            dropout_rate = dropoutrates,
            initializer = initializers
            
)
import itertools as it
# Cartesian product of all value lists -> one keyword-argument dict per combination.
param_combinations = [dict(zip(param_grid,v)) for v in it.product(*param_grid.values())]
#len(param_combinations) # 2 dropout rates x 4 initializers = 8 combinations

In [None]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size # number of training instances // batch size = full batches per epoch
# NOTE(review): an earlier note here read "25000 // 32 = 781", but train_generator is built from the 80% split, so this is presumably 20000 // 32 = 625 — confirm
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# iterate over each combination of hyperparameters
count = 1
for i in param_combinations:
    # NOTE(review): combinations 1 and 2 are skipped here, so a fresh run trains
    # only 6 of the 8 combinations — presumably left over from resuming an
    # interrupted run; confirm whether the skip should be removed.
    if count != 1 and count != 2:
        print('--------train CNN ',count, ' -----------')
        print('HyperParameters: ', i)
        model = create_CNN3(**i)
        model.fit_generator(generator=train_generator,
                            steps_per_epoch=STEP_SIZE_TRAIN, 
                            # Total number of steps (batches of samples) to yield from generator 
                            # before declaring one epoch finished and starting the next epoch.
                            validation_data=test_generator,
                            validation_steps=STEP_SIZE_TEST,
                            epochs=3
        )
    count +=1
# Now you just wait and see..

<table>
    <tr>
        <th>initializer</th>
        <th>dropoutrate</th>
        <th>accuracy on train</th>
        <th>accuracy on test</th>
    </tr>
    <tr>        
        <th>glorot_uniform</th>
        <th>0.5</th>
        <th>0.7556</th>
        <th>0.7772</th>
    </tr>
    <tr>        
        <th>glorot_normal</th>
        <th>0.5</th>
        <th>0.7456</th>
        <th>0.7760</th>
    </tr>
    <tr>        
        <th>he_normal</th>
        <th>0.5</th>
        <th>0.7456</th>
        <th>0.7760</th>
    </tr>
    <tr>        
        <th>he_uniform</th>
        <th>0.5</th>
        <th>0.7525</th>
        <th>0.7513</th>
    </tr>
    <tr>        
        <th>glorot_uniform</th>
        <th>0.25</th>
        <th>0.7716</th>
        <th>0.7919</th>
    </tr>
    <tr>        
        <th>glorot_normal</th>
        <th>0.25</th>
        <th>0.7648</th>
        <th>0.7903</th>
    </tr>
    <tr>        
        <th>he_normal</th>
        <th>0.25</th>
        <th>0.7539</th>
        <th>0.7818</th>
    </tr>
    <tr>        
        <th>he_uniform</th>
        <th>0.25</th>
        <th>0.7488</th>
        <th>0.7649</th>
    </tr>
</table>

As we can see that glorot_uniform with 0.25 dropout rate performs the best. Let's train the model over 80 epochs.

To make sure the training process stops early if the accuracy on the validation set stops improving, we will use an early-stopping callback.

In [None]:
# Build the model for the long training run with a dropout rate of 0.25; every
# other argument keeps create_CNN3's default (including the 'glorot_uniform'
# initializer, the 'rmsprop' optimizer and a learning rate of 1e-4).
m3 = create_CNN3(dropout_rate = 0.25)

In [None]:
from keras.callbacks import EarlyStopping
# Monitor the accuracy on the validation data (mode='max': higher is better).
es = EarlyStopping(monitor = 'val_accuracy', mode = 'max', verbose = 1, patience = 3)
# patience = 3 means that training stops if the accuracy on the test set does not improve for 3 epochs

In [None]:
# fit the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size # number of training instances // batch size = full batches per epoch
# NOTE(review): an earlier note here read "25000 // 32 = 781", but train_generator is built from the 80% split, so this is presumably 20000 // 32 = 625 — confirm
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
# Train for up to 80 epochs; the early-stopping callback may end the run sooner.
# The returned history object is kept for the plots in the following cells.
history = m3.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN, 
                    # Total number of steps (batches of samples) to yield from generator 
                    # before declaring one epoch finished and starting the next epoch.
                    validation_data=test_generator,
                    validation_steps=STEP_SIZE_TEST,
                    epochs=80,
                    callbacks=[es]
)

In [None]:
# Plot the training history: loss on the training and on the held-out data per epoch.
fig, ax = plt.subplots()
for key, series_label in (('loss', 'train'), ('val_loss', 'test')):
    ax.plot(history.history[key], label = series_label)
ax.set_title('loss')
ax.set_xlabel('epochs')
ax.legend()
plt.show()

In [None]:
# Plot the training history: accuracy on the training and on the held-out data per epoch.
fig, ax = plt.subplots()
for key, series_label in (('accuracy', 'train'), ('val_accuracy', 'test')):
    ax.plot(history.history[key], label = series_label)
ax.set_title('accuracy')
ax.set_xlabel('epochs')
ax.legend()
plt.show()

Okay, not that exhilarating. I planned to train for 80 epochs; however, training stopped at the 12th epoch, reaching a training accuracy of 0.8372 and a test accuracy of 0.8533.

Very strangely, the accuracy on train is always lower than that on test. I guess this might be due to the way I generate the train and test data: I apply image augmentation to the train set but not to the test set, which makes the training data much trickier to deal with.

In [None]:
# Save the trained model to the working directory.
m3.save("./model3_tunnedCNN.hdf5")
# Display a link so the saved file can be downloaded from Kaggle to a laptop.
from IPython.display import FileLink
FileLink(r'model3_tunnedCNN.hdf5')

# Model4 - transfer learning, base model as feature extractor


There is an old saying in Chinese that 'we should all learn from the wisdom of our elders'. In the deep learning context, we could say that we should learn from pre-trained models. 

The author of the third link shown below used InceptionV3 as the base model and achieved 94% accuracy on cat vs. dog classification. So I will re-implement his solution, with a small twist on the input data augmentation and with early stopping during training.

Great articles articulate transfer learning

1.https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

2.https://codelabs.developers.google.com/codelabs/keras-flowers-transfer-learning/#3

3.https://medium.com/abraia/first-steps-with-transfer-learning-for-custom-image-classification-with-keras-b941601fcad5

In [None]:
from keras.applications.inception_v3 import InceptionV3
# Load InceptionV3 with its ImageNet weights, without the top classification layers.
base_model = InceptionV3(weights='imagenet', include_top=False) 
# We don't need the final output layer; we just need the feature activation map.
# NOTE(review): the Keras documentation pairs this network with
# inception_v3.preprocess_input, whereas the generators in this notebook rescale
# pixels by 1/255 — confirm whether the input scaling should be changed.

In [None]:
# Freeze every layer of the base model, so that its weights and biases are not
# updated during training.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
from keras.layers import GlobalAveragePooling2D, Dropout, Dense
from keras.models import Model
# Build a new classification head on top of the base model with the Keras
# functional Model API.
x = base_model.output
x = GlobalAveragePooling2D(name='avg_pool')(x) # naming the layer is optional
x = Dropout(0.25)(x) # dropout rate taken from the tuned hyperparameters of model 3
x = Dense(256, activation = 'relu')(x)
# Single sigmoid unit for the binary cat/dog output.
predictions = Dense(1, activation='sigmoid')(x)
# NOTE: this rebinds `model`, replacing the model object from the earlier sections.
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

Here we use a GlobalAveragePooling (GAP) layer before the FC layer. (I don't know whether this GAP layer improves performance; you can try training a model without it.) Anyway, on top of the ability to indicate what the object in the image is, one cool effect of the GAP layer is that it can help us identify where the object is in the image, which is called object localization. For more information, refer to the following link:
https://alexisbcook.github.io/2017/global-average-pooling-layers-for-object-localization/

In [None]:
from keras.callbacks import EarlyStopping
# Monitor the accuracy on the validation data (mode='max': higher is better).
es = EarlyStopping(monitor = 'val_accuracy', mode = 'max', verbose = 1, patience = 3)
# patience = 3 means that training stops if the accuracy on the test set does not improve for 3 epochs

In [None]:
# fit the model
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size # number of training instances // batch size = full batches per epoch
# NOTE(review): an earlier note here read "25000 // 32 = 781", but train_generator is built from the 80% split, so this is presumably 20000 // 32 = 625 — confirm
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
# Train for up to 80 epochs; the early-stopping callback may end the run sooner.
# `history` is rebound here and now describes the transfer-learning run.
history = model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN, 
                    # Total number of steps (batches of samples) to yield from generator 
                    # before declaring one epoch finished and starting the next epoch.
                    validation_data=test_generator,
                    validation_steps=STEP_SIZE_TEST,
                    epochs=80,
                    callbacks=[es]
)

Remarkable: after the first epoch, the model achieved 0.8867 accuracy on train and 0.9866 on test.
As the number of epochs increases, the accuracy on the train dataset goes up gradually to 0.94, while the accuracy on test remains relatively stable at around 0.9865. 
A complete slam dunk over the previous basic CNN models...

In [None]:
# Plot the training history of model 4: loss on the training and on the held-out data per epoch.
fig, ax = plt.subplots()
for key, series_label in (('loss', 'train'), ('val_loss', 'test')):
    ax.plot(history.history[key], label = series_label)
ax.set_title('model4 - transfered model - loss')
ax.set_xlabel('epochs')
ax.legend()
plt.show()

In [None]:
# Plot the training history of model 4: accuracy on the training and on the held-out data per epoch.
fig, ax = plt.subplots()
for key, series_label in (('accuracy', 'train'), ('val_accuracy', 'test')):
    ax.plot(history.history[key], label = series_label)
ax.set_title('model4 - transfered model - accuracy')
ax.set_xlabel('epochs')
ax.legend()
plt.show()

In [None]:
# Save the transfer-learning model to the working directory.
model.save('./model4_transfer_lr_feature_extractor.hdf5')
# Display a link so the saved file can be downloaded from Kaggle to a laptop.
from IPython.display import FileLink
FileLink(r'model4_transfer_lr_feature_extractor.hdf5')

# Predict on test
Use model4 - the transfer learning model.

In [None]:
# List the unlabeled competition images and put them into a one-column frame
# ordered by filename (string order); the rows keep their original index labels.
filenames = os.listdir("/kaggle/input/dogs-vs-cats-redux-kernels-edition/test/")
predictset = pd.DataFrame({'filename': filenames}).sort_values(by='filename')

In [None]:
# Display the sorted listing to inspect its entries.
predictset

In [None]:
# Keep only the JPEG files of the listing.
# NOTE(review): this cell used to delete the last row of the sorted listing by
# its hard-coded index label (4505), presumably to remove a stray non-image
# entry — confirm. That label depends on the arbitrary order in which
# os.listdir() returns the entries, and dropping it in place raised KeyError
# when the cell was run a second time. Filtering by extension does not depend
# on the listing order and can be re-run safely.
is_jpeg = predictset['filename'].str.lower().str.endswith('.jpg')
predictset = predictset[is_jpeg]

In [None]:
# Iterator over the unlabeled images. It reuses test_datagen (rescaling only)
# and yields image batches without labels (y_col=None, class_mode=None).
predictIterator = test_datagen.flow_from_dataframe(dataframe = predictset,
                                                    directory = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test/',
                                                    x_col = 'filename', y_col = None,
                                                    target_size = (image_size, image_size),
                                                     batch_size = 32,
                                                     class_mode = None,
                                                    shuffle = False) # don't shuffle: the predictions are matched to the rows of predictset by position

In [None]:
# Run the model over every batch of the prediction iterator.
labels = model.predict_generator(predictIterator)

In [None]:
# Shape of the prediction array
labels.shape

In [None]:
# Assemble the submission file.
submit = pd.DataFrame({})
# id = the part of the filename before the first '.'
submit['id'] = predictset.filename.str.split('.').str[0]
# Round the first (only) output column to hard 0/1 labels.
# NOTE(review): presumably 1 means 'dog' because the generator orders the class
# names alphabetically — confirm via train_generator.class_indices.
submit['label']  = np.round(labels[:,0]).astype(int)
submit.to_csv('submission_pa_hw6.csv', index=False)