In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file available under the Kaggle input directory, one full path per line.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Pre-processing data

In [None]:
import zipfile

# Unpack the training and test archives into the current working directory.
for archive in ("../input/dogs-vs-cats/train.zip", "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive, "r") as z:
        z.extractall(".")

In [None]:
import os

train_path = '/kaggle/working/train'
filenames = os.listdir(train_path)

# The label is the text before the first "." in each file name:
# 'dog' is encoded as 1, anything else as 0.
categories = [1 if name.split(".")[0] == 'dog' else 0 for name in filenames]
print(len(categories))

In [None]:
import pandas as pd

# One row per training image: its file name and its numeric class label.
df = pd.DataFrame(dict(file=filenames, category=categories))
df.head()

In [None]:
# Number of images per class, to check how balanced the training set is.
category_counts = df['category'].value_counts()
category_counts

# Visualisation

In [None]:
from keras.preprocessing import image
import matplotlib.pyplot as plt

# Preview the first training images together in ONE figure laid out as a 2x3 grid.
# The figure is created once, before the loop: creating it inside the loop opens a
# new figure per image, so each picture ends up alone in its own mostly-empty grid.
plt.figure(figsize=(10, 10))
# Never index past the end of the listing if fewer than five files are present.
for i in range(min(5, len(filenames))):
    plt.subplot(2, 3, i+1)
    img_path = os.path.join(train_path, filenames[i])
    img = image.load_img(img_path)
    plt.imshow(img)
plt.tight_layout()
plt.show()

# Pre-trained model

In [None]:
from keras.applications import VGG16

# VGG16 with ImageNet weights and without its top classifier layers, for
# 224x224 RGB inputs. It is frozen here so that its weights are not updated.
conv = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
conv.trainable = False
conv.summary()

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.regularizers import l2

# Binary classifier: VGG16 base -> flatten -> L2-regularised dense layer
# with dropout -> single sigmoid output unit.
model = Sequential([
    conv,
    Flatten(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentations, used by the training generator only.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.5,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# The second generator only rescales pixel values by 1/255; it applies no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
from sklearn.model_selection import train_test_split

# Convert the numeric labels to the strings 'dog' / 'cat'.
# The conversion leaves already-converted values untouched, so re-running this
# cell is safe; a plain `.map({1: 'dog', 0: 'cat'})` would turn every label into
# NaN on a second run because the string values are not keys of the mapping.
label_names = {1: 'dog', 0: 'cat'}
df['category'] = df['category'].map(lambda label: label_names.get(label, label))

# 80/20 train/validation split. A fixed seed makes the split reproducible across
# runs, and stratifying keeps the class proportions the same in both parts.
train, validate = train_test_split(
    df,
    train_size=0.8,
    random_state=42,
    stratify=df['category'],
)

In [None]:
# Options shared by the training and validation flows: both read files from
# train_path, resize to 224x224 and yield batches of 32 with binary labels.
flow_options = dict(
    directory=train_path,
    x_col='file',
    y_col='category',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)

# Training batches go through the augmenting generator, validation batches
# through the rescale-only generator.
train_generator = datagen.flow_from_dataframe(train, **flow_options)
validation_generator = val_datagen.flow_from_dataframe(validate, **flow_options)

In [None]:
# Show six randomly augmented versions of one randomly chosen training image.
sample = df.sample(n=1)
sample_generator = datagen.flow_from_dataframe(sample,
                                               directory=train_path,
                                               x_col='file',
                                               y_col='category',
                                               target_size=(224,224),
                                               class_mode='categorical')
for idx in range(6):
    plt.subplot(2, 3, idx + 1)
    # Pull exactly one batch per subplot. Distinct names are used so the subplot
    # index is not rebound to the image batch; the labels are not needed here.
    batch_images, _batch_labels = next(sample_generator)
    plt.imshow(batch_images[0])
plt.show()

In [None]:
# First training run, for 10 epochs on the augmented training batches.
# `batch_size` is deliberately not passed: the generators were created with
# batch_size=32 and already yield batches, and Keras documents that the argument
# should not be specified for generator inputs.
history = model.fit(train_generator, validation_data=validation_generator, epochs=10, verbose=1)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by the first training run.
metrics = history.history
acc, val_accuracy = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']
epochs = range(1, len(acc)+1)

# One figure per metric: training curve in red, validation curve in blue,
# with the y-axis fixed to [0, 1].
curves = [
    (acc, 'Training Accuracy', val_accuracy, 'Validation Accuracy',
     'Accuracy', 'Train-Validation-Accuracy-Curve'),
    (loss, 'Training Loss', val_loss, 'Validation loss',
     'Loss', 'Train-Validation-loss-Curve'),
]
for train_values, train_label, val_values, val_label, y_label, title in curves:
    plt.figure()
    plt.plot(epochs, train_values, 'r-', label=train_label)
    plt.plot(epochs, val_values, 'b-', label=val_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.ylim([0,1])
    plt.legend()
plt.show()

In [None]:
# Fine-tuning setup: make the VGG16 layers from 'block5_conv1' onwards trainable
# and keep every earlier layer frozen.
#
# The base was frozen as a whole earlier (conv.trainable = False). A container
# whose own `trainable` flag is False reports no trainable weights, whatever the
# flags of its sub-layers are, so flipping only the sub-layers would leave the
# whole base frozen. Re-enable the container first, then freeze the early layers
# explicitly.
conv.trainable = True
train_layer = False
for layer in conv.layers:
    if layer.name == 'block5_conv1':
        train_layer = True
    layer.trainable = train_layer
    print(layer, ' :yes' if train_layer else ' :no')

In [None]:
from keras import optimizers

# Recompile the model with RMSprop at a learning rate of 1e-4 for the second
# training run. The keyword is `learning_rate`; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(learning_rate=1e-4), metrics=['accuracy'])

In [None]:
# Second training run (after the model was recompiled with a 1e-4 learning rate).
# `batch_size` is deliberately not passed: the generators were created with
# batch_size=32 and already yield batches, and Keras documents that the argument
# should not be specified for generator inputs.
history_fine = model.fit(train_generator, validation_data=validation_generator, epochs=10, verbose=1)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics of the second (fine-tuning) run. All four series must come
# from `history_fine`; reading the validation loss from the first run's `history`
# would plot the wrong curve next to the fine-tuning training loss.
acc = history_fine.history['accuracy']
val_accuracy = history_fine.history['val_accuracy']
loss = history_fine.history['loss']
val_loss = history_fine.history['val_loss']
epochs = range(1, len(acc)+1)

# Accuracy curves: training in red, validation in blue.
plt.figure()
plt.plot(epochs, acc,'r-', label='Training Accuracy')
plt.plot(epochs, val_accuracy, 'b-', label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Train-Validation-Accuracy-Curve')
plt.ylim([0,1])
plt.legend()

# Loss curves, in a separate figure with the same colour convention.
plt.figure()
plt.plot(epochs,loss, 'r-', label='Training Loss')
plt.plot(epochs, val_loss,'b-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Train-Validation-loss-Curve')
plt.legend()
plt.ylim([0,1])
plt.show()

In [None]:
# Directory holding the extracted test images, and the names of the files in it.
test_path = '/kaggle/working/test1'
test_file = os.listdir(test_path)

In [None]:
# Single-column frame of test file names, in directory-listing order.
test_df = pd.DataFrame(dict(file=test_file))
test_df.head()

In [None]:
# Unlabelled test flow (no y column, class_mode=None) through the rescale-only
# generator. shuffle=False so batches follow the row order of test_df.
test_flow_options = dict(
    directory=test_path,
    x_col='file',
    y_col=None,
    class_mode=None,
    target_size=(224,224),
    batch_size=32,
    shuffle=False,
)
test_generator = val_datagen.flow_from_dataframe(test_df, **test_flow_options)

In [None]:
# Run the model over every batch of the test generator and keep its raw outputs.
predict = model.predict(x=test_generator)

In [None]:
# Round each model output to the nearest integer to obtain integer labels.
sub = np.round(predict).astype(int)

In [None]:
# Show the first two test images side by side, each titled with the model's
# raw output for that image.
for idx in range(2):
    plt.subplot(1, 2, idx + 1)
    img = image.load_img(os.path.join(test_path, test_file[idx]))
    plt.imshow(img)
    plt.title(str(predict[idx]))
plt.show()

In [None]:
# Build the submission table: the numeric id is the part of the file name before
# the first ".", the label is the rounded prediction. Rows are sorted by id and
# written to submission.csv without the index.
file_ids = test_df['file'].str.split(".").str[0].astype('int')
submission = (
    test_df
    .assign(id=file_ids, label=sub)
    .drop(columns=['file'])
    .sort_values(by=['id'])
)
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the first five rows of the submission table.
submission.head(n=5)

In [None]:
# NOTE: this message is printed unconditionally; the cell itself does not check
# that submission.csv was written.
print("Your submission was successfully saved!")

# IPython shell escape: recursively delete /kaggle/working/train (the directory
# the training file names were listed from).
!rm -rf "/kaggle/working/train"

In [None]:
# IPython shell escape: recursively delete `train` relative to the current
# working directory (no error if it is already gone, because of -f).
!rm -rf train

In [None]:
# IPython shell escape: recursively delete `test1` relative to the current
# working directory.
!rm -rf test1

In [None]:
# List all entries of the current directory, including hidden ones. This is not
# Python syntax: it relies on IPython running `ls` as a shell command.
ls -a

In [None]:
# Persist the model's weights (not the architecture) to model.h5 in the
# current directory.
# NOTE(review): newer Keras releases may require a ".weights.h5" suffix — confirm
# against the installed version.
model.save_weights(filepath="model.h5")