In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import sklearn
from sklearn.model_selection import train_test_split
import tensorflow as tf
print(tf.__version__)

In [None]:
import zipfile
from zipfile import ZipFile

# Unpack the training archive into /kaggle/working/train.
train_archive_path = "/kaggle/input/dogs-vs-cats/train.zip"
with ZipFile(train_archive_path, "r") as train_archive:
    train_archive.extractall("/kaggle/working/train")

In [None]:
from zipfile import ZipFile

# Unpack the test archive into /kaggle/working/test.
test_archive_path = "/kaggle/input/dogs-vs-cats/test1.zip"
with ZipFile(test_archive_path, "r") as test_archive:
    test_archive.extractall("/kaggle/working/test")

In [None]:
# Directories holding the extracted images (relative to the current working directory).
TRAIN_DIR = '../working/train/train/'
TEST_DIR = '../working/test/test1/'

In [None]:
def preprocessing_df(train,test,batch=100,epochs=50,valid_fraction=0.2,image_size=256,seed=None):
    """Build the training and validation image iterators for a labelled image directory.

    Every entry in ``train`` is labelled with the part of its file name that
    precedes the first ``'.'``. The entries are split into a training and a
    validation subset, and each subset is wrapped in a Keras
    ``flow_from_dataframe`` iterator that rescales pixel values by 1/255.
    The training iterator additionally applies random horizontal flips.

    Parameters
    ----------
    train : str
        Directory containing the training images.
    test : str
        Directory containing the test images; it is only counted here.
    batch : int, default 100
        Batch size used by both iterators.
    epochs : int, default 50
        Not used by this function; passed through in the return value.
    valid_fraction : float, default 0.2
        Fraction of the training entries held out for validation.
    image_size : int, default 256
        Images are resized to ``(image_size, image_size)``.
    seed : int or None, default None
        Seed for the train/validation split and for the shuffling of the
        training iterator. ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    tuple
        ``(train_ds, valid_ds, TRAIN_SIZE, TEST_SIZE, BATCH_SIZE,
        VALID_FRACTION, IMAGE_SIZE, EPOCHS)`` where the sizes are the number
        of directory entries in ``train`` and ``test``.
    """
    TRAIN_DIR = train
    TEST_DIR = test

    # List the training directory once and reuse it for both the count and the labels.
    train_filenames = os.listdir(TRAIN_DIR)
    TRAIN_SIZE = len(train_filenames)
    TEST_SIZE = len(os.listdir(TEST_DIR))
    print("Number of training images:", TRAIN_SIZE)
    print("Number of test images:", TEST_SIZE)

    VALID_FRACTION = valid_fraction
    BATCH_SIZE = batch
    EPOCHS = epochs
    IMAGE_SIZE = image_size

    # creating df with train labels (label = file-name prefix before the first '.')
    train_df = pd.DataFrame({
        'id': train_filenames,
        'label': [filename.split('.')[0] for filename in train_filenames]
    })

    # splitting to train & valid; random_state makes the split repeatable when a seed is given
    train_df, valid_df = train_test_split(train_df,
                                          test_size=VALID_FRACTION,
                                          random_state=seed)

    train_generator=tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,
                                                                rescale=1./255,
                                                                fill_mode='nearest'
                                                               )

    train_ds=train_generator.flow_from_dataframe(train_df,
                                             TRAIN_DIR,
                                             x_col='id',
                                             y_col='label',
                                             target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                             class_mode='binary',
                                             batch_size=BATCH_SIZE,
                                             seed=seed
                                            )

    # Validation data is only rescaled and is read in a fixed order (shuffle=False).
    valid_gen=tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

    valid_ds=valid_gen.flow_from_dataframe(valid_df,
                                       TRAIN_DIR,
                                       x_col='id',
                                       y_col='label',
                                       target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                       shuffle=False,
                                       class_mode='binary',
                                       batch_size=BATCH_SIZE
                                      )

    return train_ds,valid_ds,TRAIN_SIZE,TEST_SIZE,BATCH_SIZE,VALID_FRACTION,IMAGE_SIZE,EPOCHS

In [None]:
# Build the iterators with a batch size of 64; every other setting keeps its default.
(
    train_ds,
    valid_ds,
    TRAIN_SIZE,
    TEST_SIZE,
    BATCH_SIZE,
    VALID_FRACTION,
    IMAGE_SIZE,
    EPOCHS,
) = preprocessing_df(TRAIN_DIR, TEST_DIR, batch=64)

In [None]:
# Convolutional network for binary classification: four Conv2D/MaxPooling2D
# stages, then two dense layers with dropout and a single sigmoid output unit.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64,(3,3),activation='relu',input_shape=(IMAGE_SIZE,IMAGE_SIZE,3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64,(3,3),activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(1024,activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(512,activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001),
          loss = 'binary_crossentropy',
          metrics = ['accuracy'])
model.summary()

# Stop once val_loss has not improved for 4 consecutive epochs and roll the
# model back to the weights of the best epoch.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
    mode='min',
    restore_best_weights=True,
    verbose=1,
    patience=4)

In [None]:
# TensorBoard callback that logs to the "logs/" directory, with write_images enabled.
tb = tf.keras.callbacks.TensorBoard(
    write_images=True,
    log_dir="logs/",
)

In [None]:
# Train with early stopping and TensorBoard logging.
# len() of a Keras image iterator is the number of batches needed to cover
# every sample once, including the final partial batch. The previous
# round(count / batch) arithmetic could round down and skip that last batch,
# and it counted raw directory entries rather than the images actually loaded.
# The epoch budget comes from EPOCHS (returned by preprocessing_df) instead of
# a second hard-coded literal.
history = model.fit(train_ds,
    validation_data=valid_ds,
    steps_per_epoch=len(train_ds),
    validation_steps=len(valid_ds),
    epochs=EPOCHS,
    callbacks=[es,tb],
    verbose=1)

In [None]:
#plotting

import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch training and validation metrics
# recorded by model.fit
#-----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc)) # one x position per completed epoch

# One figure with two panels; each curve is labelled so the training and
# validation lines can be told apart.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
ax_acc.plot(epochs, acc, label='training')
ax_acc.plot(epochs, val_acc, label='validation')
ax_acc.set(title='Training and validation accuracy', xlabel='epoch', ylabel='accuracy')
ax_acc.legend()

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
ax_loss.plot(epochs, loss, label='training')
ax_loss.plot(epochs, val_loss, label='validation')
ax_loss.set(title='Training and validation loss', xlabel='epoch', ylabel='loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
%%time

# preparing testing data: one row per entry in the test directory
test_filenames = os.listdir(TEST_DIR)
test_df = pd.DataFrame({
    'id': test_filenames
})

test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)

# No labels are available for the test set (class_mode=None) and shuffle=False
# keeps the predictions in the same order as the rows of test_df.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    TEST_DIR,
    x_col='id',
    y_col=None,
    class_mode=None,
    target_size=(IMAGE_SIZE,IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False
)

# len(test_generator) is the integer number of batches covering every test
# image; this replaces a float step count derived from a raw directory listing.
yhat = model.predict(test_generator, steps=len(test_generator))

In [None]:
# sigmoid returns a probability between 0 and 1; threshold at 0.5 to get an integer class.
# yhat itself is left untouched so this cell can be re-run safely.
predicted_labels = (np.asarray(yhat).reshape(-1) > 0.5).astype(int)

# Keras assigns class indices in alphanumeric order of the label strings, so with
# 'cat'/'dog' labels (assumed from the dataset's file names) the integers already
# follow the submission encoding: 1 = dog, 0 = cat.
test_df['label'] = predicted_labels

# The submission identifies each image by its id, i.e. the file name without the
# extension ("1.jpg" -> "1"), so strip the extension in the frame that is written out.
submission_df = test_df.assign(
    id=test_df['id'].map(lambda filename: os.path.splitext(filename)[0])
)

submission_df.to_csv('submission.csv', index=False)

In [None]:
# Install the zip utility; -y answers the confirmation prompt so the cell cannot stall in a non-interactive run.
!apt-get install -y zip

In [None]:
# Recursively archive the ./logs directory (where the TensorBoard callback writes) into logs.zip.
!zip -r logs.zip ./logs