In [None]:
import numpy as np
import pandas as pd 
from zipfile import ZipFile
import matplotlib.pyplot as plt
import random
from keras.preprocessing.image import ImageDataGenerator, load_img
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
import os

# Directory the competition files (sample submission CSV and zip archives) are read from.
# NOTE(review): this name shadows Python's builtin `input()` for the rest of the
# notebook; renaming it requires updating every cell that reads it.
input = '../input/dogs-vs-cats/'
# Directory the extracted `train/` and `test1/` folders are read back from.
output = '/kaggle/working/'

In [None]:
def extract_zip(file_name: str, dest_dir=None) -> None:
    """Extract every member of a zip archive.

    Args:
        file_name: Path to the ``.zip`` archive to unpack.
        dest_dir: Directory to extract into. ``None`` (the default) extracts
            into the current working directory, which is what the original
            one-argument call did.

    Returns:
        None. Progress is reported on stdout.
    """
    # `archive` rather than `zip`, so the builtin `zip()` is not shadowed.
    with ZipFile(file_name, 'r') as archive:
        print('Extracting all the files now...')
        archive.extractall(path=dest_dir)
        print('Done!')

In [None]:
def plot_random_image_from(filenames: list, path: str)-> None:
    """Show one randomly chosen image from ``filenames``.

    Args:
        filenames: Candidate file names; one is picked with ``random.choice``.
        path: Sub-directory (relative to the notebook-level ``output``
            directory) holding the files. It is concatenated as-is, so it
            must end with a path separator.
    """
    chosen_name = random.choice(filenames)
    # The full location is built by plain string concatenation.
    plt.imshow(load_img(output + path + chosen_name))

In [None]:
def get_random_image_shape(filenames:list, path:str)-> tuple:
    """Return the array shape of one randomly chosen image.

    Args:
        filenames: Candidate file names; one is picked with ``random.choice``.
        path: Sub-directory (relative to the notebook-level ``output``
            directory) holding the files. It is concatenated as-is, so it
            must end with a path separator.

    Returns:
        The ``.shape`` tuple of the image converted to a NumPy array.
    """
    chosen_name = random.choice(filenames)
    # Open inside a context manager so the file handle is closed once the
    # pixel data has been read.
    with Image.open(output + path + chosen_name) as img:
        return np.asarray(img).shape

In [None]:
# Sample submission file shipped alongside the archives.
sample = pd.read_csv(input + "sampleSubmission.csv")

# Unpack both archives, test set first. extract_zip() is called with the
# archive path only, while the listings below read from `output`.
# NOTE(review): this presumes the extraction lands in `output` - confirm.
for archive_name in ("test1.zip", "train.zip"):
    extract_zip(input + archive_name)

test_filenames = os.listdir(output + "test1/")
train_filenames = os.listdir(output + "train/")

In [None]:
# Sanity check: display one randomly picked image from the extracted training folder.
plot_random_image_from(train_filenames, 'train/')

In [None]:
# Label each training file from its name: the text before the first '.' is
# compared with 'dog', giving 1 for a match and 0 otherwise.
train_data = {
    "image": list(train_filenames),
    "is_dog": [int(name.split('.')[0] == 'dog') for name in train_filenames],
}

df = pd.DataFrame.from_dict(train_data)

In [None]:
# Preview the first rows of the file-name / label table.
df.head()

In [None]:
# Bar chart of how many rows carry each `is_dog` value (class balance check).
df['is_dog'].value_counts().plot.bar()

In [None]:
# Small CNN for 128x128 RGB inputs, ending in a 2-way softmax.
model = models.Sequential([
    # Convolution block 1
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    layers.MaxPooling2D((2, 2)),

    # Convolution block 2
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    layers.MaxPooling2D((2, 2)),

    # Final convolution followed by the dense classifier head
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),

    # One output unit per class
    layers.Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# The generators below are built with class_mode='categorical' on this column,
# so the 0/1 flags are replaced by string class names.
df["is_dog"] = df["is_dog"].replace({0: 'cat', 1: 'dog'})

# Hold out 20% of the rows for validation. A fixed seed makes the split
# reproducible across kernel restarts.
train_df, validate_df = train_test_split(df, test_size=0.20, random_state=42)

# reset_index returns a new frame instead of modifying in place, so the result
# has to be assigned back; previously it was discarded and the shuffled
# original indices were kept.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

In [None]:
# Bar chart of the class counts in the training split only.
train_df['is_dog'].value_counts().plot.bar()

In [None]:
# Training-time augmentation: pixel rescaling plus random geometric jitter.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Streams augmented 128x128 batches of 15 images, with file names taken from
# the 'image' column and class names from the 'is_dog' column.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=output + "train/",
    x_col='image',
    y_col='is_dog',
    target_size=(128, 128),
    class_mode='categorical',
    batch_size=15,
)

In [None]:
# Validation images are only rescaled; no augmentation is configured here.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Same directory, columns, image size and batch size as the training generator.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory=output + "train/",
    x_col='image',
    y_col='is_dog',
    target_size=(128, 128),
    class_mode='categorical',
    batch_size=15,
)

In [None]:
def plot_random_aug_image(df: pd.DataFrame, datagen: ImageDataGenerator)-> None:
    """Plot 15 augmented variants of one randomly sampled training image.

    Args:
        df: Frame with an 'image' column (file names under ``output + "train/"``)
            and an 'is_dog' column holding string class names.
        datagen: Generator whose augmentation settings are being previewed.
    """
    sample_row = df.sample(n=1).reset_index(drop=True)
    # Read the label from the sampled row itself. A one-row frame gives the
    # generator a single class, so the one-hot label it yields is always
    # [1.] and cannot tell cats from dogs; the old title was always "Dog".
    label = str(sample_row['is_dog'].iloc[0])

    example_generator = datagen.flow_from_dataframe(
        sample_row,
        output + "train/",
        x_col='image',
        y_col='is_dog',
        target_size=[128,128],
        class_mode='categorical'
    )

    plt.figure(figsize=(15, 15))
    for i in range(0, 15):
        plt.subplot(5, 3, i+1)
        # Each draw re-applies the random augmentation to the same source image.
        X_batch, _ = next(example_generator)
        plt.title(label.capitalize())
        plt.imshow(X_batch[0])
    plt.tight_layout()
    plt.show()

In [None]:
# Preview what the training augmentation does to one randomly chosen training image.
plot_random_aug_image(train_df, train_datagen)

In [None]:
# Must stay equal to the batch_size=15 passed to the train/validation
# generators, because the step counts for fitting are derived from it.
batch_size = 15
train_total, val_total = len(train_df), len(validate_df)

In [None]:
# Train for 3 epochs; step counts use floor division, so a final partial
# batch is not counted in either split.
history = model.fit(
    train_generator,
    steps_per_epoch=train_total // batch_size,
    validation_data=validation_generator,
    validation_steps=val_total // batch_size,
    epochs=3,
)

In [None]:
# Show the per-epoch values recorded during fitting (the plots below read its
# 'accuracy', 'val_accuracy', 'loss' and 'val_loss' entries).
history.history

In [None]:
# Derive the epoch axis from the recorded history instead of hard-coding
# [1, 2, 3], so the plot keeps working when the number of epochs changes.
train_acc_curve = history.history['accuracy']
val_acc_curve = history.history['val_accuracy']
epoch_axis = range(1, len(train_acc_curve) + 1)

plt.plot(epoch_axis, train_acc_curve, label='accuracy')
plt.plot(epoch_axis, val_acc_curve, label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
# Keep a non-degenerate x-range even for a single-epoch run.
plt.xlim([1, max(2, len(train_acc_curve))])
plt.legend(loc='lower right')

In [None]:
# Derive the epoch axis from the recorded history instead of hard-coding
# [1, 2, 3], so the plot keeps working when the number of epochs changes.
train_loss_curve = history.history['loss']
val_loss_curve = history.history['val_loss']
epoch_axis = range(1, len(train_loss_curve) + 1)

plt.plot(epoch_axis, train_loss_curve, label='loss')
plt.plot(epoch_axis, val_loss_curve, label = 'val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.ylim([0, 1.5])
# Keep a non-degenerate x-range even for a single-epoch run.
plt.xlim([1, max(2, len(train_loss_curve))])
plt.legend(loc='lower right')

In [None]:
# Evaluate on the validation generator; the result unpacks into the loss and
# the single 'accuracy' metric the model was compiled with.
val_loss, val_acc = model.evaluate(x=validation_generator, verbose=2)
# Bare expression so the accuracy is shown as the cell output.
val_acc

In [None]:
# One-column table listing every file found in the extracted test folder.
test_data = {"image": list(test_filenames)}
test_df = pd.DataFrame.from_dict(test_data)

# Test images get the same rescaling as validation. There are no labels
# (y_col / class_mode are None) and shuffling is off, so predictions come
# back in the same order as the rows of test_df.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory=output + "test1/",
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=(128, 128),
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Model.predict accepts the generator directly, matching how fit/evaluate are
# called above; predict_generator is deprecated. `steps` must be an integer,
# and rounding up includes the final partial batch.
predict = model.predict(
    test_generator,
    steps=int(np.ceil(test_df.shape[0] / batch_size)),
)

In [None]:
# Invert the generator's class-name -> index mapping so predicted indices can
# be turned back into class names.
label_map = {class_index: class_name
             for class_name, class_index in train_generator.class_indices.items()}

# The highest-scoring output unit per row is the predicted class index.
test_df['category'] = pd.Series(
    np.argmax(predict, axis=-1), index=test_df.index
).replace(label_map)

In [None]:
# Bar chart of how many test images were assigned to each predicted category.
test_df['category'].value_counts().plot.bar()


In [None]:
def visualize_provided_number_of_random_predictions(num: int)->None:
    """Show ``num`` randomly sampled test images with their predicted category.

    Reads the notebook-level ``test_df`` ('image' and 'category' columns) and
    loads the files from ``output + "test1/"``.

    Args:
        num: Number of test images to sample and display.
    """
    # Lay the images out in at most 3 columns with just enough rows to hold
    # all of them. The previous num//2 x num//3 grid was zero-sized or too
    # small for many values of `num` (e.g. 1, 2 or 5), which made
    # plt.subplot raise.
    n_cols = max(1, min(3, num))
    n_rows = max(1, (num + n_cols - 1) // n_cols)

    plt.figure(figsize=(12, 24))
    picks = test_df.sample(n=num).reset_index(drop=True)
    for index, row in picks.iterrows():
        filename = row['image']
        category = row['category']
        img = load_img(output+"test1/"+filename, target_size=[128,128])
        # After reset_index the row index runs 0..num-1, so index+1 is the
        # 1-based subplot position.
        plt.subplot(n_rows, n_cols, index+1)
        plt.imshow(img)
        plt.xlabel(filename + '(' + "{}".format(category) + ')' )
    plt.tight_layout()
    plt.show()

In [None]:
# Spot-check 20 random test predictions by eye.
visualize_provided_number_of_random_predictions(20)