In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its full path.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Show the directory the notebook is running in; relative paths such as './' resolve against it.
current_working_dir = os.getcwd()
print(current_working_dir)

In [None]:
# Clean the working directory before extracting the datasets (WARNING: deletes everything in the current directory)
!rm -rf *

In [None]:
# Extract the train and test archives into the current working directory.
# Each archive is opened in a `with` block so its file handle is closed even
# when extraction raises (e.g. disk full, corrupt archive).
import zipfile

with zipfile.ZipFile('../input/dogs-vs-cats/train.zip', 'r') as train_zip:
    train_zip.extractall('./')

with zipfile.ZipFile('../input/dogs-vs-cats/test1.zip', 'r') as test_zip:
    test_zip.extractall('./')

In [None]:
# Print how many entries each extracted folder holds (test set first, then train set).
for extracted_dir in ('./test1', './train'):
    print(len(os.listdir(extracted_dir)))

In [None]:
# Peek at the first ten directory entries of each extracted folder (test set first, then train set).
for extracted_dir in ('./test1', './train'):
    entries = os.listdir(extracted_dir)
    print(entries[:10])

In [None]:
# Build one-column dataframes holding the image file names of the train and test sets.
# os.listdir returns entries in arbitrary order, so the names are sorted first:
# the row order is then stable across runs and filesystems, which the seeded
# train/validation split relies on to be reproducible.
train = pd.DataFrame(sorted(os.listdir('./train')), columns=['filename'])
test = pd.DataFrame(sorted(os.listdir('./test1')), columns=['filename'])
train, test

In [None]:
# Derive the label of each training image from its file name: the label is the
# text before the first '.'.
# `n` is passed by keyword because recent pandas releases only accept `pat`
# positionally in Series.str.split; the positional form raises a TypeError there.
train['label'] = train['filename'].str.split('.', n=1, expand=True)[0]
train

In [None]:
# Bar chart of the number of training images per label, to check class balance.
label_counts = train['label'].value_counts()
label_counts.plot.bar()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; the fixed seed keeps the split repeatable.
train_df, validation_df = train_test_split(
    train,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the training and validation partitions produced by train_test_split.
train_df, validation_df

In [None]:
# Create a simple CNN: three Conv2D + MaxPooling2D stages on 150x150 RGB input,
# followed by a 512-unit dense layer and a single sigmoid output unit
# (binary classification).
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases no longer accept.
# The metric is registered under the name 'acc', which is the key read back
# from history.history when plotting the training curves.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()

In [None]:
# Build the image iterators: an augmenting one for training and a
# rescale-only one for validation. Both read files from TRAIN_DIR.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAIN_DIR = './train'

# Options common to the training and validation iterators: file names come
# from the 'filename' column, binary labels from the 'label' column, and
# images are resized to 150x150 and served in batches of 10.
flow_options = dict(
    directory=TRAIN_DIR,
    batch_size=10,
    x_col='filename',
    y_col='label',
    class_mode='binary',
    target_size=(150, 150),
)

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=1.0/255., **augmentation_options)
train_generator = train_datagen.flow_from_dataframe(train_df, **flow_options)

# Validation images are only rescaled to [0, 1]; no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0/255.)
val_generator = validation_datagen.flow_from_dataframe(validation_df, **flow_options)


In [None]:
# Train for 4 epochs on the training generator, with the validation generator
# passed as validation data; the returned History object is kept for plotting.
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=4,
)

In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch training/validation metrics out of the fit history.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)
epochs_range = range(len(acc))

# Each panel: (position in the 2x2 grid, title, legend location, curves to draw).
panels = [
    (1, 'Training and Validation Accuracy', 'lower right',
     [(acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy')]),
    (2, 'Training and Validation Loss', 'upper right',
     [(loss, 'Training Loss'), (val_loss, 'Validation Loss')]),
]

fig = plt.figure(figsize=(15, 15))
for position, panel_title, legend_loc, curves in panels:
    ax = fig.add_subplot(2, 2, position)
    for values, curve_label in curves:
        ax.plot(epochs_range, values, label=curve_label)
    ax.legend(loc=legend_loc)
    ax.set_title(panel_title)
plt.show()

In [None]:
# Iterator over the unlabelled test images (rescale only, no labels yielded).
# shuffle=False is required here: flow_from_dataframe shuffles by default, and
# with shuffling on, the rows returned by model.predict would not line up with
# the rows of `test`, so predictions could not be matched to file names.
test_generator = validation_datagen.flow_from_dataframe(
    test,
    directory='./test1',
    batch_size=10,
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(150, 150),
    shuffle=False,
)

In [None]:
# Run the trained model over every batch produced by the test generator.
preds = model.predict(x=test_generator)

In [None]:
# Display the raw model outputs for the test images.
# NOTE(review): the model ends in a single sigmoid unit, so these are presumably
# probabilities of class index 1 — confirm the mapping via train_generator.class_indices.
preds