In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!unzip -q ../input/dogs-vs-cats/test1.zip
!unzip -q ../input/dogs-vs-cats/train.zip

In [None]:
# Create the <split>/<class> folder layout that flow_from_directory reads:
# Train/cat, Train/dog, Validation/cat, Validation/dog.
# exist_ok=True makes the cell idempotent, so re-running the notebook does not
# fail on directories left over from a previous run.
for split_dir in ('Train', 'Validation'):
    for class_dir in ('cat', 'dog'):
        os.makedirs(os.path.join(split_dir, class_dir), exist_ok=True)

In [None]:
import numpy as np
import tensorflow
import tensorflow.keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalMaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tqdm import tqdm
import os
import cv2
import shutil
from random import shuffle
import matplotlib.pyplot as plt
# from google.colab.patches import cv2_imshow
%matplotlib inline

In [None]:
# os.listdir returns entries in arbitrary order, so sort first to get a stable
# starting point, then shuffle with a fixed seed. The train/validation split
# built from `files` is then identical on every run.
files = sorted(os.listdir('train'))
np.random.default_rng(42).shuffle(files)

In [None]:
# Sanity check: displays True when the 'train/' directory exists.
os.path.isdir('train/')

In [None]:
# cv2.imshow(cv2.imread('train/' + files[4]))

In [None]:
# Folders used when splitting the images into training and validation sets.
base_path, dest_path1, dest_path2 = (
    'train',       # source folder holding all labelled images
    'Train',       # destination for the training split
    'Validation',  # destination for the validation split
)

In [None]:
# For each class, the first `*_split` images encountered in `files`
# (10% of the total file count per class) are copied to the validation folder;
# every later image of that class is copied to the training folder.
cat_split = int(len(files) * 0.1)
dog_split = int(len(files) * 0.1)
quota = {'cat': cat_split, 'dog': dog_split}
held_out = {'cat': 0, 'dog': 0}

for img in tqdm(files):
    # The text before the first '.' in the filename is the class name.
    pet = img.split('.')[0]
    if pet not in quota:
        # Files that are neither 'cat' nor 'dog' are not copied anywhere.
        continue

    to_validation = held_out[pet] < quota[pet]
    dest_root = dest_path2 if to_validation else dest_path1
    shutil.copyfile(os.path.join(base_path, img),
                    os.path.join(dest_root, pet, img))
    if to_validation:
        held_out[pet] += 1

# Keep the per-class validation counts available under their original names.
cat_counter, dog_counter = held_out['cat'], held_out['dog']

In [None]:
# VGG16's preprocess_input expects raw 0-255 pixels and itself produces the
# input the ImageNet weights were trained on (RGB->BGR and per-channel mean
# subtraction, without scaling). ImageDataGenerator multiplies by `rescale`
# after all other transformations, so also passing rescale=1/255 shrank the
# already-centred pixels 255-fold. Only the preprocessing function is kept.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by both directory iterators: 224x224 images in shuffled
# batches of 32, with a single 0/1 label per image (class_mode='binary').
flow_options = dict(batch_size=32,
                    shuffle=True,
                    target_size=(224, 224),
                    class_mode='binary')

train_datagen = datagen.flow_from_directory('Train', **flow_options)

val_datagen = datagen.flow_from_directory('Validation', **flow_options)

In [None]:
# Largest multiples of the batch size (32) that fit into 20000 and 5000 images.
print(20000 // 32 * 32)
print(5000 // 32 * 32)

Cat label = 0.0

Dog label = 1.0

In [None]:
# Pull one batch from the training iterator and inspect a single sample.
# Note that drawing the batch advances train_datagen by one step.
i = 0  # position of the inspected sample within the batch
data, label = next(train_datagen)
plt.imshow(data[i])
print(label[i])

In [None]:
# VGG16 with ImageNet weights; include_top=False leaves out the network's
# fully-connected classifier layers.
model = VGG16(weights='imagenet', include_top=False)

# The batch dimension is left unspecified (no fixed batch_size) so the
# extractor accepts batches of any size, including a final partial batch
# produced by a generator.
inp = Input(shape=(224, 224, 3))

# Global max pooling reduces the VGG16 output to one feature vector per image.
x = model(inp)
x = GlobalMaxPooling2D()(x)

model_feature_extractor = Model(inputs=[inp], outputs=[x])

In [None]:
# Zero-filled buffers for the extracted features (512 float32 values per image)
# and their labels: 20000 training rows and 4992 validation rows.
n_features = 512
n_train, n_val = 20000, 4992

X_train = np.zeros(shape=(n_train, n_features), dtype=np.float32)
y_train = np.zeros(n_train)

X_val = np.zeros(shape=(n_val, n_features), dtype=np.float32)
y_val = np.zeros(n_val)

In [None]:
def _extract_features(generator, features, labels, batch_size=32):
    """Fill `features` and `labels` in place from one pass over `generator`.

    Args:
        generator: Keras iterator yielding ``(images, labels)`` batches.
        features: Pre-allocated 2-D array; its row count decides how many
            images are read.
        labels: Pre-allocated 1-D array with one slot per row of `features`.
        batch_size: Number of images the generator yields per full batch.
    """
    # Restart the iterator at the beginning of a fresh epoch. The iterator may
    # already have been advanced (for example by previewing a batch, or by a
    # previous run of this cell); continuing mid-epoch would skip some images
    # and read others twice.
    generator.reset()
    for start in tqdm(range(0, len(features), batch_size)):
        # The built-in next() is used because some newer Keras releases no
        # longer provide the iterator's .next() method.
        images, batch_labels = next(generator)
        stop = start + len(images)
        # verbose=0: tqdm already reports progress, so the per-call progress
        # output of predict() is suppressed.
        features[start:stop] = model_feature_extractor.predict(images, verbose=0)
        labels[start:stop] = batch_labels


_extract_features(train_datagen, X_train, y_train)
_extract_features(val_datagen, X_val, y_val)

In [None]:
# Show the feature/label array shapes for the training and validation sets.
for features, targets in ((X_train, y_train), (X_val, y_val)):
    print(features.shape, targets.shape)

In [None]:
def plot_history(history):
    """Plot accuracy and loss curves from a Keras training history.

    Draws two stacked subplots (accuracy on top, loss below) and shows the
    figure. Validation curves are drawn only when the history contains them.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch value lists (such as the object returned by
            ``Model.fit``). The accuracy metric may be stored under ``'acc'``
            or ``'accuracy'``.

    Raises:
        KeyError: If the history has no accuracy entry or no ``'loss'`` entry.
    """
    metrics = history.history
    # The history key mirrors the metric name passed to compile():
    # metrics=['acc'] gives 'acc', metrics=['accuracy'] gives 'accuracy'.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    val_acc_key = 'val_' + acc_key

    fig, axs = plt.subplots(2)

    # Create the accuracy plot.
    axs[0].plot(metrics[acc_key], label='train accuracy')
    if val_acc_key in metrics:
        axs[0].plot(metrics[val_acc_key], label='test accuracy')
    axs[0].set_ylabel('Accuracy')
    axs[0].legend(loc='best')
    axs[0].set_title('Accuracy eval')

    # Create the loss plot.
    axs[1].plot(metrics['loss'], label='train loss')
    if 'val_loss' in metrics:
        axs[1].plot(metrics['val_loss'], label='test loss')
    axs[1].set_ylabel('Loss')
    axs[1].set_xlabel('Epoch')
    axs[1].legend(loc='best')
    axs[1].set_title('Loss eval')

    # Keep the lower subplot's title from overlapping the upper subplot.
    fig.tight_layout()
    plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
import datetime

In [None]:
# Dense classifier head for 512-value feature vectors: one L2-regularised
# hidden layer with dropout, followed by a single sigmoid output unit.
clf_model = Sequential([
    Dense(128, activation='relu', input_dim=512,
          kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    # Optional second hidden block, currently disabled:
    # Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    # Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

clf_model.summary()

clf_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['acc'],
)

In [None]:
# Labels are reshaped to column vectors before being passed to fit().
train_targets = y_train.reshape(-1, 1)
val_targets = y_val.reshape(-1, 1)

# Train the classifier head for 50 epochs, validating after every epoch.
res = clf_model.fit(
    X_train,
    train_targets,
    validation_data=(X_val, val_targets),
    batch_size=32,
    epochs=50,
    verbose=2,
)

plot_history(res)

In [None]:
# Build a one-column table listing every entry of the extracted test folder.
test_filenames = os.listdir("/kaggle/working/test1")
test_df = pd.DataFrame(data={'filename': test_filenames})
nb_samples = len(test_df.index)

In [None]:
# Preview the first rows of the test filename table.
test_df.head()

In [None]:
# Unlabelled iterator over the test images: no classes (class_mode=None) and
# no shuffling, so batches follow the row order of test_df.
test_datagen = datagen.flow_from_dataframe(
    dataframe=test_df,
    directory="/kaggle/working/test1/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    shuffle=False,
    batch_size=32,
    target_size=(224, 224),
)

In [None]:
# Largest multiple of the batch size (32) that fits into 12500 images.
print(12500 // 32 * 32)

In [None]:
X_test = np.zeros((12480, 512), dtype=np.float32)

In [None]:
for i in tqdm(range(0, 12480, 32)):
    batch = test_datagen.next()
    X_test[i:i + 32] = model_feature_extractor.predict(batch)
#     y_train[i:i + 32] = batch[1]

In [None]:
X_test.shape

In [None]:
preds = clf_model.predict(X_test)

In [None]:
for i in range(len(preds)):
    if preds[i] >= 0.5:
        preds[i] = 1
    else:
        preds[i] = 0

In [None]:
predictions = pd.DataFrame()
predictions["id"] = np.arange(1, 12481)
predictions["target"] = preds

predictions.to_csv('Pejman_submission.csv', index=False, header=predictions.columns)
predictions.head(10)

In [None]:
# Count how many rows were assigned to each predicted target value.
predictions['target'].value_counts()