In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Explicitly leave scientific-notation suppression switched off when NumPy arrays are printed.
np.set_printoptions(suppress=False)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!unzip '../input/dogs-vs-cats-redux-kernels-edition/test.zip'
!unzip '../input/dogs-vs-cats-redux-kernels-edition/train.zip'

In [None]:
# Sanity check: number of entries in each extracted folder.
n_train, n_test = (len(os.listdir(folder)) for folder in ('./train', './test'))
print(f'Train images: {n_train}')
print(f' Test images: {n_test}')

In [None]:
import glob
import cv2
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.layers import *
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.models import Model
from keras.applications.mobilenet_v2 import MobileNetV2

In [None]:
# Collect the image paths. glob returns entries in filesystem order, so the
# training list is sorted: otherwise the seeded split below would not be
# reproducible from one machine/session to the next.
train_filename = sorted(glob.glob('./train/*.jpg'))
# Order is left untouched here because later cells derive the submission ids
# from this list and must stay aligned with it.
test_filename = glob.glob('./test/*.jpg')

# The class label is the part of the file's base name before the first '.'
# (presumably names look like 'cat.0.jpg' -> 'cat'; confirm against the data).
train_labels = [os.path.basename(path).split('.', 1)[0] for path in train_filename]

# One row per labelled image; 20% is held out (seeded) for the final evaluation.
# Note: `test_df` is this labelled hold-out, not the unlabelled competition test set.
df = pd.DataFrame({'filename': train_filename, 'label': train_labels})
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=True, random_state=32)

In [None]:
# Display the training portion of the split to eyeball the filename/label columns.
train_df

In [None]:
# Random augmentation settings used by the training generator.
_augmentation = dict(
    rotation_range=45,
    shear_range=10,
    zoom_range=[1, 1.3],
)

# Training generator: pixel rescaling, augmentation, and a 20% slice reserved
# so that `subset='training'` / `subset='validation'` can be requested from it.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    **_augmentation,
)

# Hold-out generator: pixel rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Keyword arguments shared by every iterator below: which dataframe columns hold
# the path and the label, the input size, and one-hot ("categorical") labels.
flow_kwargs = dict(
    x_col='filename',
    y_col='label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
)

# Augmented training batches: the 'training' part of train_datagen's split.
train_images = train_datagen.flow_from_dataframe(
    train_df,
    subset='training',
    **flow_kwargs
)

# Validation batches must NOT be augmented: val_loss drives ReduceLROnPlateau and
# EarlyStopping, and random rotations/shears/zooms would make it noisy and
# pessimistic. A rescale-only generator is therefore used for this subset.
# Keras selects the subset by row position, so using the same validation_split
# picks exactly the rows that the 'training' subset above leaves out.
# Keep this value in sync with train_datagen's validation_split.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
val_images = val_datagen.flow_from_dataframe(
    train_df,
    subset='validation',
    **flow_kwargs
)

# Labelled hold-out batches (rescale only) used for the final evaluation.
test_images = test_datagen.flow_from_dataframe(
    test_df,
    **flow_kwargs
)

In [None]:
# Show the label -> output-index mapping; the prediction cell later reads index 1
# of the softmax output, so check here which class that index stands for.
train_images.class_indices

In [None]:
# Backbone: MobileNetV2 with ImageNet weights and no classifier top, frozen so
# that only the new head below is trained.
# NOTE(review): the generators feed pixels scaled to [0, 1]; MobileNetV2's own
# preprocess_input presumably scales to [-1, 1] - confirm whether that matters here.
mn_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
mn_model.trainable = False

# Head: global average pooling, two ReLU dense layers, 2-way softmax output.
x = mn_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dense(512, activation='relu'),
):
    x = head_layer(x)
pred = Dense(2, activation='softmax')(x)

model = Model(inputs=mn_model.input, outputs=pred)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Callbacks: scale the learning rate by 0.25 after one epoch without improvement
# (never below 3e-6), and stop after five epochs without val_loss improvement,
# restoring the best weights seen.
lr_reduction = ReduceLROnPlateau(
    factor=0.25,
    patience=1,
    min_lr=0.000003,
    verbose=1,
)
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=5,
)
model.summary()

In [None]:
# Train for at most 20 epochs; the two callbacks adjust the learning rate and
# may end training early. The returned History object is kept for plotting.
hist = model.fit(
    train_images,
    validation_data=val_images,
    epochs=20,
    callbacks=[lr_reduction, es],
)

In [None]:
import matplotlib.pyplot as plt

# Plot the training and validation curves recorded by model.fit, one panel per metric.
print(hist.history.keys())
fig, axes = plt.subplots(1, 2, figsize=(15, 15))

for ax, metric in zip(axes, ('accuracy', 'loss')):
    ax.plot(hist.history[metric])
    ax.plot(hist.history[f'val_{metric}'])
    ax.set_title(f'model {metric}')
    ax.set_xlabel('epochs')
    ax.set_ylabel(metric)
    # The second curve is the validation subset used during fit, not the hold-out test set.
    ax.legend(['train', 'validation'], loc='upper left')

plt.show()

In [None]:
# Score the model on the labelled hold-out iterator; the first two entries of
# the result are reported as loss and accuracy.
res = model.evaluate(test_images)
holdout_loss, holdout_accuracy = res[0], res[1]
print(f'    Loss: {holdout_loss}')
print(f'Accuracy: {holdout_accuracy}')

In [None]:
# Score the competition test images.
# Reuse test_filename instead of globbing again: the submission ids are derived
# from that list, so predictions and ids are aligned by construction rather than
# by two separate directory listings happening to come back in the same order.
pred_images = test_filename

# Predict in batches; one model.predict call per image is far slower.
PRED_BATCH_SIZE = 64

prob = []
for start in range(0, len(pred_images), PRED_BATCH_SIZE):
    batch = []
    for path in pred_images[start:start + PRED_BATCH_SIZE]:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError(f'Could not read image: {path}')
        # OpenCV loads channels as BGR, but the model was trained on RGB images
        # (color_mode='rgb' in the Keras iterators), so convert before predicting.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        batch.append(cv2.resize(img, (224, 224)))
    # Same 1/255 rescaling that the data generators apply.
    batch = np.asarray(batch, dtype=np.float32) / 255.0
    # Keep the probability at output index 1 for every image in the batch
    # (see train_images.class_indices for which class that is).
    prob.extend(model.predict(batch, verbose=0)[:, 1])

In [None]:
# Submission id = file name without directory, truncated at its first '.'.
ids = [
    path.rsplit('/', 1)[-1].split('.', 1)[0]
    for path in test_filename
]

In [None]:
# Assemble the submission table (one row per test image) and write it as CSV
# without the index column.
submission_columns = {'id': ids, 'label': prob}
output = pd.DataFrame(submission_columns)
output.to_csv('./my_submission.csv', index=False)

In [None]:
# Display the submission table that was just written to ./my_submission.csv.
output

In [None]:
# Summary statistics of the submission table, as a quick check on the predicted values.
output.describe()