In [None]:
#imports
import os
import zipfile

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt


In [None]:

# --- Input archives ---
TRAIN_PATH = "../input/dogs-vs-cats-redux-kernels-edition/train.zip"
TEST_PATH = "../input/dogs-vs-cats-redux-kernels-edition/test.zip"

# --- Extraction targets (both archives are unpacked under UNZIP_DATA) ---
UNZIP_DATA = "../kaggle/files/unzipped/"
UNZIP_TRAIN = "../kaggle/files/unzipped/train"
UNZIP_TEST = "../kaggle/files/unzipped/test"

# --- Run settings ---
SEED = 88888      # random_state of the train/validation split
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
EPOCHS = 10



In [None]:

# Unpack the train archive, then the test archive, into the shared extraction
# root; a confirmation line is printed after each one.
for archive_path in (TRAIN_PATH, TEST_PATH):
    with zipfile.ZipFile(archive_path, mode='r') as archive:
        archive.extractall(path=UNZIP_DATA)
        print('Done!')

In [None]:
# List the extracted image file names. os.listdir returns entries in arbitrary
# order, so the names are sorted: the seeded train/validation split further
# down is only reproducible if its input order is stable. The directories come
# from the config constants instead of repeated path literals.
training_images_files = sorted(os.listdir(UNZIP_TRAIN))
test_image_files = sorted(os.listdir(UNZIP_TEST))


In [None]:
# Label each training image with the part of its file name before the first
# '.', then collect file names and labels into one frame.
classes = [name.split('.')[0] for name in training_images_files]

train_df = pd.DataFrame({
    'filename': training_images_files,
    'class': classes,
})
# These files were listed from the training directory, so the full path must
# be built from UNZIP_TRAIN (it previously pointed at the test directory).
train_df['Path'] = train_df['filename'].apply(
    lambda filename: os.path.join(UNZIP_TRAIN, filename))



In [None]:
# One row per test image. 'id' is the integer before the first '.' in the file
# name; rows are ordered by it and given a fresh 0..n-1 index.
test_df = pd.DataFrame({'filename': test_image_files})
test_df['id'] = test_df['filename'].map(lambda fname: int(fname.split('.')[0]))
test_df = test_df.sort_values(by='id', ignore_index=True)
test_df


In [None]:

# Shuffled 80/20 split with random_state=SEED. Note that train_df is rebound
# to the training portion; valid_df receives the held-out 20%.
train_df, valid_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=SEED,
    shuffle=True,
)


In [None]:
# Training images are augmented on the fly (rotation, shear, zoom, shifts);
# validation images only pass through the EfficientNet preprocessing function.
# The ImageDataGenerator objects get their own names so that train_generator
# and validation_generator are only ever bound to the batch iterators.
train_augmenter = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=45,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=False,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# batch_size comes from the BATCH_SIZE constant (as it does for the test
# generator), and seed=SEED makes shuffling and augmentation repeatable.
train_generator = train_augmenter.flow_from_dataframe(
    train_df,
    UNZIP_TRAIN,
    x_col='filename',
    y_col='class',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=SEED,
)

validation_preprocessor = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# Validation files live in the training directory too: valid_df was split off
# from the training frame.
validation_generator = validation_preprocessor.flow_from_dataframe(
    valid_df,
    UNZIP_TRAIN,
    x_col='filename',
    y_col='class',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=SEED,
)


In [None]:
# EfficientNetB0 with ImageNet weights and without its top (include_top=False),
# taking IMG_SIZE x IMG_SIZE 3-channel input.
pre_trained_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Mark every backbone layer as non-trainable.
for backbone_layer in pre_trained_model.layers:
    backbone_layer.trainable = False

#pre_trained_model.summary()

In [None]:
# Output tensor of the backbone layer named 'top_activation'; it is what gets
# passed to create_model below.
last_layer = pre_trained_model.get_layer(name='top_activation')
last_output = last_layer.output

def create_model(last_output):
    """Stack a binary classification head on ``last_output`` and compile it.

    The head is global average pooling, then batch normalization, then a
    single sigmoid unit. The model's input is taken from the module-level
    ``pre_trained_model``, not from an argument.

    Args:
        last_output: Tensor the head is built on.

    Returns:
        The compiled ``Model`` (RMSprop with learning rate 0.001, binary
        cross-entropy loss, one metric registered as ``'acc'``).
    """
    pooled = keras.layers.GlobalAveragePooling2D()(last_output)
    normalized = keras.layers.BatchNormalization()(pooled)
    probability = keras.layers.Dense(1, activation='sigmoid')(normalized)

    model = Model(pre_trained_model.input, probability)

    optimizer = RMSprop(learning_rate=0.001)
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    return model

# Build and compile the classifier on top of the backbone output tensor.
model = create_model(last_output=last_output)
#model.summary()

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# The model is compiled with metrics=['acc'], so validation accuracy is logged
# under the key 'val_acc'. Both callbacks have to monitor that exact key;
# 'val_accuracy' is never produced by this model.

# Halve the learning rate when 'val_acc' has not improved by at least
# min_delta over 2 epochs, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            mode='max',
                                            patience=2,
                                            verbose=2,
                                            factor=0.5,
                                            min_delta=0.001,
                                            min_lr=0.00001)

# NOTE(review): patience (50) is larger than EPOCHS (10), so with the current
# configuration this callback cannot end training early - confirm the intended
# patience.
early_stopping = EarlyStopping(
    monitor="val_acc",
    patience=50,
    verbose=2,
    mode="max",
)


In [None]:
# Fit for EPOCHS epochs on the training generator with the validation
# generator as validation data; the returned object is kept in `history` for
# the accuracy plot.
history = model.fit(
    x=train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=[learning_rate_reduction, early_stopping],
)

In [None]:
# Persist the trained model next to the notebook.
model.save(filepath='./dog_cat_model')

In [None]:

def plot_hist(hist):
    """Plot the training and validation accuracy curves and show the figure.

    Args:
        hist: Object whose ``history`` mapping contains the ``"acc"`` and
            ``"val_acc"`` series, plotted in that order.
    """
    for series_key in ("acc", "val_acc"):
        plt.plot(hist.history[series_key])

    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    # Legend entries follow the plotting order above.
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()


# Draw the accuracy curves recorded during training.
plot_hist(hist=history)

In [None]:
# Test images go through the same preprocessing function as the other splits.
# class_mode=None yields images without labels, and shuffle=False keeps the
# batches in the row order of test_df.
test_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory=UNZIP_TEST,
    x_col='filename',
    class_mode=None,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
    shuffle=False,
)

# Model outputs for every test image.
predict = model.predict(test_generator, verbose=1)

In [None]:

# Store the model outputs on the test frame, mirror them into the 'label'
# column, and keep only the two columns that go into the submission.
test_df["predict"] = predict
test_df["label"] = test_df["predict"]
result = test_df.loc[:, ["id", "label"]]

In [None]:
# Write the submission file without the index column.
result.to_csv(path_or_buf='submission.csv', index=False)