In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Extract the training archive into the current working directory
# (-q: quiet output, -o: overwrite existing files without prompting, so re-running the cell is safe).
!unzip -q -o '/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip'

In [None]:
# Extract the test archive into the current working directory
# (-q: quiet output, -o: overwrite existing files without prompting, so re-running the cell is safe).
!unzip -q -o '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip'

In [None]:
# Example paths after extraction:
#   test/10007.jpg
#   train/cat.4511.jpg

In [None]:
from PIL import Image

# Sanity check: open one extracted training image so the notebook renders it as the cell output.
Image.open('train/cat.4511.jpg')

In [None]:
import glob

# Collect every extracted training file into a one-column frame.
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so the list is
# sorted to keep row order -- and therefore any seeded split made from this frame --
# reproducible across sessions.
train = pd.DataFrame({'path': sorted(glob.glob('train/*'))})
train.head(2)

In [None]:
def _label_from_path(path):
    """Return the text before the first '.' in the second '/'-separated path component.

    Example: 'train/cat.4511.jpg' -> 'cat'.
    """
    file_name = path.split('/')[1]
    return file_name.split('.')[0]


# Derive the class label for each training image from its file name.
train['target'] = train['path'].map(_label_from_path)
train

In [None]:
from keras.preprocessing.image import ImageDataGenerator

## 1. 점수 올리기 -> data augmentation

In [None]:
# Plain generator (no augmentation arguments); used below for the validation and test images.
idg = ImageDataGenerator()

# Augmenting generator used below for the training images:
# random horizontal flips plus a brightness range of [0.2, 1.0].
augmentation_options = {
    'horizontal_flip': True,
    'brightness_range': [0.2, 1.0],
}
idg2 = ImageDataGenerator(**augmentation_options)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
# The split is stratified on the label column and seeded (random_state=42).
x_train, x_valid = train_test_split(
    train,
    test_size=0.2,
    stratify=train['target'],
    random_state=42,
)

In [None]:
# Per-class row counts in the validation split (checks the stratified split).
x_valid['target'].value_counts()

In [None]:
# Per-class row counts in the training split (checks the stratified split).
x_train['target'].value_counts()

In [None]:
# Training batches come from the augmenting generator (idg2); images are resized to 300x300.
# class_mode, batch_size and shuffle are left at the library defaults
# (presumably one-hot 'categorical' labels, matching the 2-unit softmax head -- confirm).
train_flow_options = {
    'x_col': 'path',
    'y_col': 'target',
    'target_size': (300, 300),
}
train_generator = idg2.flow_from_dataframe(x_train, **train_flow_options)

In [None]:
# Validation batches come from the plain generator (idg, no augmentation); images are resized to 300x300.
# class_mode, batch_size and shuffle are left at the library defaults.
valid_flow_options = {
    'x_col': 'path',
    'y_col': 'target',
    'target_size': (300, 300),
}
valid_generator = idg.flow_from_dataframe(x_valid, **valid_flow_options)

In [None]:
import matplotlib.pyplot as plt


# Preview 15 augmented samples in a 5x3 grid: each panel shows the first image
# of a freshly drawn training batch.
plt.figure(figsize=(12, 12))
for panel in range(1, 16):
    plt.subplot(5, 3, panel)
    batch_images, _batch_labels = next(iter(train_generator))
    # Cast to uint8 before display so imshow receives integer pixel values.
    plt.imshow(batch_images[0].astype('uint8'))
# plt.tight_layout()
plt.show()

## 2. 점수 올리기 -> 모델 변경 (B0 -> B1)

In [None]:
from tensorflow.keras import Sequential

from tensorflow.keras.layers import *

from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import EfficientNetB1

In [None]:
# Build both candidate backbones without their classification head and with
# global average pooling on the output; all other arguments keep their defaults.
# Only eb1 is added to the model further down in this notebook.
backbone_options = {'include_top': False, 'pooling': 'avg'}
eb0 = EfficientNetB0(**backbone_options)
eb1 = EfficientNetB1(**backbone_options)

In [None]:
from tensorflow.keras.optimizers import SGD

# Classifier: EfficientNetB1 backbone (eb1) followed by a 2-unit softmax head,
# one output per class.
model = Sequential()
model.add(eb1)
model.add(Dense(2, activation='softmax'))

# SGD with Nesterov momentum. The step size is passed as `learning_rate`:
# `lr` is a deprecated alias that newer Keras releases no longer accept.
model.compile(
    optimizer=SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Print the layer-by-layer overview of the compiled model.
model.summary()

## 3. 점수 올리기 -> callback 함수 추가

In [None]:
##callback

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Stop training after 5 epochs without improvement of the monitored metric
# (left at the callback's default). restore_best_weights=True rolls the in-memory
# model back to its best epoch; without it the later predict() call would use the
# weights of the last (possibly worse) epoch, because 'best.h5' is never reloaded
# in this notebook.
# NOTE(review): some Keras versions only restore when early stopping actually
# triggers -- confirm for the installed version.
es = EarlyStopping(patience = 5, verbose = 1, restore_best_weights = True)

# Keep an on-disk copy of the best model seen so far.
mc = ModelCheckpoint('best.h5', save_best_only = True, verbose = 1)

# Lower the learning rate after 3 epochs without improvement (reduction factor left at default).
rl = ReduceLROnPlateau(patience = 3, verbose = 1)

In [None]:
# Train for at most 100 epochs on the augmented training batches, evaluating on the
# validation batches each epoch; the es/mc/rl callbacks handle early stopping,
# checkpointing and learning-rate reduction.
model.fit(train_generator, epochs = 100, callbacks = [es, mc, rl], validation_data = valid_generator)

In [None]:
# Collect every extracted test file into a one-column frame of paths.
test_paths = glob.glob('test/*')
test = pd.DataFrame({'path': test_paths})

In [None]:
# Unlabelled test batches from the plain generator (no augmentation), resized to 300x300.
# shuffle=False keeps the batch order identical to the row order of `test`, which the
# submission cells rely on when pairing predictions with file names.
test_flow_options = {
    'x_col': 'path',
    'y_col': None,
    'class_mode': None,
    'target_size': (300, 300),
    'shuffle': False,
}
test_generator = idg.flow_from_dataframe(test, **test_flow_options)

In [None]:
# Run the model over all test batches; `result` is indexed below as result[row][class].
result  = model.predict(test_generator, verbose=  1)

In [None]:
# Display the raw prediction array.
result

In [None]:
from keras.preprocessing.image import load_img


# Show the first n test images, one per row, each titled with the predicted label.
n = 10

# Create the figure ONCE. Creating it inside the loop produced n separate 8x32
# figures, each containing a single subplot placed at a different row position.
fig = plt.figure(figsize=(8, 32))
for i, path in enumerate(test['path'].head(n)):
    img = load_img(path, target_size=(100, 100))
    ax = fig.add_subplot(n, 1, i + 1)

    # Column 0 is treated as the 'cat' score.
    # NOTE(review): assumes class index 0 == 'cat'; confirm with train_generator.class_indices.
    cat_score = result[i][0]
    pred_label = 'cat' if cat_score > 0.5 else 'dog'
    # Report the score of whichever class was chosen.
    pred = cat_score if cat_score > 0.5 else 1 - cat_score

    ax.set_title('Looks like a {0} with probability {1}'.format(pred_label, pred))
    ax.imshow(img)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

In [None]:
# Load the competition's sample submission; it is used as the template for the output file.
sample_submission_path = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv'
sub = pd.read_csv(sample_submission_path)
sub

In [None]:
def _id_from_path(path):
    """Return the text before the first '.' in the second '/'-separated path component.

    Example: 'test/10007.jpg' -> '10007'.
    """
    file_name = path.split('/')[1]
    return file_name.split('.')[0]


# Overwrite the template ids with the ids of the test files, in the same row order
# as `test` (and therefore as the predictions).
sub['id'] = test['path'].map(_id_from_path)

In [None]:
# Use prediction column 1 as the submitted label (presumably the 'dog' score --
# confirm with train_generator.class_indices), clipped to [0.005, 0.995] so that
# no submitted value is exactly 0 or 1.
second_class_scores = result[:, 1]
sub['label'] = np.clip(second_class_scores, 0.005, 0.995)
sub

In [None]:
# Write the submission to the working directory without the frame's index column.
sub.to_csv('base.csv', index=False)

## 4. 점수 올리기 -> Ensemble (Blending, Stacking 등): Blending

In [None]:
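# Disabled example: blend two earlier submissions by averaging their labels 50/50.
# Requires the files under /kaggle/input/ensemble/ to be attached; uncomment to use.
# csv1 = pd.read_csv('/kaggle/input/ensemble/DogVsCats_submission (7).csv')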
# csv2 = pd.read_csv('/kaggle/input/ensemble/DogVsCats_submission (1) (3).csv')

# csv1 = csv1.sort_values('id').reset_index(drop = True)
# csv2 = csv2.sort_values('id').reset_index(drop = True)

# sub = pd.read_csv('/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv')
# sub['label'] = (csv1['label'] * 0.5) + (csv2['label'] *0.5)
# sub.to_csv('sub1.csv', index = 0)