In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its full path.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root folder of the competition dataset; later cells build archive paths from it.
data_dir = '/kaggle/input/aerial-cactus-identification'

# Read the training labels and cast the target column to strings in one chained step
# (the generators below are created with class_mode="binary" on this column).
labels = pd.read_csv(f"{data_dir}/train.csv").astype({'has_cactus': 'str'})

labels.head()

In [None]:
import zipfile

train_dir = "/kaggle/temp/train"
test_dir = "/kaggle/temp/test"

# (archive, destination) pairs: the train archive is unpacked directly into the temp
# root, the test archive into a separate "test_dummy" folder.
extraction_plan = (
    (data_dir + "/train.zip", "/kaggle/temp"),
    (data_dir + "/test.zip", "/kaggle/temp/test_dummy"),
)
for archive_path, destination in extraction_plan:
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(destination)

In [None]:
# Print the full path of every file that now exists below the temp directory.
temp_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/temp')
    for name in names
)
for path in temp_paths:
    print(path)

In [None]:
import cv2
import matplotlib.pyplot as plt

# Preview a single training image as a sanity check on the extracted data.
sample_path = train_dir + "/0004be2cfeaba1c0361d39e2b000257b.jpg"
im = cv2.imread(sample_path)

# cv2.imread signals failure by returning None instead of raising, so fail loudly here.
if im is None:
    raise FileNotFoundError(f"Could not read image: {sample_path}")

# OpenCV decodes colour images in BGR channel order while matplotlib expects RGB;
# convert so the preview is not shown with red and blue swapped.
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.imshow(im)
plt.show()

In [None]:
# Install the `efficientnet` package directly from the qubvel/efficientnet GitHub repository.
# NOTE(review): no tag or commit is pinned, so a re-run may install a different revision.
!pip install git+https://github.com/qubvel/efficientnet

In [None]:
from efficientnet.keras import EfficientNetB7
from keras.layers import Dense
from keras.models import Sequential

# ImageNet-pretrained EfficientNetB7 used as a feature extractor: the classification
# head is dropped and the feature map is reduced with global max pooling.
efficient_net = EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(32, 32, 3),
    pooling='max',
)

# Classifier head: three identical 512-unit ReLU layers, then one sigmoid output unit.
model = Sequential()
model.add(efficient_net)
for _ in range(3):
    model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Shared settings so that all three generators stay consistent with the model input.
IMAGE_SIZE = (32, 32)
BATCH_SIZE = 256
VALIDATION_SPLIT = 0.25

# Training pipeline: rescale pixels to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1/255,
    validation_split=VALIDATION_SPLIT,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation pipeline: rescale only. Validation images must NOT be augmented, otherwise
# the monitored validation metrics are measured on randomly distorted inputs.
# The same validation_split is used so that subset="training" and subset="validation"
# refer to complementary parts of the `labels` dataframe.
val_datagen = ImageDataGenerator(
    rescale=1/255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe = labels,
    directory = train_dir,
    x_col="id",
    y_col="has_cactus",
    target_size=IMAGE_SIZE,
    subset="training",
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode="binary"
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe = labels,
    directory = train_dir,
    x_col="id",
    y_col="has_cactus",
    target_size=IMAGE_SIZE,
    subset="validation",
    batch_size=BATCH_SIZE,
    shuffle=False,  # evaluation order does not matter; keep it deterministic
    class_mode="binary"
)

# Test pipeline: rescale only, no labels.
test_datagen = ImageDataGenerator(
    rescale=1./255.
)

# shuffle=False keeps predictions aligned with test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
    '/kaggle/temp/test_dummy/',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode=None
)

In [None]:
# Early stopping. Imported from `keras` (not `tensorflow.keras`) so the callback comes
# from the same framework as the model, which is built with `keras.models.Sequential`.
from keras.callbacks import EarlyStopping

# Stop once validation accuracy has not improved for 20 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it the
# model would keep the weights from 20 epochs after its best validation score.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    mode='auto',
    restore_best_weights=True,
    verbose=2)

In [None]:
# Train until early stopping triggers; `epochs` is only an upper bound.
# `Model.fit` accepts generators directly and replaces the deprecated `fit_generator`.
history = model.fit(
    train_generator,
    epochs = 2000,
    # One "epoch" here is 15 batches, i.e. a partial pass over the training split.
    steps_per_epoch = 15,
    validation_data = val_generator,
    # Evaluate on the whole validation split every epoch so the metric watched by
    # early stopping is computed on the same, complete set of images each time.
    validation_steps = len(val_generator),
    callbacks=[early_stopping]
)

In [None]:
# Per-epoch metrics recorded during training.
metrics_by_name = history.history
acc, val_acc = metrics_by_name['accuracy'], metrics_by_name['val_accuracy']
loss, val_loss = metrics_by_name['loss'], metrics_by_name['val_loss']

epochs = range(1, 1 + len(acc))

# One figure per metric: training values as blue dots, validation values as a blue line.
curves = (
    ('Training and Validation Accuracy', acc, 'Training Accuracy', val_acc, 'Validation Accuracy'),
    ('Training and Validation Loss', loss, 'Training loss', val_loss, 'Validation Loss'),
)
for position, (title, train_values, train_label, val_values, val_label) in enumerate(curves):
    if position > 0:
        # Open a fresh figure for every metric after the first one.
        plt.figure()
    plt.plot(epochs, train_values, 'bo', label=train_label)
    plt.plot(epochs, val_values, 'b', label=val_label)
    plt.title(title)
    plt.legend()

plt.show()

In [None]:
# Start from the first batch so that re-running this cell gives the same ordering.
test_generator.reset()

# `steps` counts BATCHES, not samples: len(test_generator) is the number of batches
# needed to see every test image exactly once. Passing the number of files instead
# asks for far more batches than one pass over the test set contains.
# `Model.predict` accepts generators and replaces the deprecated `predict_generator`.
preds = model.predict(
    test_generator,
    steps=len(test_generator)
)

# One prediction per test image is required to build the submission.
assert len(preds) == len(test_generator.filenames), "prediction count does not match test files"
preds.shape

In [None]:
# Generator filenames include their sub-folder; keep only the part after the last '/'.
image_ids = [path.rpartition('/')[2] for path in test_generator.filenames]

# Collapse the model output into a flat 1-D array of scores.
predictions = preds.flatten()

# Assemble the two submission columns, ids first.
data = dict(id=image_ids, has_cactus=predictions)
submission = pd.DataFrame(data)
print(submission.head())

In [None]:
# Write the submission table to "submission.csv" in the current working directory,
# omitting the dataframe index so the file contains only the dataframe's columns.
submission.to_csv("submission.csv", index=False)