In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from skimage.io import imread

from keras.applications import resnet50
from keras.optimizers import Adam
from keras.layers import GlobalAveragePooling2D, Dense
from keras.models import Model
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [2]:
# Load the training index (image ids + labels) and the submission template,
# whose `id` column lists the test images to score.
train = pd.read_csv(
    '../input/aerial-cactus-identification/train.csv'
)
test = pd.read_csv(
    '../input/aerial-cactus-identification/sample_submission.csv'
)
# Binary target vector, in the same row order as train.csv.
y_train = np.array(train['has_cactus'])

In [3]:
# Read every training image from disk, in the same order as the rows of
# train.csv so that images stay aligned with y_train.
X_train = [
    imread('../input/aerial-cactus-identification/train/train/' + name)
    for name in train.id
]

In [4]:
# Read every test image from disk, in the order given by the submission
# template so predictions can be written back row-for-row.
X_test = [
    imread('../input/aerial-cactus-identification/test/test/' + name)
    for name in test.id
]

In [5]:
# Convert the Python lists of images into NumPy arrays.
# NOTE(review): presumably all images share one shape, giving a single
# (n_images, height, width, channels) array each — confirm.
X_train, X_test = np.array(X_train), np.array(X_test)

In [11]:
# Apply ResNet50's input preprocessing to both image sets.
# NOTE: the result is written back to the same names, so re-running this cell
# without re-creating the arrays would preprocess the data a second time.
X_train, X_test = (
    resnet50.preprocess_input(images) for images in (X_train, X_test)
)

In [12]:
# Retrieve ResNet50 with ImageNet weights, without its fully connected top
# layers, to use as the convolutional backbone.
base_model = resnet50.ResNet50(
    weights='imagenet',
    include_top=False,
)

Instructions for updating:
Colocations handled automatically by placer.




Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [13]:
# Classification head on top of the backbone:
# global average pooling -> 1024-unit ReLU layer -> single sigmoid unit
# (the sigmoid output is the predicted value for `has_cactus`).
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [14]:
# Binary classification: cross-entropy loss, Adam with a small learning rate,
# and accuracy reported during training.
model.compile(
    optimizer=Adam(lr=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Warm-start from previously saved weights when they are available.
# On a first run there is no saved file yet, so loading is skipped (instead of
# raising) and the model keeps its current initialisation.
WEIGHTS_PATH = '../input/weights/model.hdf5'
if os.path.isfile(WEIGHTS_PATH):
    model.load_weights(WEIGHTS_PATH)
else:
    print('No saved weights found at {}; skipping load.'.format(WEIGHTS_PATH))

In [17]:
# Sanity check: the model should be able to overfit a single batch.
# batch_size is set to the subset size so the 128 samples really are processed
# as ONE batch per epoch (matching the batch size used for the full training
# run); without it Keras splits them into smaller default-sized mini-batches.
model.fit(X_train[:128], y_train[:128], epochs=30, batch_size=128)

Instructions for updating:
Use tf.cast instead.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f8ada7f2da0>

In [18]:
# Train on the full training set; the checkpoint callback persists the model
# to model.hdf5 while training runs.
checkpoint = ModelCheckpoint(filepath='model.hdf5')
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    callbacks=[checkpoint],
)

Epoch 1/100


KeyboardInterrupt: 

In [19]:
# Fresh copy of the submission template; its `has_cactus` column is later
# overwritten with the model's predictions.
sample = pd.read_csv(
    '../input/aerial-cactus-identification/sample_submission.csv'
)

In [None]:
# Score the test images. The model ends in a single sigmoid unit, so predict()
# yields one value per image in a 2-D (n_images, 1) array; flatten it to 1-D
# before storing. Bracket assignment is used because attribute-style
# assignment would silently create a plain attribute (not a column) if the
# column name were ever missing.
sample['has_cactus'] = model.predict(X_test).ravel()

In [None]:
# Distribution of the predicted values stored in `has_cactus` for the test set
# (these are model outputs, not ground-truth targets).
fig, ax = plt.subplots()
sns.kdeplot(sample['has_cactus'], ax=ax)
ax.set_xlim([0, 1])
ax.set_xlabel('Predicted has_cactus value')
ax.set_ylabel('Density')
# Trailing semicolon keeps the notebook from echoing the Text object's repr.
ax.set_title('Distribution of test-set predictions');

In [20]:
# Write the submission file without the DataFrame index column.
sample.to_csv(
    path_or_buf='sample_submission.csv',
    index=False,
)