In [1]:
import pandas as pd
import numpy as np
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from os.path import join
from sklearn.preprocessing import LabelEncoder
import shutil, os, glob
from PIL import Image

Using TensorFlow backend.


In [2]:
# Read both competition CSVs from the working directory: the id/breed table
# and the sample submission.
labels, sample_submission = (
    pd.read_csv(csv_name)
    for csv_name in ('labels.csv', 'sample_submission.csv')
)

In [3]:
# Ten breeds with the most labelled images (row count per breed, largest first).
(
    labels
    .groupby("breed")
    .count()
    .sort_values("id", ascending=False)
    .head(10)
)

Unnamed: 0_level_0,id
breed,Unnamed: 1_level_1
scottish_deerhound,126
maltese_dog,117
afghan_hound,116
entlebucher,115
bernese_mountain_dog,114
shih-tzu,112
great_pyrenees,111
pomeranian,111
basenji,110
samoyed,109


In [4]:
# Ten breeds with the fewest labelled images (same ordering, bottom of the table).
(
    labels
    .groupby("breed")
    .count()
    .sort_values("id", ascending=False)
    .tail(10)
)

Unnamed: 0_level_0,id
breed,Unnamed: 1_level_1
tibetan_mastiff,69
german_shepherd,69
giant_schnauzer,69
walker_hound,69
otterhound,69
golden_retriever,67
brabancon_griffon,67
komondor,67
briard,66
eskimo_dog,66


In [None]:
# Build train/breed/<breed_name>/ with one sub-folder per distinct breed in `labels`.
breed = labels.breed.unique()

# The chdir calls are kept on purpose: the following cells step back up with
# os.chdir("..") twice, so this cell must finish inside train/breed.
os.chdir("train")
# exist_ok=True makes the cell re-runnable; os.mkdir raised FileExistsError
# as soon as any of the folders was already there.
os.makedirs("breed", exist_ok=True)
os.chdir("breed")

for breed_type in breed:
    os.makedirs(breed_type, exist_ok=True)
    print("Directory ", breed_type, " created")

In [None]:
# Step up one directory level; the trailing expression displays the new
# working directory so the move can be checked by eye.
os.chdir("..")
os.getcwd()

In [None]:
# Step up one more directory level and display the resulting working directory.
os.chdir("..")
os.getcwd()

In [None]:
# Build valid/breed/<breed_name>/ with one sub-folder per distinct breed in `labels`.
breed = labels.breed.unique()

# The chdir calls are kept on purpose: the following cells step back up with
# os.chdir("..") twice, so this cell must finish inside valid/breed.
# exist_ok=True makes the cell re-runnable; os.mkdir raised FileExistsError
# as soon as any of the folders was already there.
os.makedirs("valid", exist_ok=True)
os.chdir("valid")
os.makedirs("breed", exist_ok=True)
os.chdir("breed")

for breed_type in breed:
    os.makedirs(breed_type, exist_ok=True)
    print("Directory ", breed_type, " created")

In [None]:
# Step up one directory level; the trailing expression displays the new
# working directory.
os.chdir("..")
os.getcwd()

In [None]:
# Step up one more directory level and display the resulting working directory.
os.chdir("..")
os.getcwd()

In [5]:
# Split inputs/targets: x holds the image ids, y the matching breed names.
x, y = labels["id"], labels["breed"]

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled ids for validation; random_state pins the split.
# NOTE(review): no `stratify` argument is passed, so per-breed proportions are
# not enforced between the two parts.
x_train, x_valid, y_train, y_valid = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Display the current working directory before the relative-path file moves below.
os.getcwd()

In [7]:
# Number of image ids in each part of the split.
image_count_train, image_count_valid = len(x_train), len(x_valid)

In [8]:
# Display the number of training images.
image_count_train

8177

In [9]:
# Display the number of validation images.
image_count_valid

2045

In [None]:
# Kept as a chdir because the next cell returns with os.chdir("..").
os.chdir("train")

# Move every training-split image from train/<id>.jpg into train/breed/<breed>/.
# The loop variables are deliberately not named `image` (that would overwrite
# the keras.preprocessing `image` module imported at the top of the notebook)
# or `file`.
train_rows = labels.loc[x_train.index, ["id", "breed"]]
for image_id, breed_type in train_rows.itertuples(index=False):
    # The trailing separator keeps the destination an existing directory, so a
    # missing breed folder fails instead of the image being renamed to "breed/<breed>".
    shutil.move(image_id + ".jpg", os.path.join("breed", breed_type, ""))

In [None]:
# Leave train/ again and display the resulting working directory.
os.chdir("..")
os.getcwd()

In [None]:
# Move every validation-split image from train/<id>.jpg into valid/breed/<breed>/.
# The loop variables are deliberately not named `image` (that would overwrite
# the keras.preprocessing `image` module imported at the top of the notebook)
# or `file`.
valid_rows = labels.loc[x_valid.index, ["id", "breed"]]
for image_id, breed_type in valid_rows.itertuples(index=False):
    shutil.move(
        os.path.join("train", image_id + ".jpg"),
        # The trailing separator keeps the destination an existing directory.
        os.path.join("valid", "breed", breed_type, ""),
    )

In [None]:
# Nest the test images one level deeper: create test2/, move test/ inside it
# (giving test2/test/...), then rename test2/ back to test/, so the files end
# up under test/test/.
# NOTE(review): presumably needed because flow_from_directory reads images from
# sub-folders of the directory it is given — confirm against the Keras docs.
# Running this cell a second time nests the images another level deeper.
os.mkdir("test2")
shutil.move("test", "test2")
os.rename("test2", "test")

In [10]:
# Side length (pixels) images are resized to, and the generator/training batch size.
input_size, batch_size = 300, 16

In [11]:
# The ImageNet InceptionV3 weights expect pixels scaled to [-1, 1], which is
# what the model's own preprocess_input does. The previous rescale=1./255 fed
# the network [0, 1] inputs instead.
# NOTE: *.npy feature caches produced with the old scaling must be regenerated.
from keras.applications.inception_v3 import preprocess_input

# Training images additionally get random shear/zoom/rotation/shift/flip.
train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=30,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)

# Validation and test images are only scaled, never augmented.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [12]:
# Iterate the per-breed training folders in a fixed order (shuffle=False);
# `train_generator.classes` is read later to build the label matrix.
train_generator = train_datagen.flow_from_directory(
    "train/breed/",
    batch_size=batch_size,
    class_mode="categorical",
    color_mode="rgb",
    seed=42,
    shuffle=False,
    target_size=(input_size, input_size),
)

Found 8177 images belonging to 120 classes.


In [13]:
# Iterate the per-breed validation folders in a fixed order (shuffle=False);
# `valid_generator.classes` is read later to build the label matrix.
valid_generator = valid_datagen.flow_from_directory(
    "valid/breed/",
    batch_size=batch_size,
    class_mode="categorical",
    color_mode="rgb",
    seed=42,
    shuffle=False,
    target_size=(input_size, input_size),
)

Found 2045 images belonging to 120 classes.


In [14]:
# Iterate the unlabeled test images in a fixed order (shuffle=False); the
# single nested folder under test/ is reported as one class.
test_generator = test_datagen.flow_from_directory(
    "test/",
    batch_size=batch_size,
    class_mode="categorical",
    color_mode="rgb",
    seed=42,
    shuffle=False,
    target_size=(input_size, input_size),
)

Found 10357 images belonging to 1 classes.


In [58]:
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling2D, Activation
from keras.optimizers import Adam, SGD, RMSprop, adadelta
from keras.models import Sequential, Model
from keras.applications.xception import Xception
from keras.applications.inception_v3 import InceptionV3
from keras.utils import np_utils
from keras.models import model_from_json

In [16]:
# One-hot label matrices in generator order (the generators use shuffle=False,
# so row i matches the i-th extracted feature map).
# The class count is passed explicitly: to_categorical otherwise infers the
# width from the largest index present, so a split that happens to lack the
# last breed would produce fewer columns than the model's output layer.
num_classes = len(train_generator.class_indices)
y_train = np_utils.to_categorical(train_generator.classes, num_classes=num_classes)
y_valid = np_utils.to_categorical(valid_generator.classes, num_classes=num_classes)

In [17]:
# ImageNet-pretrained InceptionV3 without its classification top, used below
# as a fixed feature extractor for input_size x input_size RGB images.
inception_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(input_size, input_size, 3),
)

In [None]:
# Run the headless InceptionV3 over the training generator and keep its outputs.
train_inception_v3 = inception_model.predict_generator(train_generator)

In [None]:
# Cache the training features on disk so they can be reloaded without re-running the network.
np.save('train_inception_v3_300.npy', train_inception_v3)

In [None]:
# Same extraction for the validation generator, cached to its own .npy file.
valid_inception_v3 = inception_model.predict_generator(valid_generator)
np.save('valid_inception_v3_300.npy', valid_inception_v3)

In [18]:
# Reload the cached training features written by the extraction cell.
train_inception_v3 = np.load('train_inception_v3_300.npy')

In [19]:
# Reload the cached validation features written by the extraction cell.
valid_inception_v3 = np.load('valid_inception_v3_300.npy')

In [20]:
# Display the feature array's shape (one 8x8x2048 map per training image in the output below).
train_inception_v3.shape

(8177, 8, 8, 2048)

In [68]:
# Classifier head trained on the cached InceptionV3 feature maps:
# global average pool -> Dense(256) -> BatchNorm -> ReLU -> Dropout(0.5)
# -> 120-way softmax (one output per breed).
model = Sequential([
    GlobalAveragePooling2D(input_shape=train_inception_v3.shape[1:]),
    Dense(256),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),
    Dense(120, activation='softmax'),
])

In [69]:
# RMSprop with a small learning rate; categorical cross-entropy matches the
# one-hot label matrices, and accuracy is tracked as the metric.
optimizer = RMSprop(lr=0.0001, rho=0.99)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [70]:
# Train the head on the cached features for 20 epochs, evaluating on the
# validation features after each epoch. The returned History object is the
# cell's displayed value.
model.fit(
    train_inception_v3,
    y_train,
    batch_size=batch_size,
    epochs=20,
    validation_data=(valid_inception_v3, y_valid),
)

Train on 8177 samples, validate on 2045 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc9bdfa1588>

In [71]:
# Persist the trained head in two parts: weights as HDF5, architecture as JSON.
model.save_weights("model_inception_v3.h5")

model_json = model.to_json()
with open("model_inception_v3.json", "w") as json_file:
    json_file.write(model_json)

print("Saved model to disk")

Saved model to disk


In [None]:
# Run the headless InceptionV3 over the test generator and keep its outputs.
test_inception = inception_model.predict_generator(test_generator)

In [None]:
# Cache the test features on disk so they can be reloaded without re-running the network.
np.save('test_inception_v3_300.npy', test_inception)

In [72]:
# Reload the cached test features written by the extraction cell.
test_inception = np.load('test_inception_v3_300.npy')

In [73]:
# Score the test features with the freshly trained head (verbose=1 shows progress).
prediction = model.predict(test_inception, verbose=1)



In [None]:
# Rebuild the classifier head from disk: architecture from the JSON file,
# then the trained weights from the HDF5 file.
# `with` closes the file even if read() raises, matching the cell that wrote it.
with open('model_inception_v3.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

loaded_model.load_weights("model_inception_v3.h5")
print("Loaded model from disk")

In [None]:
# Alternative to predicting with `model`: score the test features with the
# head reloaded from disk.
prediction = loaded_model.predict(test_inception, verbose=1)

In [74]:
# Wrap the prediction array in a DataFrame; no column names are given, so the
# columns get default integer labels.
prediction = pd.DataFrame(prediction)

In [75]:
# Write the predictions to CSV with default options, so the row index is
# written as the first column.
prediction.to_csv('prediction_inception.csv')

In [None]:
# Preview the first rows only instead of rendering the whole prediction table.
prediction.head()