# Roush Kitty Tool


In [None]:
# Make the CUDA 10.0 toolchain visible to this kernel.
# `!export VAR=...` runs in a one-off subshell that exits immediately, so the
# variables never reach the kernel process. Writing to os.environ changes the
# kernel's own environment (and that of any subprocess it launches).
import os

_cuda_bin = '/usr/local/cuda-10.0/bin'
_cuda_lib = '/usr/local/cuda-10.0/lib64/'

# Prepend the CUDA binaries to PATH (skipped when already present so re-running
# the cell does not keep growing the variable).
_path_entries = [p for p in os.environ.get('PATH', '').split(os.pathsep) if p]
if _cuda_bin not in _path_entries:
    os.environ['PATH'] = os.pathsep.join([_cuda_bin] + _path_entries)

# Append the CUDA libraries to LD_LIBRARY_PATH.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH when the
# process starts, so this may only help child processes -- confirm, or export
# the variable before launching Jupyter.
_lib_entries = [p for p in os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep) if p]
if _cuda_lib not in _lib_entries:
    os.environ['LD_LIBRARY_PATH'] = os.pathsep.join(_lib_entries + [_cuda_lib])
#!config.gpu_options.allow_growth = True


In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

from os.path import join
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression

from keras.preprocessing import image
from keras.applications import xception
from keras.callbacks import TensorBoard, ModelCheckpoint, LearningRateScheduler
from keras.applications import Xception
from keras.models import Model
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2

# Load labels

In [None]:
# --- Notebook configuration ---
SEED = 42          # seed for the random train/validation split
NUM_CLASSES = 58   # number of cat breeds to classify
INPUT_SIZE = 299   # side length (pixels) of the square images fed to Xception
base_dir = ''      # root directory holding the label CSV and the image folders

# Load the label table (read from catLabels.csv under base_dir).
labels = pd.read_csv(join(base_dir, 'catLabels.csv'))


In [None]:
# Keep only the NUM_CLASSES most frequent breeds, ordered by image count (descending).
breedsList = list(labels.groupby('breed').count().sort_values(by='id', ascending=False).head(NUM_CLASSES).index)
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the original (avoids SettingWithCopyWarning).
labels = labels[labels['breed'].isin(breedsList)].copy()
labels['target'] = 1
labels['rank'] = labels['breed'].rank(ascending=False, method='dense')
# One-hot table: one row per image id, one column per breed.
# - Keyword arguments: positional arguments to pivot() are rejected by pandas 2.x.
# - pivot() sorts its rows by id, whereas the images are later loaded in the
#   row order of `labels`; reindexing restores that order so row i of the
#   targets always describes image i.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reindex(labels['id'].values)
    .reset_index()
    .fillna(0)
)
# Reproducible random 80:20 train/validation split, expressed as boolean masks
# over the rows of `labels`.
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
trainX = rnd < 0.8
validX = rnd >= 0.8
# Columns follow breedsList, so column j corresponds to breedsList[j].
y_train = labels_pivot[breedsList].values
trainy = y_train[trainX]
valy = y_train[validX]

In [None]:
def read_img(img_id, train_or_test, size):  #takenFromKaggle
    """Load one JPEG from disk, resized, as a numpy array.

    # Arguments
        img_id: string, file name of the image without the '.jpg' extension.
        train_or_test: string, sub-directory of base_dir ('train' or 'test').
        size: target size the image is resized to on load.
    # Returns
        Image as numpy array.
    """
    file_name = '%s.jpg' % img_id
    img_path = join(base_dir, train_or_test, file_name)
    loaded = image.load_img(img_path, target_size=size)
    return image.img_to_array(loaded)

# Load the images and resize them to 299x299 for Xception

In [None]:
# Pre-allocate one float32 slot per labelled image, then fill it in label order.
x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
# tqdm wraps the Series itself (not the enumerate generator) so it knows the
# total and can show a real progress bar with an ETA.
for i, img_id in enumerate(tqdm(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    # preprocess_input is applied to a batch of one; [0] drops the batch axis
    # again so the assignment does not depend on implicit broadcasting.
    x_train[i] = xception.preprocess_input(np.expand_dims(img, axis=0))[0]
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

In [None]:
# Apply the boolean split masks to the image array to get the train/validation inputs.
Xtrain, Xval = x_train[trainX], x_train[validX]
# Show the shapes of inputs and targets for both splits.
print(tuple(arr.shape for arr in (Xtrain, Xval, trainy, valy)))

In [None]:
# lr decay schedule
def lr_schedule(epoch):
    """Step-wise learning rate schedule (based on a Kaggle baseline).

    Starts at 1e-4, is scaled by 1e-1 once the epoch exceeds 80 and by 1e-2
    once it exceeds 120. The chosen rate is also printed.

    # Arguments
        epoch (int): The number of epochs
    # Returns
        lr (float32): learning rate
    """
    rate = 1e-4
    # Thresholds are checked from the latest one down; only the first match applies.
    for threshold, factor in ((120, 1e-2), (80, 1e-1)):
        if epoch > threshold:
            rate = rate * factor
            break
    print('Learning rate: ', rate)
    return rate

In [None]:
# Image augmentation: compensates for poorly taken cat pictures and
# generates additional training samples.
aug = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)

In [None]:
# Stage 1: pre-train only the new dense head, with the Xception base frozen, so
# that large gradients from the randomly initialised head cannot destroy the
# pretrained weights.
batch_size = 8
# Xception backbone with ImageNet weights (rather than None) and global average pooling.
basic_model = Xception(include_top=False, weights='imagenet', pooling='avg')

# Freeze every backbone layer for this stage.
for layer in basic_model.layers:
    layer.trainable = False

input_tensor = basic_model.input
# Classification head on top of the pooled features.
x = basic_model.output
x = Dropout(.5)(x)  # dropout to help with overfitting
x = Dense(NUM_CLASSES, activation='softmax')(x)  # softmax over the breed classes

model = Model(inputs=input_tensor, outputs=x)
model.compile(optimizer=RMSprop(1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

# One epoch = one pass over the *training* split. The step count must be an
# integer and must be derived from Xtrain (not from x_train, which also holds
# the validation images).
pretrain_steps = int(np.ceil(len(Xtrain) / float(batch_size)))

model.fit_generator(aug.flow(Xtrain, trainy, batch_size=batch_size),
                    validation_data=(Xval, valy),
                    epochs=40,
                    steps_per_epoch=pretrain_steps,
                    verbose=1)

model.save('xception.h5')  # save the stage-1 model in case the next stage fails

In [None]:
# NOTE(review): presumably a leftover sanity-check cell; it only prints a
# marker string and nothing else in the notebook depends on it.
print("hi")

In [None]:
# Stage 2: fine-tune the whole model (backbone and head together).

# Unfreeze every layer of the model.
for layer in model.layers:
    # NOTE(review): assigning W_regularizer on an already-built layer likely adds
    # no regularisation loss (Keras 2 names this kernel_regularizer and applies
    # it when the layer is constructed) -- confirm before relying on it.
    layer.W_regularizer = l2(1e-2)
    layer.trainable = True

# Re-compile so the trainable changes take effect; start from the schedule's epoch-0 rate.
model.compile(optimizer=RMSprop(lr_schedule(0)), loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
# Writes the weights to 'weights_xception.h5'; save_best_only=True keeps only the best version.
checkpointer = ModelCheckpoint(filepath='weights_xception.h5', verbose=1,
                               save_best_only=True)


# Applies lr_schedule during training.
lr = LearningRateScheduler(lr_schedule)

# Train the whole model: epoch numbering resumes at 40 (where stage 1 stopped)
# and runs up to 150.
model.fit_generator(aug.flow(Xtrain, trainy, batch_size=2), validation_data=(Xval,valy),
                    steps_per_epoch=400,
                    epochs=150, 
                    callbacks=[checkpointer, lr],
                    initial_epoch=40,
                    verbose=1)



model.save('xception2.h5') # save the model as it stands after the last epoch

# Now let's evaluate the trained model ourselves to make sure it works

In [None]:
# Run the trained network over both splits (train first, then validation);
# the outputs are the model's per-class scores for every image.
train_x_test, valid_x_test = (
    model.predict(split, batch_size=16, verbose=1) for split in (Xtrain, Xval)
)
print('Xception train features shape: {} size: {:,}'.format(train_x_test.shape, train_x_test.size))
print('Xception valid features shape: {} size: {:,}'.format(valid_x_test.shape, valid_x_test.size))

In [None]:
# Fit a multinomial logistic regression on the network outputs and score it
# on the validation split.
logreg = LogisticRegression(
    multi_class='multinomial',
    solver='lbfgs',
    random_state=SEED,
    max_iter=10000000,
)
# One-hot rows are decoded to a class index by weighting each column with its position.
logreg.fit(train_x_test, (trainy * range(NUM_CLASSES)).sum(axis=1))
valid_preds = logreg.predict(valid_x_test)
valid_probs = logreg.predict_proba(valid_x_test)
val_logloss = log_loss(valy, valid_probs)
val_accuracy = accuracy_score((valy * range(NUM_CLASSES)).sum(axis=1), valid_preds)
print('Validation LogLoss {}'.format(val_logloss))
print('Validation Accuracy {}'.format(val_accuracy))

# Check errors
# See which images we get wrong, to guide further improvements

In [None]:
# Display every misclassified validation image with its actual and predicted breed.
# Decode the one-hot validation targets into class indices (positions in breedsList).
valid_breeds = (valy * range(NUM_CLASSES)).sum(axis=1)
error_idx = (valid_breeds != valid_preds)
# Ids, predicted names and actual names of the misclassified images, all in the same order.
error_ids = labels.loc[validX, 'id'].values[error_idx]
predicted_names = [breedsList[int(b)] for b in valid_preds[error_idx]]
actual_names = [breedsList[int(b)] for b in valid_breeds[error_idx]]
for img_id, predicted, actual in zip(error_ids, predicted_names, actual_names):
    fig, ax = plt.subplots(figsize=(5, 5))
    # Same input size as used for training, rather than a hard-coded 299.
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    ax.imshow(img / 255.)  # scale pixel values into [0, 1] for imshow
    ax.text(10, 250, 'Actual: %s' % actual, color='w', backgroundcolor='r', alpha=0.8)
    ax.text(10, 270, 'Prediction: %s' % predicted, color='k', backgroundcolor='g', alpha=0.8)
    ax.axis('off')
    plt.show()
    # Release the figure so a long list of errors does not accumulate open figures.
    plt.close(fig)