# Roush Kitty Tool


In [None]:
#!export PATH=/usr/local/cuda-10.0/bin:$PATH
#!export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-10.0/lib64/
#!config.gpu_options.allow_growth = True
import tensorflow as tf

#sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))


In [None]:
# Sanity check: report whether the installed TensorFlow build was compiled
# with CUDA support (this does not by itself prove a GPU is usable).
tf.test.is_built_with_cuda() 
#tf.test.is_gpu_available()

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# Configure the TF1-compat session so GPU memory is allocated on demand
# (allow_growth) rather than reserved up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Must be created after the option above is set; the session is built from `config`.
session = InteractiveSession(config=config)

In [None]:
# Confirm that TensorFlow can actually see a GPU at runtime.
# NOTE(review): newer TensorFlow releases deprecate this call in favour of
# tf.config.list_physical_devices('GPU') - confirm against the installed version.
tf.test.is_gpu_available()

In [None]:
%matplotlib inline
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs
from os.path import join, exists, expanduser
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications import xception
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.applications import Xception
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2

# Load labels

In [None]:
# Notebook-wide configuration.
INPUT_SIZE = 299 # side length (pixels) every image is resized to before being fed to Xception
NUM_CLASSES = 58 # number of cat breeds kept; also the width of the softmax output layer
SEED = 42  # seed for the NumPy RNG used in the train/validation split and for LogisticRegression
data_dir = ''  # dataset root; the empty string makes every path relative to the working directory
POOLING = 'avg'  # global pooling mode intended for the Xception feature extractor

# catLabels.csv is expected to provide at least the columns 'id' and 'breed'
# (both are used by the breed selection that follows).
labels = pd.read_csv(join(data_dir, 'catLabels.csv'))
# Quick consistency check: number of files in train/ versus number of label rows.
print(len(listdir(join(data_dir, 'train'))), len(labels))

In [None]:
# Keep only the NUM_CLASSES most frequent breeds.
breed_counts = labels.groupby('breed').count().sort_values(by='id', ascending=False)
selected_breed_list = list(breed_counts.head(NUM_CLASSES).index)

# .copy() gives an independent frame, so the column assignments below do not
# write into a view of the original DataFrame (SettingWithCopyWarning).
labels = labels[labels['breed'].isin(selected_breed_list)].copy()
labels['target'] = 1
labels['rank'] = labels['breed'].rank(ascending=False, method='dense')

# One-hot encode the breed of every image.
# * keyword arguments: positional index/columns/values are rejected by pandas 2.x
# * reindex: pivot() sorts its rows by id, whereas the image tensor is filled in
#   the row order of `labels`; re-ordering here keeps row i of the one-hot matrix
#   aligned with image i even when the CSV is not sorted by id.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reindex(pd.Index(labels['id'], name='id'))
    .reset_index()
    .fillna(0)
)

# Reproducible random 90% / 10% train / validation split.
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
train_idx = rnd < 0.9
# The masks must be complementary: a threshold of 0.1 here would put ~80% of the
# training rows into the validation set as well and inflate validation scores.
valid_idx = rnd >= 0.9

y_train = labels_pivot[selected_breed_list].values
ytr = y_train[train_idx]
yv = y_train[valid_idx]

In [None]:
def read_img(img_id, train_or_test, size):
    """Load one JPEG of the dataset from disk, resized to `size`.

    The file is looked up at ``<data_dir>/<train_or_test>/<img_id>.jpg``.

    # Arguments
        img_id: string, file name without the ``.jpg`` extension.
        train_or_test: string, sub-directory to read from ('train' or 'test').
        size: target size passed to the Keras loader as `target_size`.
    # Returns
        Image as numpy array.
    """
    img_path = join(data_dir, train_or_test, '%s.jpg' % img_id)
    return image.img_to_array(image.load_img(img_path, target_size=size))

# Get images 

In [None]:
# Pre-allocate one float32 tensor for all images, then fill it row by row in the
# order of labels['id'] with Xception-preprocessed pixels.
target_shape = (INPUT_SIZE, INPUT_SIZE)
x_train = np.zeros((len(labels),) + target_shape + (3,), dtype='float32')
for i, img_id in tqdm(enumerate(labels['id'])):
    # preprocess_input receives a batch of one; the copy keeps the loaded
    # array itself untouched.
    single_batch = np.expand_dims(read_img(img_id, 'train', target_shape).copy(), axis=0)
    x_train[i] = xception.preprocess_input(single_batch)
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))

In [None]:
# Apply the boolean train/validation masks to the image tensor; the same masks
# were used for the label matrix, so images and labels stay paired.
Xtr = x_train[train_idx]
Xv = x_train[valid_idx]
# Shapes of (train images, validation images, train labels, validation labels).
print((Xtr.shape, Xv.shape, ytr.shape, yv.shape))

In [None]:
# lr decay schedule
def lr_schedule(epoch):
    """Step-decay learning-rate schedule.

    Starts at 1e-4 and is reduced after epochs 80, 120 and 150
    (to 1e-5, 1e-6 and 1e-7 respectively).
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs
    # Returns
        lr (float32): learning rate
    """
    lr = 1e-4
    if epoch > 150:
        # Scale by 1e-3; writing this as `1-3` evaluates to -2 and would hand
        # the optimizer a negative learning rate.
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

In [None]:
# On-the-fly augmentation for the training batches: small random rotations,
# zooms, shifts, shears and horizontal flips; pixels exposed by a transform are
# filled with the nearest valid pixel.
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.8,
    horizontal_flip=True,
    fill_mode="nearest",
)

In [None]:
# Stage 1: pre-train only the new classification head.
# The Xception backbone is frozen so that the large gradients produced by the
# randomly initialised Dense layer cannot destroy the pretrained weights.
batch_size = 16

basic_model = Xception(include_top=False, weights='imagenet', pooling=POOLING)

for layer in basic_model.layers:
    layer.trainable = False

input_tensor = basic_model.input
# Build the top: dropout followed by a softmax over the selected breeds.
x = basic_model.output
x = Dropout(.5)(x)
x = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=x)
model.compile(optimizer=RMSprop(1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

# One epoch = one pass over the *training* split. steps_per_epoch has to be an
# integer, and it must be derived from Xtr (what the generator iterates over),
# not from x_train, which also contains the validation images.
steps_per_epoch = int(np.ceil(len(Xtr) / batch_size))

# NOTE: recent TensorFlow releases accept generators in Model.fit and deprecate
# fit_generator; the call is kept for compatibility with the TF version this
# notebook was written against.
model.fit_generator(aug.flow(Xtr, ytr, batch_size=batch_size),
                    validation_data=(Xv, yv),
                    epochs=40,
                    steps_per_epoch=steps_per_epoch,
                    verbose=1)

# File name is kept as-is because the fine-tuning cell loads exactly this path.
model.save('xception20E.h5')

In [None]:
# Now to improve: fine-tune the whole network

In [None]:
import gc
# Run a full garbage-collection pass to release unreferenced objects before the
# saved model is loaded again for fine-tuning.
gc.collect()

In [None]:
# Stage 2: fine-tune the whole model (backbone + head), starting from the
# weights saved after head pre-training.
from tensorflow.keras.models import load_model
model = load_model('xception20E.h5')

# Unfreeze every layer.
# NOTE: no weight regulariser is attached here. Assigning a regulariser
# attribute (e.g. `W_regularizer`) to an already-built layer adds no loss term
# and therefore has no effect on training; regularisers have to be passed at
# layer construction (kernel_regularizer=...), which would require rebuilding
# the model.
for layer in model.layers:
    layer.trainable = True

# Re-compile so the trainable flags take effect; the initial learning rate is
# the schedule's value for epoch 0.
model.compile(optimizer=RMSprop(lr_schedule(0)), loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks: keep the best weights seen so far (ModelCheckpoint monitors the
# validation loss by default) and apply the step-decay learning-rate schedule.
checkpointer = ModelCheckpoint(filepath='weights_xception.h5', verbose=1,
                               save_best_only=True)
lr = LearningRateScheduler(lr_schedule)

# Train all layers. Epoch numbering continues from the 40 pre-training epochs,
# so this runs epochs 40..99 and the schedule's first decay (after epoch 80)
# is reached during this stage.
model.fit_generator(aug.flow(Xtr, ytr, batch_size=8),
                    validation_data=(Xv, yv),
                    steps_per_epoch=400,
                    epochs=100,
                    callbacks=[checkpointer, lr],
                    initial_epoch=40,
                    verbose=1)

# Final-epoch weights; the best-by-validation weights are in weights_xception.h5.
model.save('xception2.h5')

# Old method: logistic regression on Xception bottleneck features, which gets 65% accuracy

In [None]:
# Run both splits through the network; the outputs are the inputs of the
# logistic-regression stage.
# NOTE(review): `model` here is the full classifier ending in the softmax layer,
# so these arrays are class probabilities rather than pooled Xception
# bottleneck activations - confirm this is intended.
train_x_bf, valid_x_bf = [
    model.predict(split, batch_size=16, verbose=1) for split in (Xtr, Xv)
]
print('Xception train bottleneck features shape: {} size: {:,}'.format(train_x_bf.shape, train_x_bf.size))
print('Xception valid bottleneck features shape: {} size: {:,}'.format(valid_x_bf.shape, valid_x_bf.size))

In [None]:
# Multinomial logistic regression on top of the network outputs.
# The one-hot label rows are collapsed to class indices (positions in
# selected_breed_list) via a dot product with 0..NUM_CLASSES-1.
class_ids = np.arange(NUM_CLASSES)
train_targets = ytr.dot(class_ids)
valid_targets = yv.dot(class_ids)

logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED, max_iter=10000000)
logreg.fit(train_x_bf, train_targets)

valid_probs = logreg.predict_proba(valid_x_bf)
valid_preds = logreg.predict(valid_x_bf)

validation_logloss = log_loss(yv, valid_probs)
validation_accuracy = accuracy_score(valid_targets, valid_preds)
print('Validation LogLoss {}'.format(validation_logloss))
print('Validation Accuracy {}'.format(validation_accuracy))

# Check errors
# See which validation images the classifier gets wrong

In [None]:
# Plot every misclassified validation image with its true and predicted breed.
valid_breeds = (yv * range(NUM_CLASSES)).sum(axis=1)  # one-hot rows -> class indices
error_idx = (valid_breeds != valid_preds)

error_ids = labels.loc[valid_idx, 'id'].values[error_idx]
# Class indices are positions in selected_breed_list; map them back to names.
true_names = [selected_breed_list[int(b)] for b in valid_breeds[error_idx]]
pred_names = [selected_breed_list[int(b)] for b in valid_preds[error_idx]]

for img_id, true_breed, pred_breed in zip(error_ids, true_names, pred_names):
    fig, ax = plt.subplots(figsize=(5,5))
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    ax.imshow(img / 255.)  # imshow expects floats in [0, 1]
    ax.text(10, 250, 'Label: %s' % true_breed, color='w', backgroundcolor='r', alpha=0.8)
    ax.text(10, 270, 'Prediction: %s' % pred_breed, color='k', backgroundcolor='g', alpha=0.8)
    ax.axis('off')
    plt.show()
    # Release the figure explicitly so a long error list cannot pile up open figures.
    plt.close(fig)