# Kaggle | Dog Breed Identification

We're going to use convolutional neural networks (CNNs) to allow our computer to see - something that is only possible thanks to deep learning. Moreover, we are going to reuse a pre-trained model.

Link : https://www.kaggle.com/c/dog-breed-identification/

# Video from the fastai course

Watch video : https://youtu.be/JNxcznsrRb8?t=1h39m50s

# Review steps

## 1) All steps

1. Enable data augmentation, and precompute=True (it is just to save time but you can start with precompute=False)
1. Use `lr_find()` to find highest learning rate where loss is still clearly improving
1. Train last layer from precomputed activations for 1-2 epochs
1. Train last layer with data augmentation (i.e. precompute=False) for 2-3 epochs with cycle_len=1
1. Unfreeze all layers
1. Set earlier layers to 3x-10x lower learning rate than next higher layer
1. Use `lr_find()` again
1. Train full network with cycle_mult=2 until over-fitting

## 2) Main steps

1. Get best LR
1. Train with the first layers frozen
1. Train unfrozen model

1. Use `lr_find()` to find highest learning rate where loss is still clearly improving
1. Train last layer with data augmentation (i.e. precompute=False) for 2-3 epochs with cycle_len=1
1. Unfreeze all layers
1. Set earlier layers to 3x-10x lower learning rate than next higher layer
1. Train full network with cycle_mult=2 until over-fitting

# 1) Preparation

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

Here we import the libraries we need. We'll learn about what each does during the course.

In [None]:
# This file contains all the main external libs we'll use
from fastai.imports import *

In [None]:
from fastai.torch_imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [None]:
# if you have multiple GPU, choose one 
# torch.cuda.set_device(1)

In [None]:
PATH = "data/dogbreed/"
arch=resnet34
#arch=resnext50
#arch=resnext101_64

In [None]:
sz=299
bs=64

# 2) Train and valid images

In [None]:
# Path to the CSV file that lists the labelled training images
label_csv = f'{PATH}labels.csv'
# Count the lines of the file minus one (presumably the header row); the `with`
# block closes the file handle instead of leaving it open
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Indexes of the rows used as validation set
val_idxs = get_cv_idxs(n)

In [None]:
print(f'{n} training images')
print(f'{len(val_idxs)} validation images')

# 3) Data analysis

In [None]:
!ls {PATH}

In [None]:
label_df = pd.read_csv(label_csv)
label_df.head()

In [None]:
label_df.pivot_table(index='breed', aggfunc=len).sort_values('id', ascending=False)

In [None]:
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
data = ImageClassifierData.from_csv(PATH, 'train', label_csv, test_name='test',
                                    val_idxs=val_idxs, suffix='.jpg', tfms=tfms, bs=bs)

In [None]:
fn = PATH+data.trn_ds.fnames[0]; fn

In [None]:
img = PIL.Image.open(fn); img

In [None]:
img.size

In [None]:
size_d = {k: PIL.Image.open(PATH+k).size for k in data.trn_ds.fnames}

In [None]:
row_sz, col_sz = zip(*size_d.values())
row_sz = np.array(row_sz)
col_sz = np.array(col_sz)

In [None]:
row_sz[:5]

In [None]:
plt.hist(row_sz);

In [None]:
plt.hist(row_sz[row_sz<1000])

In [None]:
len(col_sz)

In [None]:
plt.hist(col_sz);

In [None]:
plt.hist(col_sz[col_sz<1000])

In [None]:
len(data.trn_ds),len(data.test_ds)

In [None]:
len(data.classes),data.classes[:5]

# 3) Initial model

In [None]:
def get_data(sz,bs):
    """
    Build the image classifier data for image size `sz` and batch size `bs`.

    Reads the notebook-level `arch`, `PATH`, `label_csv` and `val_idxs`.
    Returns the data object unchanged when `sz` is above 300, otherwise
    returns the result of `data.resize(340, 'tmp')`.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    # To build the data without the test files, drop the `test_name='test'` entry.
    loader_args = dict(test_name='test', num_workers=4, val_idxs=val_idxs,
                       suffix='.jpg', tfms=transforms, bs=bs)
    dataset = ImageClassifierData.from_csv(PATH, 'train', label_csv, **loader_args)
    if sz > 300:
        return dataset
    return dataset.resize(340, 'tmp')

## 3.1) Step : pre - Pre-compute (check everything works well)

In [None]:
# you can set sz to a small value like 64 to check that everything works before raising it
data = get_data(sz,bs)

In [None]:
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
# get all details about layers
learn.summary()

In [None]:
# get the layers structure of model
learn.get_layer_groups()

In [None]:
# search for best learning rate
lrf=learn.lr_find()
#learn.sched.plot_lr()
learn.sched.plot()

In [None]:
lr=1e-2

In [None]:
%time learn.fit(lr, 5)

In [None]:
learn.save('weights1')

In [None]:
learn.load('weights1')

## 3.2) Step1 - Data augmentation and LR restart (run a new model learn)

In [None]:
from sklearn import metrics

In [None]:
data = get_data(sz,bs)

In [None]:
learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)

In [None]:
# get all details about layers
learn.summary()

In [None]:
# get the layers structure of model
learn.get_layer_groups()

In [None]:
# search for best learning rate
lrf=learn.lr_find()
#learn.sched.plot_lr()
learn.sched.plot()

In [None]:
lr=1e-2

In [None]:
%time learn.fit(lr, 2)

In [None]:
learn.save('weights2')

In [None]:
learn.precompute = False

In [None]:
# get all details about layers
learn.summary()

In [None]:
# get the layers structure of model
learn.get_layer_groups()

In [None]:
# search for best learning rate
lrf=learn.lr_find()
#learn.sched.plot_lr()
learn.sched.plot()

In [None]:
lr=1e-2

In [None]:
%time learn.fit(lr, 5, cycle_len=1)

In [None]:
learn.save('weights3')

## 3.3) Step 2 - Increase size images (improve your model and avoid overfitting)

In [None]:
# Increase size of images to 299 x 299 (224 x 224 before)
learn.set_data(get_data(299,bs))

# Call freeze method to be sure that all layers but last layer are frozen
learn.freeze()

In [None]:
# get all details about layers
learn.summary()

In [None]:
# get the layers structure of model
learn.get_layer_groups()

In [None]:
# search for best learning rate
lrf=learn.lr_find()
#learn.sched.plot_lr()
learn.sched.plot()

In [None]:
lr=1e-2

In [None]:
learn.fit(lr, 3, cycle_len=1)
# This will show that our model is underfitting as Training loss >> Validation loss
# it means that my restart occurs too soon : we move to another hill without reaching the low of the valley

In [None]:
learn.save('weights4')

In [None]:
# We set up cycle_mult to 2 in order to avoid to restart too soon
# Then, Training Loss and Validation Loss are about the same
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)

In [None]:
learn.save('weights5')

In [None]:
# this gives prediction for validation set. Predictions are in log scale
log_preds,y = learn.TTA()
probs = np.exp(log_preds)
accuracy(log_preds,y), metrics.log_loss(y,probs)

In [None]:
# One more fit with cycle_len = 2 could improve a little bit the accuracy
learn.fit(lr, 1, cycle_len=2)

In [None]:
learn.save('weights6')

In [None]:
# this gives prediction for validation set. Predictions are in log scale
log_preds,y = learn.TTA()
probs = np.exp(log_preds)
accuracy(log_preds,y), metrics.log_loss(y,probs)

## 3.4) Unfreeze ?

There is no need to unfreeze the first layers, because the dataset of this Kaggle competition is very similar to ImageNet (it may even be a subset of it).

## 3.5) Take the average of weights ?

Use cycle_save_name in learn.fit() if you want to save weights after each cycle_len and get at the end the average of weights.

## 3.6) Analyzing results

### Confusion matrix 

In [None]:
# Predicted class index for each image: the column with the highest log prediction
preds = np.argmax(log_preds, axis=1)
# Keep the probabilities of all classes; selecting a single column only makes
# sense for a two-class problem
probs = np.exp(log_preds)

A common way to analyze the result of a classification model is to use a [confusion matrix](http://www.dataschool.io/simple-guide-to-confusion-matrix-terminology/). Scikit-learn has a convenient function we can use for this purpose:

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y, preds)

We can just print out the confusion matrix, or we can show a graphical view (which is mainly useful for dependents with a larger number of categories).

In [None]:
plot_confusion_matrix(cm, data.classes)

### Images analysis

to be done through image display

# 4) Final model (use the whole training set)

Just follow the same steps but with the full training set (i.e., with 20% more images) before making predictions on the test set with the following code (see lesson1-DogBreed-resnet34-FullTrainingSet.ipynb).

In [None]:
val_idxs=[0]

# 5) Create a submission file

In [None]:
#data.test_ds.fnames

In [None]:
# this gives prediction for test set. Predictions are in log scale
log_preds,y = learn.TTA(is_test=True)
probs = np.exp(log_preds)

In [None]:
probs.shape

In [None]:
df = pd.DataFrame(probs)
df.columns = data.classes

In [None]:
df.insert(0, 'id', [o[5:-4] for o in data.test_ds.fnames])

In [None]:
df.head()

In [None]:
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok=True)
df.to_csv(f'{SUBM}subm.gz', compression='gzip', index=False)

In [None]:
FileLink(f'{SUBM}subm.gz')

# 6) Save predictions on test file

In [None]:
# save predictions
def save_array(fname, arr):
    """Write `arr` to a bcolz carray rooted at `fname` (opened with mode 'w') and flush it."""
    bcolz.carray(arr, rootdir=fname, mode='w').flush()

save_array(f'{SUBM}probs.bc', probs)

# If you have probs from several different models, you can take the average
# avg = (prob1 + prob2 + prob3) / 3

# 7) Individual prediction

## 7.1) All steps

In [None]:
# First file name of the validation set (the dataset attribute is `fnames`)
fn = data.val_ds.fnames[0]; fn

In [None]:
# PATH is a plain string, so the image path is built by concatenation
Image.open(PATH+fn).resize((150,150))

In [None]:
trn_tfms,val_tfms = tfms_from_model(arch, sz) # get transformations

In [None]:
# Wrap the single file in a dataset (with a dummy label 0) and a data loader
ds = FilesIndexArrayDataset([fn], np.array([0]), val_tfms, PATH)
dl = DataLoader(ds)
# predict_dl is a method of the learner
preds = learn.predict_dl(dl)
np.argmax(preds)

In [None]:
# short version
im = val_tfms(Image.open(PATH+fn))
preds = learn.predict_array(im[None])
np.argmax(preds)

In [None]:
# create a bar graph of the predictions
# One bar per class: the bar positions must match the number of predictions
class_pos = np.arange(len(data.classes))
plt.barh(class_pos, preds[0])
_ = plt.yticks(class_pos, data.classes)

## 7.2) Short version

In [None]:
# Get image url
# First file name of the validation set (the dataset attribute is `fnames`)
fn = data.val_ds.fnames[0]
# PATH is a plain string, so the image path is built by concatenation
Image.open(PATH+fn).resize((150,150))

In [None]:
# get transformations
trn_tfms,val_tfms = tfms_from_model(arch, sz) 

In [None]:
# get prediction
im = val_tfms(Image.open(PATH+fn))
learn.precompute=False # We'll pass in a raw image, not activations
preds = learn.predict_array(im[None])
np.argmax(preds) # preds are log probabilities of classes

In [None]:
# create a bar graph of the predictions
# One bar per class: the bar positions must match the number of predictions
class_pos = np.arange(len(data.classes))
plt.barh(class_pos, preds[0])
_ = plt.yticks(class_pos, data.classes)

# Annex : get automatically the best learning rate number

In [None]:
def plot_loss_change(sched, sma=1, n_skip=20, y_lim=(-0.01,0.01)):
    """
    Plots rate of change of the loss function.
    Parameters:
        sched - learning rate scheduler, an instance of LR_Finder class.
                Its `lrs` and `losses` sequences are read.
        sma - number of batches for simple moving average to smooth out the curve.
        n_skip - number of batches to skip on the left.
        y_lim - limits for the y axis.
    """
    # Use the scheduler that was passed in rather than the global `learn.sched`
    lrs, losses = sched.lrs, sched.losses
    # The first sma+1 points have no full window behind them: pad with zeros so
    # that derivatives[i] lines up with lrs[i]
    derivatives = [0] * (sma + 1)
    derivatives.extend((losses[i] - losses[i - sma]) / sma
                       for i in range(1 + sma, len(lrs)))
    # With fewer than sma+1 recorded batches the padding alone is longer than
    # lrs; trim it so both plotted sequences have the same length
    derivatives = derivatives[:len(lrs)]

    plt.ylabel("d/loss")
    plt.xlabel("learning rate (log scale)")
    plt.plot(lrs[n_skip:], derivatives[n_skip:])
    plt.xscale('log')
    plt.ylim(y_lim)

In [None]:
# search for best learning rate
lrf=learn.lr_find()
#learn.sched.plot_lr()
learn.sched.plot()

In [None]:
plot_loss_change(learn.sched, sma=20)