# Dog breeds
## Using fast.ai's deep learning techniques to reach the highest possible accuracy in classifying dog breeds

In [None]:
# (Re)load the autoreload extension and set it to mode 2 so edited modules
# are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
#import sys
#sys.path.append("../fastai") 

In [None]:
from fastai.imports import *
from fastai.torch_imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [None]:
# Notebook-wide configuration, read by the cells below.
PATH = "data/"           # data root: holds labels.csv plus the train/ and test/ folders
arch = resnext101_64     # model architecture handed to the fastai helpers
sz = 224                 # image size used for the first training stage
bs = 58                  # batch size

In [None]:
# Location of the labels file under the data root.
label_csv = f'{PATH}labels.csv'
# Number of labelled rows: line count minus one (presumably the header line).
# The context manager closes the file handle, which a bare open() would leave
# dangling until garbage collection.
with open(label_csv) as labels_file:
    n = sum(1 for _ in labels_file) - 1
# Indices held out for validation, drawn from the n labelled rows.
val_idx = get_cv_idxs(n)

## Data Exploration after unzipping

In [None]:
# Show the current working directory, then list the contents of the data root.
!pwd
!ls {PATH}

In [None]:
# Load the labels file into a DataFrame for exploration.
label_df= pd.read_csv(label_csv)

In [None]:
# Peek at the first rows of the labels table.
label_df.head()

In [None]:
# Count rows per breed (len applied to the remaining column, 'id'),
# sort by that count in descending order and show the top of the table.
(
    label_df
    .pivot_table(index='breed', aggfunc=len)
    .sort_values('id', ascending=False)
    .head()
)

In [None]:
# Transforms for `arch` at size `sz`, using the side-on augmentation set
# and a maximum zoom of 1.2.
transformations = tfms_from_model(
    arch, sz,
    aug_tfms=transforms_side_on,
    max_zoom=1.2,
)

In [None]:
# Build the dataset from the labels CSV: images come from train/ (and test/)
# with a '.jpg' suffix, and `val_idx` selects the validation rows.
data = ImageClassifierData.from_csv(
    PATH, 'train', f'{PATH}labels.csv',
    test_name='test', val_idxs=val_idx, suffix='.jpg',
    tfms=transformations, bs=bs,
)

In [None]:
# Path of the first training image, relative to the working directory.
fn = PATH + data.trn_ds.fnames[0]
fn

In [None]:
# Number of labelled rows counted from labels.csv.
n

In [None]:
# Inspect the validation indices returned by get_cv_idxs.
val_idx

In [None]:
# Size of the validation split.
len(val_idx)

In [None]:
# Open the sample image and display it as the cell output.
img = PIL.Image.open(fn)
img

In [None]:
# Dimensions of the sample image as reported by PIL.
img.size

In [None]:
# Map every training filename to the image size reported by PIL.
# Each file is opened in a context manager so its handle is closed as soon
# as the size has been read, rather than leaving one open handle per image
# for the garbage collector to clean up.
size_d = {}
for k in data.trn_ds.fnames:
    with PIL.Image.open(PATH+k) as im:
        size_d[k] = im.size

In [None]:
# Split the per-image size pairs into two parallel sequences.
# NOTE(review): PIL reports size as (width, height), so `row_sz` presumably
# holds widths and `col_sz` heights — confirm the naming.
row_sz, col_sz = zip(*size_d.values())

In [None]:
# Convert both size sequences to NumPy arrays so they can be sliced and masked.
row_sz = np.array(row_sz)
col_sz = np.array(col_sz)

In [None]:
# First five entries of the first size dimension.
row_sz[:5]

In [None]:
# Distribution of the first size dimension across all training images
# (trailing ';' suppresses the textual return value).
plt.hist(row_sz);

In [None]:
# Same histogram restricted to values below 1000, to zoom in on the bulk
# of the distribution.
plt.hist(row_sz[row_sz<1000]);

# Initial model

In [None]:
def get_data(sz,bs):
    """Build the image-classification data object for a given size and batch size.

    Transforms are created with `tfms_from_model` for the notebook-level `arch`
    (side-on augmentations, max zoom 1.2). Images are read from `train`/`test`
    under `PATH` with labels from `labels.csv`, and the notebook-level `val_idx`
    selects the validation rows.

    Args:
        sz: target image size passed to the transforms.
        bs: batch size.

    Returns:
        The data object as built when `sz` is greater than 300; otherwise the
        result of calling `.resize(340, 'tmp')` on it.
    """
    model_tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.2)
    img_data = ImageClassifierData.from_csv(
        PATH, 'train', f'{PATH}labels.csv',
        test_name='test', num_workers=4,
        val_idxs=val_idx, suffix='.jpg', tfms=model_tfms, bs=bs,
    )
    if sz > 300:
        return img_data
    # presumably caches copies shrunk to 340 under 'tmp' to speed up loading
    # at smaller target sizes — confirm against the fastai resize docs
    return img_data.resize(340, 'tmp')

## Precompute

In [None]:
# Build the dataset at the configured size and batch size.
data = get_data(sz,bs)

In [None]:
# Create a learner on the pretrained architecture with precompute enabled.
learn = ConvLearner.pretrained(arch,data,precompute=True)

In [None]:
# Baseline fit; presumably learning rate 1e-2 for 5 cycles — confirm against
# the fastai `fit` signature.
learn.fit(1e-2,5)

## Augment

In [None]:
from sklearn import metrics

In [None]:
# Rebuild the dataset for the augmentation experiment.
data = get_data(sz,bs)

In [None]:
# Fresh learner with precompute enabled and ps=0.5
# (presumably the dropout probability of the added layers — confirm).
learn = ConvLearner.pretrained(arch,data,precompute=True,ps=0.5)

In [None]:
# Short fit while precompute is still enabled.
learn.fit(1e-2,2)

In [None]:
# Switch precompute off; presumably required for data augmentation to take
# effect during the following fits — confirm.
learn.precompute=False

In [None]:
# Five fits with cycle_len=1 at learning rate 1e-2.
learn.fit(1e-2, 5, cycle_len=1)

In [None]:
# Three more cycles with cycle_mult=2 (presumably each cycle is twice as long
# as the previous one — confirm).
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Checkpoint the learner after training at size 224.
learn.save('224_pre')

In [None]:
# Restore the size-224 checkpoint (lets the notebook resume from here).
learn.load('224_pre')

## Increase size

In [None]:
# Swap in a dataset built at size 299 (same batch size) and keep training
# on the existing learner.
learn.set_data(get_data(299,bs))
# presumably freezes the pretrained layers so only the head is trained — confirm
learn.freeze()

In [None]:
# Three fits with cycle_len=1 at the larger image size.
learn.fit(1e-2, 3, cycle_len=1)

In [None]:
# Three more cycles with cycle_mult=2 at the larger image size.
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)

In [None]:
# Test-time augmentation (default dataset, presumably validation — confirm):
# exponentiate the log-predictions and average them over the leading axis
# to get one probability row per image.
log_preds, y = learn.TTA()
probs = np.exp(log_preds).mean(axis=0)

In [None]:
# Accuracy and log loss of the averaged TTA probabilities against the targets.
accuracy_np(probs,y), metrics.log_loss(y, probs)

In [None]:
# Checkpoint the learner after training at size 299.
learn.save('299_pre')

In [None]:
# Restore the size-299 checkpoint.
learn.load('299_pre')

In [None]:
# One final cycle with cycle_len=2 before predicting on the test set.
learn.fit(1e-2,1,cycle_len=2)

In [None]:
# Run test-time augmentation on the test set.
log_preds,y = learn.TTA(is_test=True)
# Convert log-predictions to probabilities; the leading axis is kept here
# (the array is indexed with three subscripts further down).
probs = np.exp(log_preds)
probs.shape

In [None]:
# Shape of the first slice along the leading axis.
probs[0].shape

In [None]:
# Class labels known to the dataset; used as the submission's column names.
data.classes

In [None]:
# Build the submission frame with one column per class.
# Average the probabilities over the leading (TTA) axis instead of keeping
# only slice 0, so the submission uses every augmented pass, the same way
# the validation metrics above were computed.
df = pd.DataFrame(np.mean(probs, axis=0), columns = data.classes)

In [None]:
# Check the column labels of the submission frame.
df.columns

In [None]:
# Derive each id from the file's base name without its extension. This replaces
# the fixed [5:-4] slice, which silently breaks if the folder prefix or the
# extension length ever changes.
test_ids = [os.path.splitext(os.path.basename(o))[0] for o in data.test_ds.fnames]
# Re-running this cell must not fail: insert() raises if 'id' already exists,
# so overwrite the column in that case.
if 'id' in df.columns:
    df['id'] = test_ids
else:
    df.insert(0,'id',test_ids)

In [None]:
# Confirm that 'id' is now the first column.
df.columns

In [None]:
# Preview the first rows of the submission frame.
df.head()

In [None]:
# Write the submission as a gzip-compressed CSV inside <PATH>subm/,
# creating the folder if it does not exist yet.
SUBM = f'{PATH}subm/'
os.makedirs(SUBM, exist_ok=True)
df.to_csv(f'{SUBM}subm.gz', index=False, compression='gzip')

In [None]:
# Render a link to the generated submission file in the notebook output.
FileLink(f'{SUBM}subm.gz')