# Hamburg Birds Classifier

This notebook trains a classifier for some of the bird species that can be found in Hamburg.

In [None]:
import os
import fastbook
from fastbook import *
from fastai.vision.widgets import *
import torch
import gc


In [None]:
# Run fastbook's setup hook before the library is used
# (this import repeats the one in the first cell).
import fastbook
fastbook.setup_book()

In [None]:
# API key for the Bing image search below, read from the environment;
# 'XXX' is only a placeholder used when the variable is not set.
key = os.getenv('AZURE_SEARCH_KEY', 'XXX')

In [None]:
# Root folder of the image dataset (one sub-folder per species).
path = Path('birds')

# Species covered by the classifier. Each name is used both as the image
# search query and as the name of the folder the pictures are stored in.
birds_types = [
    'European Robin', 'Marsh Tit',
    'Eurasian Blackbird', 'Eurasian Nuthatch',
    'Eurasian Jay', 'Eurasian Wren',
    'Hawfinch', 'Bullfinch',
    'Common Starling', 'Greylag Goose',
    'Barnacle Goose', 'Meadow Pipit',
    'Common Wood Pigeon', 'Mistle Thrush',
]

In [None]:
# Download the images for every species that has no pictures on disk yet.
# The check is made per species folder rather than only on `path`, so a run
# that was interrupted part-way through can simply be re-executed to fetch the
# remaining species, while folders that already contain files are left alone
# (no search request is made for them).
path.mkdir(exist_ok=True)
for bird in birds_types:
    dest = path/bird
    if dest.exists() and any(dest.iterdir()):
        continue  # this species already has images
    dest.mkdir(exist_ok=True)
    results = search_images_bing(key, bird)
    download_images(dest, urls=results.attrgot('contentUrl'))

In [None]:
# Collect every image file below `path`; both names refer to the same result.
fns = get_image_files(path)
filenames = fns

In [None]:
# Run verify_images over the collected files; the result is displayed and the
# listed files are deleted in the following cells.
failed = verify_images(fns)

In [None]:
# Display the files returned by the verification step.
failed

In [None]:
# Delete every file that was reported by the verification step.
for bad_file in failed:
    bad_file.unlink()

In [None]:
# Recipe describing how the image folders are turned into training data.
birdsDB = DataBlock(
    # Inputs are images, targets are single categories.
    blocks=(ImageBlock, CategoryBlock),
    # Items are the image files found under the path given to .dataloaders().
    get_items=get_image_files,
    # Labels come from parent_label (images are stored in one folder per species).
    get_y=parent_label,
    # Hold out 20% of the items for validation; the fixed seed keeps the split stable.
    splitter=RandomSplitter(valid_pct=0.2, seed=1337),
    # Per-item transform: random resized crop to size 224.
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    # Per-batch transforms: aug_transforms() with its default settings.
    batch_tfms=aug_transforms(),
)

In [None]:
# Build the DataLoaders from the images stored under `path`.
dls = birdsDB.dataloaders(path)

In [None]:
# Preview up to 36 samples from one batch, laid out in 6 rows.
dls.show_batch(max_n=36, nrows=6)

# Dataset Cleanup

Here we will do a few iterations of training the model and removing incorrect data. This will improve the quality of the dataset.

In [None]:
# Rebuild the DataLoaders from the current contents of `path`.
# NOTE(review): presumably re-run at the start of every cleanup iteration so
# that deleted or moved files are picked up - confirm.
dls = birdsDB.dataloaders(path)

In [None]:
# ResNet18 learner used for the cleanup iterations; it reports both accuracy
# and error rate. No extra callbacks are attached here.
learn = cnn_learner(dls, resnet18, metrics=[accuracy, error_rate])

In [None]:
# Fine-tune the cleanup learner for 8 epochs.
learn.fine_tune(8)

In [None]:
# Build an interpretation object from the trained learner; it is used below to
# look at the samples the model handles worst.
interp = ClassificationInterpretation.from_learner(learn)


In [None]:
# Plot the 7 samples with the highest loss, one per row.
interp.plot_top_losses(7, nrows=7)

In [None]:
# Create the cleaning widget for the learner trained above and display it, so
# images can be marked for deletion or re-labelling before the next cell runs.
cleaner = ImageClassifierCleaner(learn, height=256, width=512)
cleaner

In [None]:
# Apply the decisions made in the cleaner widget to the files on disk.

# 1) Remove every image that was marked for deletion.
for idx in cleaner.delete():
    cleaner.fns[idx].unlink()

# 2) Move every re-labelled image into the folder of its new category.
for idx, cat in cleaner.change():
    src = cleaner.fns[idx]
    dest = path/cat/src.name
    # File names are not guaranteed to be unique across species folders, and
    # shutil.move raises if the name is already taken inside the target
    # folder, so fall back to the first free "<stem>_<n><suffix>" name.
    n = 1
    while dest.exists():
        dest = path/cat/f'{src.stem}_{n}{src.suffix}'
        n += 1
    shutil.move(str(src), str(dest))


In [None]:
# Display the indices currently marked for deletion in the cleaner widget.
# NOTE(review): looks like a leftover inspection cell - the previous cell has
# already removed these files; confirm whether it is still needed.
cleaner.delete()

# Now that the dataset is polished, let's start collecting the batch size data!

We're going to check the effect of batch size on three architectures: ResNet50, ResNet34 and ResNet18. Unfortunately, ResNet101 was too big for my GPU to swallow, so it is not part of the experiment.

In [None]:
# Short tag used in the log file names -> architecture handed to cnn_learner.
architectures = dict(rn50=resnet50, rn34=resnet34, rn18=resnet18)

The function we use to get the training results. CSVLogger is what we need to have the training metrics saved to files.

In [None]:
def train(arch, batch_size, index):
    """Run one fine-tuning experiment and log its metrics to a CSV file.

    arch: key into `architectures` selecting the network to train.
    batch_size: batch size the DataLoaders are built with.
    index: repetition number; it is only used in the log file name, so that
        repeated runs of the same configuration write to different files.
    """
    loaders = birdsDB.dataloaders(path, bs=batch_size)
    network = architectures[arch]
    # The file name encodes the whole configuration: architecture, batch size, run.
    callbacks = [ShowGraphCallback, CSVLogger(fname=f'birds-{arch}-bs{batch_size}-{index}.csv')]
    learner = cnn_learner(loaders, network, metrics=error_rate, cbs=callbacks)
    learner = learner.to_fp16()  # switch the learner to fp16 training
    learner.fine_tune(10)

The final training loop. It took around 6 hours to complete on an NVIDIA RTX 2080 Ti GPU, so have a bit of patience.

In [None]:
# Full experiment grid: every architecture x every batch size, 10 repetitions
# each. Every run writes its own CSV log (see `train`).
for arch in ['rn50', 'rn34', 'rn18']:
    for bs in [128, 64, 32, 16, 8, 4]:
        for index in range(10):
            train(arch, bs, index)
            # Collect garbage first so the objects left over from the finished
            # run are actually freed, then release the now-unused cached GPU
            # memory. In the opposite order the cache is emptied while those
            # objects are still alive, so nothing is handed back.
            gc.collect()
            torch.cuda.empty_cache()