In [None]:
pip install -Uqq fastbook

In [None]:
from fastbook import *
from fastai.vision.widgets import *
import numpy as np
import pandas as pd

In [None]:
# search API key: taken from the AZURE_SEARCH_KEY environment variable,
# with the placeholder 'XXX' used when the variable is not set
key = os.getenv('AZURE_SEARCH_KEY', 'XXX')

In [None]:
# get list of rodents: the names are the fields of the FIRST row of the csv file
# (newline='' lets the csv module do its own newline handling, as its docs require)
with open('rodent_names_reduced.csv', newline='') as names_file:
    rodent_names = next(csv.reader(names_file), [])

# an empty file or a blank first line would otherwise surface much later as a confusing error
if not rodent_names:
    raise ValueError("no rodent names found in the first row of 'rodent_names_reduced.csv'")

In [None]:
# root folder of the dataset: one sub-folder per rodent name
path = Path('images')

# for each rodent: download images if it has no image folder yet
# NOTE: the folder is created before the search runs, so a search/download that fails
# part-way leaves a folder behind and that rodent is skipped on the next run;
# delete the folder by hand to retry it
for o in rodent_names:
    dest = path/o
    if dest.exists():
        continue
    print(o)
    # parents=True: on a fresh run the top-level `images` folder does not exist yet
    dest.mkdir(parents=True, exist_ok=True)
    results = search_images_bing(key, f'{o} rodent')
    download_images(dest, urls=results.attrgot('contentUrl'))

In [None]:
# get list of images that can't be opened
fns = get_image_files(path)
failed = verify_images(fns)
# a bare expression in the middle of a cell displays nothing, so report explicitly
print(f'{len(failed)} of {len(fns)} files could not be opened as images')

# delete those images (does nothing when the list is empty)
for bad_file in failed:
    bad_file.unlink()

In [None]:
# Clean the downloaded files in place:
#   * files that cannot be opened as images are deleted
#   * PNG images that are not already in RGBA mode are converted to RGBA and overwritten
# NOTE(review): the stated reason for the conversion was that the pretrained model was
# trained with RGBA images -- confirm; the code itself only normalises the PNG mode.
for class_dir in sorted(path.iterdir()):
    # only the per-rodent folders hold images; skip stray files sitting directly in `path`
    if not class_dir.is_dir():
        continue
    # sorted() snapshots the listing, so deleting files while looping is safe
    for image_path in sorted(class_dir.iterdir()):
        if image_path.name.startswith('.'):
            continue
        try:
            rgba_copy = None
            # the context manager closes the file handle before the file is overwritten
            with Image.open(image_path) as im:
                if im.format == 'PNG' and im.mode != 'RGBA':
                    rgba_copy = im.convert('RGBA')
            if rgba_copy is not None:
                # the content is PNG whatever the file extension says, so keep writing PNG
                rgba_copy.save(image_path, format='PNG')
        except Exception:
            # unreadable or unconvertible: remove this file (and only this file), then carry on
            try:
                image_path.unlink()
            except Exception as e:
                print(e)

In [None]:
# create datablock object: the recipe for turning the image folders into labelled samples

# hold out 20% of the items for validation; the fixed seed keeps the split repeatable
holdout_split = RandomSplitter(valid_pct=0.2, seed=42)
# per-item transform: random crop covering at least half of the image, resized to 224
per_item_crop = RandomResizedCrop(224, min_scale=0.5)
# per-batch augmentations, library defaults
per_batch_aug = aug_transforms()

rodents_db = DataBlock(
    blocks=(ImageBlock, CategoryBlock),   # input is an image, target is a category
    get_items=get_image_files,            # how the items are collected from the source
    splitter=holdout_split,
    get_y=parent_label,                   # how the label of an item is derived
    item_tfms=per_item_crop,
    batch_tfms=per_batch_aug,
)

In [None]:
# create dataloaders object from the datablock, using `path` as the data source
dls = rodents_db.dataloaders(path)
# this object contains a training data loader and a validation data loader
# a dataloader is an object that passes batches of a few items at a time to the GPU
# i.e. we use a data loader to pass image files to the GPU

In [None]:
# show a batch of four images (in one row) from the validation data loader to check things are ok
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# create deep learning model that:
#   - fetches its data from our dataloaders object,
#   - starts from the parameters of the resnet18 model,
#   - reports error_rate as a human readable measure of accuracy
learn = vision_learner(dls, resnet18, metrics=error_rate)

# run through batches of the training set; for each batch,
# slightly tweak the parameters of the model to decrease the loss for that batch
# (the loss function was chosen automatically when the learner was created)
n_epochs = 4
learn.fine_tune(n_epochs)

In [None]:
# build an interpretation object from the trained learner ...
interp = ClassificationInterpretation.from_learner(learn)

# ... and plot the confusion matrix, which shows which images were mistaken for a
# different species; the figure is large so that every class label stays readable
confusion_figsize = (30, 30)
interp.plot_confusion_matrix(figsize=confusion_figsize)

In [None]:
# open the cleaner utility, which lets us mark inaccurately classified images
# for removal from the dataset or for a change of category
cleaner = ImageClassifierCleaner(learn)
# last expression of the cell: displays the widget
cleaner

In [None]:
# delete images we chose to remove
# (the exists() check makes re-running this cell harmless)
for idx in cleaner.delete():
    unwanted = cleaner.fns[idx]
    if unwanted.exists():
        unwanted.unlink()

# move images we re-labelled into the folder of their new category
for idx, cat in cleaner.change():
    src = cleaner.fns[idx]
    if not src.exists():
        # already moved or deleted by an earlier run of this cell
        continue
    target = path/cat/src.name
    # a file with the same name may already live in the new category's folder;
    # pick a free name instead of letting shutil.move raise
    suffix_no = 1
    while target.exists():
        target = path/cat/f'{src.stem}_{suffix_no}{src.suffix}'
        suffix_no += 1
    shutil.move(str(src), str(target))
# we could now train our model again (from scratch) to get more accurate results

In [None]:
# export the trained model so it can be loaded again later
# NOTE(review): no file name is passed, so the library default is used --
# presumably `export.pkl` under the learner's path; confirm
learn.export()