## Saving a Cats v Dogs Model

This is a minimal example showing how to train a fastai model on Kaggle, and save it so you can use it in your app.

In [None]:
# Make sure we've got the latest version of fastai:
!pip install -Uqq fastai

First, import all the stuff we need from fastai:

In [None]:
from fastai.vision.all import *

Download and decompress our dataset, which is pictures of dogs and cats:

In [None]:
# Fetch and unpack the pets archive, then point `path` at its 'images' sub-folder
path = untar_data(URLs.PETS).joinpath('images')

We need a way to label our images as dogs or cats. In this dataset, pictures of cats are given a filename that starts with a capital letter:

In [None]:
def is_cat(x):
    """Return True when the first character of filename `x` is uppercase.

    In this dataset an initial capital letter marks a picture of a cat.
    """
    first_char = x[0]
    return first_char.isupper()

Now we can create our `DataLoaders`:

In [None]:
# Label every image with `is_cat`, hold out 20% for validation (fixed seed),
# and resize each item to 192 before batching.
dls = ImageDataLoaders.from_name_func(
    '.',
    get_image_files(path),
    label_func=is_cat,
    item_tfms=Resize(192),
    valid_pct=0.2,
    seed=42,
)

... and train our model, a resnet18 (to keep it small and fast):

In [None]:
# Build a resnet18-based learner that reports the error rate, then fine-tune it for 3 epochs
learn = vision_learner(dls, arch=resnet18, metrics=error_rate)
learn.fine_tune(epochs=3)

Now we can export our trained `Learner`. This contains all the information needed to run the model:

In [None]:
# Serialize the trained Learner to 'model.pkl'
learn.export(fname='model.pkl')

Finally, open the Kaggle sidebar on the right if it's not already open, and find the section marked "Output". Open the `/kaggle/working` folder, and you'll see `model.pkl`. Click on it, then click on the menu that appears on the right, and choose "Download". After a few seconds, your model will be downloaded to your computer, where you can then create your app that uses the model.

# All right, let's build my own model!

In [None]:
#NB: Kaggle requires phone verification to use the internet or a GPU. If you haven't done that yet, the cell below will fail
#    This code is only here to check that your internet is enabled. It doesn't do anything else.
#    Here's a help thread on getting your phone number verified: https://www.kaggle.com/product-feedback/135367

import socket,warnings
try:
    # NOTE: this sets the process-wide default socket timeout to 1 second;
    # it stays in effect for sockets created later in the session.
    socket.setdefaulttimeout(1)
    # Probe a well-known public address; the `with` block guarantees the probe
    # socket is closed whether or not the connection succeeds.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.connect(('1.1.1.1', 53))
except socket.error as ex:
    # Chain the original error so the underlying cause stays visible in the traceback.
    raise Exception("STOP: No internet. Click '>|' in top right and set 'Internet' switch to on") from ex
    
# It's a good idea to make sure you're running the latest version of every library you need.
# `!pip install -Uqq <libraries>` upgrades <libraries> to their latest versions.
# NB: You can safely ignore any warnings or errors pip spits out about running as root or incompatibilities
import os
# Non-empty (truthy) only when the KAGGLE_KERNEL_RUN_TYPE environment variable is set
# (presumably Kaggle sets it inside its kernels -- TODO confirm).
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

# voila is installed and its server extension enabled unconditionally, not only on Kaggle.
!pip install voila
!jupyter serverextension enable --sys-prefix voila

# The fastai / search / fastbook upgrade only runs when `iskaggle` is truthy.
if iskaggle:
    !pip install -Uqq fastai 'duckduckgo_search>=6.2' fastbook # Remember to set environment to always use latest environment. Else it won't work

In [None]:
from duckduckgo_search import DDGS #DuckDuckGo has changed the api so we need to update 
from fastcore.all import *
from fastdownload import download_url
import time, json
from fastai.vision.all import *
from fastbook import *
from fastai.vision.widgets import *
import fastai.data.transforms as fdt


In [None]:
# Create functions

def search_images(keywords, max_images=200):
    """Search DuckDuckGo images for `keywords` and return the 'image' field of each hit.

    At most `max_images` results are requested from the search.
    """
    hits = DDGS().images(keywords, max_results=max_images)
    return L(hits).itemgot('image')


In [None]:
# Build the dataset: one sub-folder of downloaded, down-sized photos per search term
searches = 'rabbits', "bears", "dogs"
path = Path("rabbit_bears_dogs_classifier")


for category in searches:
    print(category)
    dest = path/category
    dest.mkdir(parents=True, exist_ok=True)
    download_images(dest, urls=search_images(f'{category} photo'))
    # Pause before the next search (presumably to go easy on the search service)
    time.sleep(5)
    # Shrink the downloaded images in place
    resize_images(dest, max_size=200, dest=dest)

In [None]:
# Some links might be broken, so let's drop the images that cannot be opened.
# List the files ONCE, before deleting anything, so the failure rate is measured
# against the full set of downloaded files rather than against the survivors.
all_files = fdt.get_image_files(path)
failed = verify_images(all_files)
failed.map(Path.unlink) # Using map removes them from the directory
# Guard against an empty download folder, and format the ratio as a real percentage.
failed_share = len(failed) / len(all_files) if len(all_files) else 0.0
print(f'Percentage of failed: {failed_share:.1%}')


To turn our downloaded data into a DataLoaders object we need to tell fastai at least four things:

- What kinds of data we are working with
- How to get the list of items
- How to label these items
- How to create the validation set

In [None]:
# Describe the dataset: image inputs with categorical targets, items found with
# get_image_files, labels taken from the parent folder name, a seeded random
# 80/20 train/validation split, and every item resized to 128.
rabbits = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    item_tfms=Resize(128),
)

In [None]:
# The DataBlock already says what x and y are, how to find the files, how to
# split them and how to size the images; now build the DataLoaders from `path`
# and preview a few validation images in a single row.
dls = rabbits.dataloaders(path)
dls.valid.show_batch(nrows=1, max_n=4)

In [None]:
# Let's try some data augmentation: random resized crops per item plus the
# standard batch augmentations, so training sees varied versions of each image.
rabbits = rabbits.new(
    batch_tfms=aug_transforms(),
    item_tfms=RandomResizedCrop(224, min_scale=.5),
)
dls = rabbits.dataloaders(path)
# unique=True shows the same image repeated, so the augmentations are visible
dls.train.show_batch(nrows=2, max_n=8, unique=True)

In [None]:
# Let's create the model: a resnet18-based learner tracking the error rate,
# fine-tuned for 4 epochs
learn = vision_learner(dls, arch=resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

In [None]:
# Build an interpretation object from the trained learner and plot its confusion matrix
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
# Let's view the worst cases: plot the 2 top-loss items in a single row
interp.plot_top_losses(2, nrows=1)

In [None]:
# The first one is not correctly labeled, so let's delete it.
# Create the cleaner widget from the learner; leaving `cleaner` as the last
# expression of the cell displays it.
cleaner = ImageClassifierCleaner(learn)
cleaner

In [None]:
# Made an error the last time, so not running it.

#for idx in cleaner.delete(): cleaner.fns[idx].unlink()
#for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)

# Let's try to export the model

In [None]:
learn.export() # write the trained Learner to a .pkl file (it is loaded again later as 'export.pkl')

In [None]:
# Point `path` at the current directory and list the .pkl files in it
path = Path()
path.ls(file_exts='.pkl') # `ls` is a method fastai adds to Python's Path class

In [None]:
# Download the first search hit as a test image, then show a thumbnail of it
download_url(search_images("rabbit photos", max_images=1)[0], dest="rabbit_test.jpg", show_progress=False)
Image.open('rabbit_test.jpg').to_thumb(256,256)

In [None]:
# Mimic what the deployed app does: reload the exported Learner and run inference.
# NOTE(review): load_learner reads a pickle file -- only load .pkl files you trust.
learn_inf = load_learner(path/'export.pkl')
# predict returns a 3-item result; the last item holds the per-class probabilities
yhat = learn_inf.predict('rabbit_test.jpg')
print(f"Scores: {yhat}")
print(f"Order: {learn_inf.dls.vocab}") # alphabetic order
# Look the class index up by label instead of hard-coding its position, so the
# reported probability stays correct if the set of categories changes.
rabbit_idx = learn_inf.dls.vocab.o2i['rabbits']
print(f"Probability it is a rabbit: {yhat[2][rabbit_idx]}")

# Let's create the app

In [None]:
# Widgets the callback writes to and the button that triggers it. They must
# exist before `btn_run.on_click(...)` runs and before the layout is built.
out_pl = widgets.Output()
lbl_pred = widgets.Label()
btn_run = widgets.Button(description='Classify')

def on_click_classify(change):
    """Classify the most recently uploaded image and show the result.

    `change` is the argument supplied by the button click and is not used.
    Reads the module-level `btn_upload` and `learn_inf`; writes the thumbnail
    to `out_pl` and the prediction text to `lbl_pred`.
    """
    # NOTE(review): `.data` is how older ipywidgets releases expose uploads;
    # newer releases may only offer `.value` -- confirm against the installed version.
    img = PILImage.create(btn_upload.data[-1])
    out_pl.clear_output()
    with out_pl: display(img.to_thumb(128,128))
    pred,pred_idx,probs = learn_inf.predict(img)
    lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'

btn_run.on_click(on_click_classify)

In [None]:
#hide
# (Re)create btn_upload as a fresh FileUpload widget so the next cell can place it in the layout
btn_upload = widgets.FileUpload()

In [None]:
#hide_output
# Stack the prompt, upload button, classify button, image output and result label vertically
VBox([
    widgets.Label('Select your rabbit!'),
    btn_upload,
    btn_run,
    out_pl,
    lbl_pred,
])