In [None]:
#hide
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [None]:
#hide
from fastbook import *
from fastai.vision.widgets import *

`untar_data` is a fast.ai function to download and decompress data

Deep learning is not always good with tabular data. It works better for data with high-cardinality variables, e.g. postcodes or product IDs.

Deep learning wasn't good at text generation at the time of this course, but transformer architectures have improved this over time.

Deep learning is OK at captioning, but not always accurate.

Lots you can do with deep learning *if* you think creatively about how one mode of data (e.g. language, text) can be used for other modes (e.g. names of proteins)

# From Model to Production

## The Practice of Deep Learning

### Starting Your Project

### The State of Deep Learning

#### Computer vision

#### Text (natural language processing)

#### Combining text and images

#### Tabular data

In the new version of fast.ai, you can use:

`learn=tabular_learner(dls, metrics=accuracy)`

`learn.fit_one_cycle(2)`

Generally there won't be a model available for tabular data to do transfer learning. So you need to fit one cycle, rather than fine tuning.


#### Recommendation systems

#### Other data types

### The Drivetrain Approach

## Gathering Data

# clean
To download images with Bing Image Search, sign up at [Microsoft Azure](https://azure.microsoft.com/en-us/services/cognitive-services/bing-web-search-api/) for a free account. You will be given a key, which you can copy and enter in a cell as follows (replacing 'XXX' with your key and executing it):

In [None]:
# Read the Bing Image Search key from the environment; 'XXX' is only a placeholder to replace with a real key.
key = os.getenv('AZURE_SEARCH_KEY', 'XXX')

In [None]:
# A bare name as the last expression makes the notebook display the object itself; nothing is called here.
search_images_bing

In [None]:
# Query Bing for 'grizzly bear' images using the key set above.
results = search_images_bing(key, 'grizzly bear')
# Collect the 'contentUrl' entry of each result.
ims = results.attrgot('contentUrl')
# Cell output: how many URLs were collected.
len(ims)

In [None]:
#hide
# Replace `ims` with one fixed URL so the cells below do not depend on live search results.
ims = ['http://3.bp.blogspot.com/-S1scRCkI3vY/UHzV2kucsPI/AAAAAAAAA-k/YQ5UzHEm9Ss/s1600/Grizzly%2BBear%2BWildlife.jpg']

In [None]:
# Local file that the first image URL is saved to.
dest = 'images/grizzly.jpg'
# Make sure the target folder exists: on a fresh environment there may be no `images/` directory yet,
# and the download would then have nowhere to write. Harmless when the folder is already there.
Path(dest).parent.mkdir(parents=True, exist_ok=True)
download_url(ims[0], dest)

In [None]:
# Open the file downloaded to `dest` and display a thumbnail (requested size 128x128) as the cell output.
im = Image.open(dest)
im.to_thumb(128,128)

In [None]:
# Root folder for the dataset, and the three bear categories to collect.
path = Path('bears')
bear_types = ('grizzly', 'black', 'teddy')

In [None]:
# Download images only once: if the `path` folder already exists, the whole cell is skipped.
# NOTE: an interrupted earlier run also leaves the folder behind; delete it to force a fresh download.
if not path.exists():
    path.mkdir()
    for bear_type in bear_types:
        # Loop-specific names, so this cell does not overwrite the notebook-level `dest` (a file path
        # used by the single-image cells) or `results` with values of a different kind.
        bear_dir = path/bear_type
        bear_dir.mkdir(exist_ok=True)
        bear_results = search_images_bing(key, f'{bear_type} bear')
        download_images(bear_dir, urls=bear_results.attrgot('contentUrl'))

In [None]:
# Collect the image files found under `path`; the list is shown as the cell output.
fns = get_image_files(path)
fns

In [None]:
# Check the collected files; `failed` holds the ones that did not pass (presumably files that cannot be opened as images).
failed = verify_images(fns)
failed

In [None]:
# Delete every failed file from disk. The trailing `;` suppresses the cell output.
# NOTE: `fns` is not refreshed here, so it may still list the files that were just removed.
failed.map(Path.unlink);

### Sidebar: Getting Help in Jupyter Notebooks

### End sidebar

## From Data to DataLoaders

### MOST IMPORTANT data structure for the course

In [None]:
# Template describing how to build DataLoaders for the bear images.
bears = DataBlock(
    blocks=(ImageBlock, CategoryBlock), # input type (image) and target type (category)
    get_items=get_image_files, # how to get the list of items
    splitter=RandomSplitter(valid_pct=0.2, seed=42), # hold out 20% for validation; the fixed seed keeps the split reproducible
    get_y=parent_label, # how to label each item (presumably from the name of its parent folder)
    item_tfms=Resize(128)) # per-item transform: resize to 128

In [None]:
# Build the DataLoaders from the `bears` template, using `path` as the data source.
dls = bears.dataloaders(path)

In [None]:
# Preview up to 4 items from the validation set in a single row.
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Variant of the template that resizes with the Squish method instead of the default.
# NOTE: `bears` and `dls` are rebound here, so later cells see this variant until they are rebound again.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Squish))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Variant of the template that resizes with the Pad method, using pad_mode='zeros' for the padded area.
# NOTE: `bears` and `dls` are rebound here, so later cells see this variant until they are rebound again.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Pad, pad_mode='zeros'))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Variant of the template that uses a random resized crop of size 128
# (min_scale=0.3 presumably sets the smallest fraction of the image a crop may cover - confirm in the fastai docs).
bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
dls = bears.dataloaders(path)
# Preview from the training set this time; unique=True presumably repeats one image to show the random variation.
dls.train.show_batch(max_n=4, nrows=1, unique=True)

### Data Augmentation

In [None]:
# Back to a plain per-item resize, plus batch-level augmentation from aug_transforms.
# mult=2 is passed to aug_transforms (presumably to exaggerate the effect for the preview - confirm).
bears = bears.new(item_tfms=Resize(128), batch_tfms=aug_transforms(mult=2))
dls = bears.dataloaders(path)
# Preview up to 8 training items in 2 rows.
dls.train.show_batch(max_n=8, nrows=2, unique=True)

## Training Your Model, and Using It to Clean Your Data

In [None]:
# Configuration used for training: random resized crops of size 224 per item,
# and aug_transforms with its default settings per batch.
bears = bears.new(
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    batch_tfms=aug_transforms())
dls = bears.dataloaders(path)

In [None]:
# Create a vision learner on `dls` with the resnet18 architecture, reporting error_rate as the metric.
learn = vision_learner(dls, resnet18, metrics=error_rate)
# Fine-tune the model; 4 is the epoch count passed to fine_tune.
learn.fine_tune(4)

In [None]:
# Build an interpretation object from the trained learner and plot its confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
# Show the 5 items with the highest loss in a single row.
interp.plot_top_losses(5, nrows=1)

In [None]:
# Interactive widget for reviewing the data with the trained learner; displayed as the cell output.
cleaner = ImageClassifierCleaner(learn)
cleaner

In [None]:
#hide
# Left commented out on purpose: these lines modify files on disk.
# Uncomment and run them after making selections in the `cleaner` widget:
#   - the first line deletes the files returned by cleaner.delete()
#   - the second line moves the files returned by cleaner.change() into the folder of their new category
# for idx in cleaner.delete(): cleaner.fns[idx].unlink()
# for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)

## Turning Your Model into an Online Application

### Using the Model for Inference

In [None]:
# Save the trained learner to disk; the cells below expect to find it as `export.pkl`.
learn.export()

In [None]:
# NOTE: this rebinds `path` (previously Path('bears')) to the current directory.
# Re-running an earlier cell that reads `path` after this point will use the new value.
path = Path()
# List the .pkl files in that directory to confirm the export is there.
path.ls(file_exts='.pkl')

In [None]:
# Load the exported learner for inference.
# The export is a pickle file, so only load files from a source you trust.
learn_inf = load_learner(path/'export.pkl')

In [None]:
# Run inference on a single image given by file name; the result is shown as the cell output.
learn_inf.predict('images/grizzly.jpg')

In [None]:
# Show the vocab stored with the loaded learner's DataLoaders (presumably the list of category names).
learn_inf.dls.vocab

### Creating a Notebook App from the Model

In [None]:
# File upload widget; displayed as the cell output so a file can be selected.
btn_upload = widgets.FileUpload()
btn_upload

In [None]:
#hide
# For the book, we can't actually click an upload button, so we fake it
# The stand-in only needs a `data` list, which is the attribute the following cells read.
btn_upload = SimpleNamespace(data = ['images/grizzly.jpg'])

In [None]:
# Create an image from the most recent upload (the last entry of `data`).
img = PILImage.create(btn_upload.data[-1])

In [None]:
# Output widget used as the display area for the selected image.
out_pl = widgets.Output()
# Clear anything rendered earlier, then display the thumbnail inside the widget.
out_pl.clear_output()
with out_pl: display(img.to_thumb(128,128))
out_pl

In [None]:
# Unpack the prediction into the label, its index, and the probabilities (indexed by `pred_idx` further down).
pred,pred_idx,probs = learn_inf.predict(img)

In [None]:
# Label widget showing the predicted class and its probability formatted to 4 decimal places.
lbl_pred = widgets.Label()
lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'
lbl_pred

In [None]:
# Button that will trigger classification; its click handler is attached in a later cell.
btn_run = widgets.Button(description='Classify')
btn_run

In [None]:
def on_click_classify(change):
    """Classify the most recently uploaded image and show the result.

    Used as the click handler for `btn_run`. Reads the last entry of
    `btn_upload.data`, renders a 128x128 thumbnail into `out_pl`, runs
    `learn_inf.predict` on the image, and writes the predicted label and its
    probability to `lbl_pred`.

    Args:
        change: argument supplied by the widget when the handler fires; unused.
    """
    uploads = btn_upload.data
    # Clicking "Classify" before uploading anything would raise an IndexError
    # on `uploads[-1]` inside the callback instead of giving feedback in the UI.
    if not uploads:
        out_pl.clear_output()
        lbl_pred.value = 'Please upload an image first'
        return
    img = PILImage.create(uploads[-1])
    out_pl.clear_output()
    with out_pl:
        display(img.to_thumb(128,128))
    pred,pred_idx,probs = learn_inf.predict(img)
    lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'

# Register the handler so that clicking `btn_run` runs the classification.
btn_run.on_click(on_click_classify)

In [None]:
#hide
# Restore `btn_upload` to a real upload widget (it was replaced by a stand-in earlier) so the next cell can display it.
btn_upload = widgets.FileUpload()

In [None]:
# Stack the whole app vertically: prompt, upload button, classify button, image area, prediction label.
VBox([widgets.Label('Select your bear!'), 
      btn_upload, btn_run, out_pl, lbl_pred])

### Turning Your Notebook into a Real App

In [None]:
#hide
# One-time setup for Voila, left commented out so that running the notebook does not install anything.
# Uncomment both lines and run once if Voila is not installed yet.
# !pip install voila
# !jupyter serverextension enable --sys-prefix voila

### Deploying your app

## How to Avoid Disaster

### Unforeseen Consequences and Feedback Loops

## Get Writing!

## Questionnaire

1. Provide an example of where the bear classification model might work poorly in production, due to structural or style differences in the training data.
1. Where do text models currently have a major deficiency?
1. What are possible negative societal implications of text generation models?
1. In situations where a model might make mistakes, and those mistakes could be harmful, what is a good alternative to automating a process?
1. What kind of tabular data is deep learning particularly good at?
1. What's a key downside of directly using a deep learning model for recommendation systems?
1. What are the steps of the Drivetrain Approach?
1. How do the steps of the Drivetrain Approach map to a recommendation system?
1. Create an image recognition model using data you curate, and deploy it on the web.
1. What is `DataLoaders`?
1. What four things do we need to tell fastai to create `DataLoaders`?
1. What does the `splitter` parameter to `DataBlock` do?
1. How do we ensure a random split always gives the same validation set?
1. What letters are often used to signify the independent and dependent variables?
1. What's the difference between the crop, pad, and squish resize approaches? When might you choose one over the others?
1. What is data augmentation? Why is it needed?
1. What is the difference between `item_tfms` and `batch_tfms`?
1. What is a confusion matrix?
1. What does `export` save?
1. What is it called when we use a model for getting predictions, instead of training?
1. What are IPython widgets?
1. When might you want to use CPU for deployment? When might GPU be better?
1. What are the downsides of deploying your app to a server, instead of to a client (or edge) device such as a phone or PC?
1. What are three examples of problems that could occur when rolling out a bear warning system in practice?
1. What is "out-of-domain data"?
1. What is "domain shift"?
1. What are the three steps in the deployment process?

## Answers

1. Provide an example of where the bear classification model might work poorly in production, due to structural or style differences in the training data.

> If the bear in the picture you are trying to classify is facing away from the camera, while the images in the training data only show bears front-on.

2. Where do text models currently have a major deficiency?

> Conversation. Better at classification or translation.

3. What are possible negative societal implications of text generation models?

> Toxic language reproduced. Disinformation reproduced. Generated text relied on by humans, as if created by humans.

4. In situations where a model might make mistakes, and those mistakes could be harmful, what is a good alternative to automating a process?

> Augmenting the process, with human review.

5. What kind of tabular data is deep learning particularly good at?

> High cardinality variables - those which have lots and lots of discrete levels, i.e. postcodes or product codes.

6. What's a key downside of directly using a deep learning model for recommendation systems?

> Example of prediction <> recommendation issue: if I buy a book by one author, I'm probably already aware of that author and other titles. So showing other titles is not necessarily recommending something I might be interested in buying that I wouldn't have come to myself, it is predicting what I will buy next.

7. What are the steps of the Drivetrain Approach?

> See 'The Drivetrain Approach' - a process for building data products:
* Firstly, Define objective - e.g. maximise 5 year profit
* Secondly, Levers or inputs to control - e.g. price to set
* Thirdly, What data can we collect - e.g. data which can tell you, as you change your levers, how you impact your objective
* Finally, How levers influence the objective - e.g. to determine how the levers influence the objective, we build models.

8. How do the steps of the Drivetrain Approach map to a recommendation system?

> Firstly, **objective** is to drive sales. Secondly, **levers** are rankings of products for recommendation to a user. Thirdly, **data** is generated from randomized experiments that test a wide range of recommendations for a wide range of customers. Then data is used on historic purchases by people similar to the user. Finally, produce **models** that predict the purchase probability for products based on whether the customers are shown the recommendation or not.

9. **Create an image recognition model using data you curate, and deploy it on the web.**

> NB: Use Binder -> A web service that converts the notebook documents in a specified repository into a web application. It creates sharable notebooks that can be accessed by anyone with a single click. It also runs the notebooks on its own virtual machine that stores all the files that are needed to run in the cloud.

10. What is `DataLoaders`?

> `DataLoaders` provides the data for your model. `DataLoaders` is a fastai class that stores multiple `DataLoader` objects you pass to it, normally a train and a valid.

11. What four things do we need to tell fastai to create `DataLoaders`?

> 1. The data we need -> independent and dependent variables
> 2. How to get the list of items
> 3. How to label these items
> 4. How to create the validation set

> Data Block: A class that stores all the preprocessing steps to prepare the dataset for the model. It sets the blocks parameter to two or more of the TransformBlock classes to specify the input and output data types. It sets the `get_items` parameter to a function to specify how to get the data. It sets the splitter parameter to a split function to specify how to split the training and validation sets. It sets the `get_y` parameter to a function to specify how to get the label values. It also returns the `DataBlock` object.

> The `DataBlock` class needs the `blocks`, `get_items`, `splitter`, and `get_y` parameters to be specified to create the `DataLoaders` object.

12. What does the `splitter` parameter to `DataBlock` do?

> `splitter` determines how to split the data into training and validation sets.

13. How do we ensure a random split always gives the same validation set?

> Fix the random seed, using `seed = 42`

14. What letters are often used to signify the independent and dependent variables?

> The independent variable is often referred to as x and the dependent variable is often referred to as y.

15. What's the difference between the crop, pad, and squish resize approaches? When might you choose one over the others?

> Crop: A technique that saves a portion of the image that fits in a square shape of the specified image size. It helps improve the performance of the model by adding images to the training set where the object isn’t fully visible. It can also lose important details in the image that are cropped out.

> Pad: A technique that resizes the image to the specified image size while preserving the aspect ratio. It helps create the square shape that the model expects by adding black pixels to the shortest sides of the image. It can also create blank spaces and lower the resolution of the useful part of the image.

> Squish: A technique that squeezes or stretches the image to the specified image size without preserving the aspect ratio. It helps resize the image to the square shape that the model expects. It can also cause unrealistic proportions in the image that confuses the model and lowers the accuracy.

> fast.ai suggest randomly cropping different areas of an image to help the model learn to focus on objects in different sizes and on different locations in the image. This approach can also help present the images in a way that reflects the real world where the same object is framed differently in different images.

16. What is data augmentation? Why is it needed?

> Data augmentation is a technique to create variations in the dataset by modifying versions of the images in the dataset. It can involve flipping, rotating, scaling, padding, cropping, moving, and resizing images.

> Data augmentation provides new and different examples, that prepares a model for a greater range of possibilities in the real world.

17. What is the difference between `item_tfms` and `batch_tfms`?

> Item Transforms (`item_tfms`): A parameter that applies the specified `Transform` functions to the images in the dataset before separating them into mini-batches. It also performs the transformations on the CPU.

> Batch Transforms (`batch_tfms`): A parameter that applies the specified `Transform` functions to the mini-batches after resizing and separating them from the dataset. It also performs the transformations on the GPU.

18. What is a confusion matrix?

> A matrix that helps visualise the performance of a model, mapping True Positive, True Negative, False Positive, False Negative results.

19. What does `export` save?

> `export` saves a trained model so you can use the model to make predictions in production. `export` saves everything required to build the `Learner` object using the pickle protocol, which includes the architecture, weights, and biases, and definitions that specify how to create the DataLoaders object.

20. What is it called when we use a model for getting predictions, instead of training?

> Inference

21. What are IPython widgets?

> Interactive HTML widgets for Jupyter notebooks

22. When might you want to use CPU for deployment? When might GPU be better?

> CPU for price, when you only require inferences one at a time (i.e. not parallel processing). GPU when conducting multiple inferences at the same time. GPU can also be cost-effective when there is enough volume to conduct inferences in batches.

23. What are the downsides of deploying your app to a server, instead of to a client (or edge) device such as a phone or PC?

> requires users to have an internet connection to use the model.
> causes delays while the data is transmitted to and from the server.
> requires protecting the sensitive data that’s uploaded by users.
> adds overhead for managing, scaling, and protecting the server.

24. What are three examples of problems that could occur when rolling out a bear warning system in practice?

> The textbook provides examples caused by out-of-domain data:
* detect bears correctly but take too long to be useful in practice.
* detect bears incorrectly and trigger false alarms.
* training and production data don’t match, so the system won't work.

25. What is "out-of-domain data"?

> Data that is significantly different in some respect from the training data.

26. What is "domain shift"?

> When **production** data changes over time such that production no longer reflects data used for **training**

27. What are the three steps in the deployment process?

> Manual process: Run model in parallel + human checks all predictions

> Limited scope deployment: Careful human supervision + time or geography limited

> Gradual expansion: Good reporting systems required + consider what could go wrong

### Further Research

1. Consider how the Drivetrain Approach maps to a project or problem you're interested in.
1. When might it be best to avoid certain types of data augmentation?
1. For a project you're interested in applying deep learning to, consider the thought experiment "What would happen if it went really, really well?"
1. Start a blog, and write your first blog post. For instance, write about what you think deep learning might be useful for in a domain you're interested in.