# Imports

In [24]:
# Reload edited modules automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

In [25]:
import json
from glob import glob
from pathlib import Path
from PIL import Image as Image_PIL
import imghdr # built in module
import numpy as np
from fastai import *
import fastai
from fastai.vision import *
from fastai.metrics import error_rate, accuracy
import pandas as pd

In [26]:
# Show which `python` executable the shell resolves to (sanity check of the active environment).
!which python

/Users/maurice/anaconda3/envs/fastai/bin/python


In [27]:
print(fastai.__version__)

1.0.59


# Paths

In [28]:
# Every location is derived from the current user's home directory.
home_dir = Path.home()

# Data: image folders used for training, kept in Google Drive
path_gdrive = home_dir.joinpath('Google Drive/mushrooms')
data_folder = path_gdrive.joinpath('data')

# Project: repository checkout holding the app and its models
project_path = home_dir.joinpath('repos_github/mushroom-identifier/')
app_path = project_path.joinpath('app')
model_path = app_path.joinpath('models')

## Load Mushroom Info

In [43]:
# Read the per-species metadata sheet and key it by its 'Index' column
# (the index values shown below look like the class / folder names).
df_mushrooms = (
    pd.read_excel(project_path / 'data/mushrooms.xlsx', index_col=0)
    .set_index('Index', drop=True)
)
df_mushrooms.head()

Unnamed: 0_level_0,description,edibility,insta_tag,latin,name,other_names,poisonous,wikipedia
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Amanita_Caesarea,,Edible,ntsamc_cat,Amanita Caesarea,Ou de reig,,No. Edible.,http://www.fichasmicologicas.com/index.php?id=...
Amanita_Citrina,-,Edible,ntsaci_cat,Amanita Citrina,Reig bord groc,,No. Edible.,http://www.fichasmicologicas.com/?micos=1&s=ci...
Amanita_Muscaria,,Not edible,ntsam_cat,Amanita Muscaria,"Reig de Fageda, Reig Bord",,No. Not Edible.,http://www.fichasmicologicas.com/?micos=1&s=mu...
Amanita_Pahlloides,,Not edible,ntsap_cat,Amanita Pahlloides,Farinera borda,,Yes.,http://www.fichasmicologicas.com/index.php?id=...
Amanita_Pantherina,-,Not edible,,Amanita Pantherina,----,,Yes.,http://www.fichasmicologicas.com/?micos=1&s=pa...


In [44]:
df_mushrooms.columns

Index(['description', 'edibility', 'insta_tag', 'latin', 'name', 'other_names',
       'poisonous', 'wikipedia'],
      dtype='object')

In [45]:
# Write the columns the app needs to a JSON file, one record per index value.
info_columns = ['poisonous', 'latin', 'name']
info_json_path = app_path / 'static/mushroom_info.json'
df_mushrooms[info_columns].to_json(info_json_path, orient='index')

# Train the model

## generate databunch

Add some data augmentation with `get_transforms`:

In [32]:
# Augmentation transforms from fastai's `get_transforms`; only the arguments
# listed here are overridden, everything else keeps the library default.
tfms = get_transforms(
    flip_vert=True,     # allow vertical flips as well
    max_lighting=0.1,   # upper bound for lighting changes
    max_zoom=1.05,      # upper bound for random zoom
    max_warp=0.5,       # upper bound for perspective warping
)

Define batch size, image size:

In [33]:
# Seed NumPy's global generator first so the random validation split is repeatable.
np.random.seed(42)

size = 128  # image size
bs = 32     # batch size (64 was the previous value)

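Create an `ImageDataBunch` from `path` by splitting the data in the folder, labelled in a file `csv_labels`, between a training and a validation set. (This paragraph and the parameter notes that follow describe `ImageDataBunch.from_csv` and are kept for reference; the code below uses the data block API instead and takes each label from the name of the image's folder.)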

Use `valid_pct` to indicate the percentage of the total images to use as the validation set. An optional test folder contains unlabelled data and suffix contains an optional suffix to add to the filenames in `csv_labels` (such as '.jpg').
* `fn_col` is the index (or the name) of the column containing the filenames,
* `label_col` is the index (indices) (or the name(s)) of the column(s) containing the labels.
* Use `header` to specify the format of the csv header, and
* `delimiter` to specify a non-standard csv-field separator.

In case your csv has no header, column parameters can only be specified as indices. If `label_delim` is passed, split what's in the label column according to that separator.

In [None]:
# doc(ImageDataBunch)

In [34]:
# Collect the images under `data_folder`, hold out a random 20% as the
# validation set and label every image with the name of its folder.
# The seed is passed to the split itself, so the validation set stays the same
# even when this cell is re-run without first re-running the cell that seeds NumPy.
src = (ImageList.from_folder(path=data_folder)
       .split_by_rand_pct(0.2, seed=42)
       .label_from_folder())

## Train resnet34; image size=128 

In [35]:
# Build the databunch for the first training stage.
# `size` and `bs` come from the configuration cell above; previously they were
# defined but ignored (the size was hard-coded and the batch size silently fell
# back to the library default).
img_data = (src.transform(tfms, size=size)
            .databunch(bs=bs)
            .normalize(imagenet_stats))  # normalize with the ImageNet statistics

First image of the training dataset:

In [None]:
img_data.train_ds[0]

First image of the validation dataset:

In [None]:
img_data.valid_ds[0]

Show some images from the dataset:

In [None]:
img_data.show_batch(rows=3, figsize=(10,8))

Number of classes in the dataset

In [36]:
# Number of categories
img_data.c

43

In [37]:
# Names of the categories
img_data.classes

['Amanita_Caesarea',
 'Amanita_Citrina',
 'Amanita_Muscaria',
 'Amanita_Pahlloides',
 'Amanita_Pantherina',
 'Amanita_Vaginata',
 'Boletus_Aereus',
 'Boletus_Calopus',
 'Boletus_Edulis',
 'Boletus_Lupinus',
 'Boletus_Regius',
 'Calocybe_Gambosa',
 'Cantharellus_Cibarius',
 'Cantharellus_Lutescens',
 'Chroogomphus_Rutilus',
 'Clathus_Ruber',
 'Clavulina_Cinerea',
 'Clitocybe_Costata',
 'Craterellus_Cinereus',
 'Craterellus_Cornucopioides',
 'Entoloma_Lividum',
 'Gyromitra_Esculenta',
 'Helvella_Crispa',
 'Helvella_Lacunosa',
 'Hydnum_Albidum',
 'Hydnum_Rufescens',
 'Hygrophorus_Eburneus',
 'Hygrophorus_Latitabundus',
 'Hygrophorus_Marzuolus',
 'Hygrophorus_Personii',
 'Hygrophorus_Russula',
 'Lactarius_Deliciosus',
 'Lactarius_Sanguifluus',
 'Lactarius_Vinosus',
 'Macrolepiota_Procera',
 'Marasmius_Oreades',
 'Morchella_Deliciosa',
 'Omphalotus_Olearius',
 'Phallus_Impudicus',
 'Rubroboletus_Satanas',
 'Russula_Aurea',
 'Russula_Cyanoxantha',
 'Russula_Delica']

Create a learner with the resnet34 architecture:

In [None]:
# Learner on a resnet34 architecture, reporting accuracy and error rate.
#
# Alternative metrics used for a multi-label classifier (kept for reference):
# acc_02 = partial(accuracy_thresh, thresh=0.2)
# f_score = partial(fbeta, thresh=0.2)
# model = cnn_learner(img_data, models.resnet34, metrics=[acc_02, f_score])

model = cnn_learner(img_data, models.resnet34, metrics=[accuracy, error_rate])

In [None]:
model.fit_one_cycle(1)

In [None]:
# Checkpoint after the first cycle.
# NOTE(review): this is the only save that passes a path under `path_gdrive`;
# the later checkpoints pass a bare name — confirm the different location is intended.
model.save(path_gdrive / 'mushrooms_1_cycle')

In [None]:
model.fit_one_cycle(2)

In [None]:
model.save('mushrooms_3_cycles')

In [None]:
interpret = ClassificationInterpretation.from_learner(model)

In [None]:
interpret.plot_top_losses(4, figsize=(20, 25))

In [None]:
interpret.plot_confusion_matrix(figsize=(20,20), dpi=60)

In [None]:
model.lr_find()

In [None]:
model.recorder.plot()

In [None]:
# Unfreeze the model and train 3 more cycles with max_lr given as the range 1e-3 .. 5e-2.
# NOTE(review): the range was presumably read off the lr_find plot above — confirm.
model.unfreeze()
model.fit_one_cycle(3, max_lr=slice(1e-03, 5e-02))

In [None]:
model.save('stage-1-128-rn34_lr_6_cycles')

In [None]:
model.export('export_stage-1-128-rn34_lr_6_cycles.pkl')

In [None]:
model.fit_one_cycle(5, max_lr=slice(1e-03, 1e-02))

## Train resnet34; image size=256

Continue training on larger images (progressive resizing): create a new databunch with images of size=256, using the same augmentation transforms as before.

In [None]:
# Rebuild the databunch from the same split and transforms, now at 256px.
# The batch size is taken from the configuration cell (`bs`) instead of silently
# falling back to the library default, which matters more with the larger images.
img_data = (src.transform(tfms, size=256)
            .databunch(bs=bs)
            .normalize(imagenet_stats))  # normalize with the ImageNet statistics

In [None]:
model.data = img_data

In [None]:
# Freeze the model again, then run the learning-rate finder on the 256px data
# and plot the recorded curve to choose a learning rate for this stage.
model.freeze()
model.lr_find()
model.recorder.plot()

In [None]:
lr = 1e-3/2
model.fit_one_cycle(2, slice(lr))

In [None]:
model.fit_one_cycle(3, slice(lr))

In [None]:
# NOTE(review): the checkpoint name still says "128" although this stage trains on the
# size=256 databunch; a rename would also have to be applied to the load/export cells below.
model.save('stage-2-128-rn34_lr_5_cycles')

In [None]:
model.load('stage-2-128-rn34_lr_5_cycles')

In [None]:
model.export('export_stage-2-128-rn34_lr_5_cycles.pkl')

# Test model with other images:

In [6]:
# File name of the test image (resolved against the home directory below).
# Exactly one choice is active; the others are kept as commented alternatives,
# so no assignment is silently overwritten by a later line.
# image = 'rovello.jpg'
# image = 'phallus.jpg'
# image = 'fredolic.jpg'
image = 'aguacate.jpg'

In [7]:
# Load the exported learner from `model_path` for inference.
# NOTE(review): no file name is given here, while the export cells above write files named
# 'export_stage-...pkl' — presumably the export is copied/renamed into `model_path` by hand; confirm.
predictor = load_learner(model_path)

In [8]:
image_path = Path.home() / image

In [9]:
img = open_image(image_path)

In [10]:
# `predict` returns a tuple; as the outputs below show, it holds the predicted
# category, its class index (a tensor) and the tensor of per-class probabilities.
result = predictor.predict(img)

In [19]:
type(result)

tuple

In [11]:
# Turn the predicted category into its plain class name, dropping a
# 'Category ' prefix if the string form carries one.
predicted_label = str(result[0])
mushroom_predicted = predicted_label.replace('Category ', '')
mushroom_predicted

'Boletus_Regius'

In [12]:
result[1]

tensor(10)

In [13]:
# Highest class probability as a plain Python float.
# The tensor's own `max()` is used instead of the built-in `max`, which would
# iterate over the tensor element by element in Python.
probability = result[2].max().item()
probability

0.3197043836116791

In [14]:
result[2]

tensor([3.8417e-03, 8.5018e-05, 5.1163e-05, 2.7157e-01, 2.5229e-06, 3.6156e-05,
        1.6173e-04, 2.1466e-01, 3.1631e-04, 1.3586e-04, 3.1970e-01, 8.1715e-05,
        1.3885e-04, 1.6182e-03, 1.8477e-04, 3.6631e-05, 4.1688e-06, 9.7293e-06,
        2.8626e-04, 3.0337e-04, 2.7231e-05, 2.1254e-04, 4.2989e-06, 1.8701e-04,
        3.7796e-06, 9.6503e-07, 4.1749e-05, 5.5567e-05, 2.2808e-05, 1.6806e-04,
        1.5257e-05, 4.3757e-07, 1.8432e-06, 8.5881e-06, 6.3273e-06, 3.4058e-05,
        1.0502e-04, 1.1173e-04, 1.4493e-04, 5.7671e-04, 1.8461e-01, 4.1601e-04,
        2.0296e-05])

In [15]:
max(result[2])

tensor(0.3197)

Generate text to be shown in web app

In [16]:
# Load the per-species info used by the app, keyed by class name.
# The encoding is stated explicitly so that any non-ASCII characters in the
# mushroom names decode the same way regardless of the platform default.
with open(app_path/'static/mushroom_info.json', 'r', encoding='utf-8') as fp:
    mushroom_info = json.load(fp)
mushroom_info

{'Amanita_Caesarea': {'poisonous': 'Not poisonous',
  'latin': 'Amanita Caesarea',
  'name': 'Ou de reig'},
 'Amanita_Citrina': {'poisonous': 'Not poisonous',
  'latin': 'Amanita Citrina',
  'name': 'Reig bord groc'},
 'Amanita_Muscaria': {'poisonous': 'Not Poisonous',
  'latin': 'Amanita Muscaria',
  'name': 'reig de fageda / reig bord'},
 'Amanita_Pahlloides': {'poisonous': 'Poisonous',
  'latin': 'Amanita Pahlloides',
  'name': 'Farinera borda'},
 'Amanita_Pantherina': {'poisonous': 'Poisonous',
  'latin': 'Amanita Pantherina',
  'name': '----'},
 'Amanita_Vaginata': {'poisonous': 'Not poisonous',
  'latin': 'Amanita Vaginata',
  'name': '----'},
 'Boletus_Aereus': {'poisonous': 'Not poisonous',
  'latin': 'Boletus Aereus',
  'name': '----'},
 'Boletus_Calopus': {'poisonous': 'Not Poisonous',
  'latin': 'Boletus Calopus',
  'name': 'Mataparent amarg'},
 'Boletus_Edulis': {'poisonous': 'Not poisonous',
  'latin': 'Boletus Edulis',
  'name': 'Cep'},
 'Boletus_Lupinus': {'poisonous': '

In [17]:
# Fetch the record of the predicted species once, then build the message to display.
species_info = mushroom_info[mushroom_predicted]
latin, poisonous, other_names = (
    species_info[key] for key in ('latin', 'poisonous', 'name')
)
text = (
    f'Mushroom: {latin} (probability = {100*probability:.2f}%)\n'
    f'Other names: {other_names}\n'
    f'Poisonous: {poisonous}'
)
print(text)

Mushroom: Boletus Regius (probability = 31.97%)
Other names: ----
Poisonous: Not poisonous


In [39]:
# Same message as in the previous cell, displayed as a raw string
# (newlines visible as '\n') instead of being printed.
text = ''.join((
    f'Mushroom: {latin} (probability = {100*probability:.2f}%)\n',
    f'Other names: {other_names}\n',
    f'Poisonous: {poisonous}',
))
text

'Mushroom: Boletus Regius (probability = 31.97%)\nOther names: ----\nPoisonous: Not poisonous'

# Deploy Web Page with Elastic Beanstalk

Tutorial Client: https://docs.aws.amazon.com/elasticbeanstalk/latest/dg/eb-cli3.html

Tutorial Bundle: https://docs.aws.amazon.com/elasticbeanstalk/latest/dg/applications-sourcebundle.html

# References

* [Fastai multi label](https://gilberttanner.com/blog/fastai-multi-label-image-classification)
* [Google images scraping](https://medium.com/@intprogrammer/how-to-scrape-google-for-images-to-train-your-machine-learning-classifiers-on-565076972ce)
* [Basic mushroom info](https://bolets.info/)
* [Mushroom names in Latin and additional info](https://ca.wikipedia.org/)
* [Instagram @natros56](https://www.instagram.com/explore/tags/indexboletsnatros56_cat/)




# Not Used