# Seedlings Classifier

## Import libraries and dependencies

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# This file contains all the main external libs we'll use
from fastai.imports import *

In [None]:
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [None]:
import os
import random

## Designate the path to data and assign image size/batch size

In [None]:
# Root directory of the dataset; later cells read its 'train', 'valid' and 'test' subfolders.
PATH = "data/seedlings/"
# Initial image size handed to get_data().
sz=112
# Batch size handed to get_data().
bs=64

## Randomly create a validation set from training set

In [None]:
# if not os.path.exists(f'{PATH}valid'):
#     os.makedirs(f'{PATH}valid')

# train_path=f'{PATH}train/'
# dirlist = [ item for item in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, item)) ]

# for dir_index in range(len(dirlist)):
#     if not os.path.exists(f'{PATH}valid/{dirlist[dir_index]}'):
#         os.makedirs(f'{PATH}valid/{dirlist[dir_index]}')
        
#     src_path = f'{PATH}train/{dirlist[dir_index]}/'
#     dst_path = f'{PATH}valid/{dirlist[dir_index]}/'
    
#     list_src = os.listdir(src_path)
#     number_files = len(list_src)
#     number_to_extract = int(.3 * number_files)
    
#     for file_index in range(number_to_extract):
#         random_filename = random.choice([
#             x for x in os.listdir(src_path)
#             if os.path.isfile(os.path.join(src_path, x))
#         ])
#         shutil.move(src_path + str(random_filename), dst_path + str(random_filename))
    

## Check that CUDA is available and cuDNN is enabled

In [None]:
# Reports whether PyTorch can see a usable CUDA device.
torch.cuda.is_available()

In [None]:
# Reports whether PyTorch's cuDNN backend is enabled.
torch.backends.cudnn.enabled

## 1. Explore Data

In [None]:
# Top-level entries of the dataset directory.
os.listdir(PATH)

In [None]:
# Entries of the 'valid' folder.
os.listdir(f'{PATH}valid')

In [None]:
# First five entries of the 'valid/maize' folder.
files = os.listdir(f'{PATH}valid/maize')[:5]
files

In [None]:
# Load the first of those files as an array and display it.
img = plt.imread(f'{PATH}valid/maize/{files[0]}')
plt.imshow(img);

In [None]:
# Dimensions of the loaded image array.
img.shape

In [None]:
# Top-left 4x4 corner of the image array.
img[:4,:4]

# 2.  Initial Model

In [None]:
# Architecture passed to tfms_from_model() and ConvLearner.pretrained() in the cells below.
arch=resnet34

In [None]:
def get_data(sz,bs):
    """Build the ImageClassifierData for images of size `sz`, loaded in batches of `bs`.

    Reads the 'train', 'valid' and 'test' folders under PATH, using the
    transforms that `tfms_from_model` derives for `arch` with side-on
    augmentation and max_zoom=1.1.

    Returns `data` itself when sz > 300, otherwise the result of
    `data.resize(340, 'tmp')`.
    """
    tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    # Forward the augmented transforms and the requested batch size; without
    # them the augmentation and `bs` arguments are silently ignored.
    data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, trn_name='train',
                                          val_name='valid', test_name='test', num_workers=4)

    return data if sz>300 else data.resize(340, 'tmp')

In [None]:
# Dataset at the initial image size and batch size defined above.
data = get_data(sz,bs)

In [None]:
# Pretrained learner with precompute=True.
# NOTE(review): per the 'Data Augmentation' heading later in this notebook,
# augmentation only takes effect once precompute is False.
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [None]:
# First argument is the learning rate; the second is presumably the number
# of epochs/cycles (fastai `fit` signature - confirm).
learn.fit(0.1, 3)

## 3. Choosing a learning rate

In [None]:
# Rebinds `learn` to a fresh pretrained learner with precompute disabled;
# the learner fitted above is no longer referenced.
learn = ConvLearner.pretrained(arch, data, precompute=False)

In [None]:
# Run the learning-rate finder; its results are plotted from learn.sched below.
lrf=learn.lr_find()

In [None]:
# Learning-rate schedule recorded by the scheduler.
learn.sched.plot_lr()

In [None]:
# Scheduler plot used to choose a learning rate (presumably loss vs. learning rate - confirm).
learn.sched.plot()

## Improving the model
### Data Augmentation - Set precompute to False so that data augmentation can take effect

In [None]:
# Already False from the ConvLearner.pretrained(..., precompute=False) call above; set explicitly here.
learn.precompute = False

In [None]:
# Learning rate .1, 2 cycles, with cycle_len=1.
learn.fit(.1, 2, cycle_len=1)

In [None]:
# Learning-rate schedule used during the fit above.
learn.sched.plot_lr()

In [None]:
# NOTE(review): the checkpoint name says 224, but `data` was built with sz=112 - confirm the intended name.
learn.save('224_lastlayer')

In [None]:
learn.load('224_lastlayer')

### Increase Size - Increase the size of images in the data set

In [None]:
# Swap in a dataset built at image size 299. Since 299 is not > 300,
# get_data() returns the data.resize(340, 'tmp') variant.
learn.set_data(get_data(299,bs))

In [None]:
# Same training call as before, now on the larger images.
learn.fit(.1, 2, cycle_len=1)

### Fine Tuning - Unfreeze and retrain layers

In [None]:
learn.unfreeze()

In [None]:
lr=np.array([.001,.01,.1])

In [None]:
learn.fit(.1, 3, cycle_len=1, cycle_mult=2)

In [None]:
learn.save('224_all')

In [None]:
learn.load('224_all')

### TTA - Test Time Augmentation

In [None]:
# TTA() returns log-predictions and the targets `y`.
# NOTE(review): presumably one set of predictions per augmentation along axis 0 - confirm.
log_preds,y = learn.TTA()
# Exponentiate and average over axis 0.
probs = np.mean(np.exp(log_preds),0)

In [None]:
# Accuracy of the averaged predictions against `y`.
accuracy_np(probs, y)

## Analyzing results

In [None]:
# Predicted class per sample: index of the largest TTA-averaged probability.
# Assumes `probs` is (n_samples, n_classes) as produced by the TTA cell.
preds = np.argmax(probs, axis=1)

# `shape` is an attribute, not a method.
print(preds.shape)

In [None]:
from sklearn.metrics import confusion_matrix
# Rows correspond to the true labels `y`, columns to the predicted labels `preds`.
cm = confusion_matrix(y, preds)

In [None]:
# Plot the matrix with the dataset's class names as labels.
plot_confusion_matrix(cm, data.classes)

# **Data Visualization**

In [None]:
# Rebinds `log_preds` to the output of learn.predict()
# (presumably validation-set log-probabilities - confirm).
log_preds = learn.predict()
log_preds.shape

In [None]:
# First ten rows of the log-predictions.
log_preds[:10]

In [None]:
# Rebinds `preds` and `probs` from the TTA section; `probs` is now 1-D (column 11 only).
preds = np.argmax(log_preds, axis=1)  # from log probabilities to a class index (0..11)
probs = np.exp(log_preds[:,11])        # pr(Sugar beet)

In [None]:
def rand_by_mask(mask):
    """Return up to 4 distinct random indices at which `mask` is true.

    Fewer than 4 indices are returned when fewer positions match, and an
    empty index array when none do, instead of raising ValueError.
    """
    idxs = np.where(mask)[0]
    if len(idxs) == 0: return idxs
    # Sampling without replacement cannot draw more items than are available.
    return np.random.choice(idxs, min(4, len(idxs)), replace=False)
def rand_by_correct(is_correct):
    """Pick random sample indices (via rand_by_mask) whose prediction correctness equals `is_correct`."""
    # Element-wise: True where the predicted class equals the validation label.
    prediction_hits = preds == data.val_y
    return rand_by_mask(prediction_hits == is_correct)

In [None]:
def plot_val_with_title(idxs, title):
    """Print `title`, then plot the denormalized validation items at `idxs` in one row.

    Each subplot is titled with the matching entry of the global `probs`.
    A later cell in this notebook redefines this function.
    """
    tensors, captions = [], []
    for i in idxs:
        tensors.append(data.val_ds[i][0])   # element 0 of the dataset item is the image
        captions.append(probs[i])
    batch = np.stack(tensors)
    print(title)
    return plots(data.val_ds.denorm(batch), rows=1, titles=captions)

In [None]:
def plots(ims, figsize=(12,6), rows=1, titles=None):
    """Show the images in `ims` on a grid with `rows` rows.

    ims     -- sequence of images accepted by plt.imshow.
    figsize -- figure size passed to plt.figure.
    rows    -- number of grid rows; columns are derived from len(ims).
    titles  -- optional sequence of per-image subplot titles.
    """
    f = plt.figure(figsize=figsize)
    # Ceiling division: a floor here leaves too few grid cells when len(ims)
    # is not a multiple of rows, and add_subplot then rejects the index.
    cols = -(-len(ims) // rows)
    for i, im in enumerate(ims):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None: sp.set_title(titles[i], fontsize=16)
        plt.imshow(im)

In [None]:
def load_img_id(ds, idx):
    """Open the image file of dataset entry `idx` (relative to PATH) and return it as a NumPy array."""
    img_path = PATH + ds.fnames[idx]
    pil_img = PIL.Image.open(img_path)
    return np.array(pil_img)

def plot_val_with_title(idxs, title):
    """Print `title`, then plot the image files for validation indices `idxs` in one row.

    Images are loaded with load_img_id, and each subplot is titled with the
    matching entry of the global `probs`. Replaces the earlier definition of
    the same name.
    """
    pictures, captions = [], []
    for i in idxs:
        pictures.append(load_img_id(data.val_ds, i))
        captions.append(probs[i])
    print(title)
    return plots(pictures, rows=1, titles=captions, figsize=(16,8))

In [None]:
def most_by_mask(mask, mult, scores=None):
    """Return up to 4 indices selected by `mask`, ordered by ascending `mult * scores`.

    mask   -- boolean array; only positions where it is true are considered.
    mult   -- 1 ranks the lowest scores first, -1 the highest first.
    scores -- per-sample values to rank by; defaults to the global `probs`.
    """
    if scores is None: scores = probs
    idxs = np.where(mask)[0]
    return idxs[np.argsort(mult * scores[idxs])[:4]]

def most_by_correct(y, is_correct):
    """Return up to 4 samples whose true class is `y`, ranked by the model's probability for `y`.

    is_correct=True  -- correctly classified samples with the highest pr(y).
    is_correct=False -- misclassified samples with the lowest pr(y).

    Note that plot_val_with_title still titles each image with the global `probs`.
    """
    # Rank by the probability of class `y` itself. The global `probs` holds a
    # single class column, and a `(y==1)` sign rule only makes sense for a
    # two-class problem.
    class_probs = np.exp(log_preds[:, y])
    mult = -1 if is_correct else 1
    return most_by_mask(((preds == data.val_y)==is_correct) & (data.val_y == y), mult, class_probs)

In [None]:
# Labels (class indices) of the validation data.
data.val_y

In [None]:
# Class names; per the original author, 'Black-grass' is labeled 0 and 'Sugar beet' is labeled 11.
data.classes

In [None]:
# 1. A few correct labels at random
plot_val_with_title(rand_by_correct(True), "Correctly classified")

In [None]:
# 2. A few incorrect labels at random
plot_val_with_title(rand_by_correct(False), "Incorrectly classified")

In [None]:
# 3. Per-class views: the first argument of most_by_correct is the class index,
# the second selects correctly (True) or incorrectly (False) classified samples.
# NOTE(review): the class names in the titles are hard-coded; confirm they match the order of data.classes.
plot_val_with_title(most_by_correct(0, True), "Most correct Black-grass")

In [None]:
plot_val_with_title(most_by_correct(0, False), "Most incorrect Black-grass")

In [None]:
plot_val_with_title(most_by_correct(1, True), "Most correct Charlock")

In [None]:
plot_val_with_title(most_by_correct(1, False), "Most incorrect Charlock")

In [None]:
plot_val_with_title(most_by_correct(2, True), "Most correct Cleavers")

In [None]:
plot_val_with_title(most_by_correct(2, False), "Most incorrect Cleavers")

In [None]:
plot_val_with_title(most_by_correct(3, True), "Most correct Chickweed")

In [None]:
plot_val_with_title(most_by_correct(3, False), "Most incorrect Chickweed")

In [None]:
plot_val_with_title(most_by_correct(4, True), "Most correct common wheat")

In [None]:
plot_val_with_title(most_by_correct(4, False), "Most incorrect common wheat")

In [None]:
plot_val_with_title(most_by_correct(5, True), "Most correct Fat Hen")

In [None]:
# is_correct=False selects misclassified Fat Hen samples, so the title must say "incorrect".
plot_val_with_title(most_by_correct(5, False), "Most incorrect Fat Hen")

In [None]:
# Classes 6-11, same pattern: most_by_correct(class index, is_correct).
plot_val_with_title(most_by_correct(6, True), "Most correct loose silky-bent")

In [None]:
plot_val_with_title(most_by_correct(6, False), "Most incorrect loose silky-bent")

In [None]:
plot_val_with_title(most_by_correct(7, True), "Most correct Maize")

In [None]:
plot_val_with_title(most_by_correct(7, False), "Most incorrect Maize")

In [None]:
plot_val_with_title(most_by_correct(8, True), "Most correct scentless mayweed")

In [None]:
plot_val_with_title(most_by_correct(8, False), "Most incorrect scentless mayweed")

In [None]:
plot_val_with_title(most_by_correct(9, True), "Most correct shepherds purse")

In [None]:
plot_val_with_title(most_by_correct(9, False), "Most incorrect shepherds purse")

In [None]:
plot_val_with_title(most_by_correct(10, True), "Most correct small flowered cranesbill")

In [None]:
plot_val_with_title(most_by_correct(10, False), "Most incorrect small flowered cranesbill")

In [None]:
# NOTE(review): the titles below spell the class 'sugar beat'; elsewhere in this notebook it is 'Sugar beet'.
plot_val_with_title(most_by_correct(11, True), "Most correct sugar beat")

In [None]:
plot_val_with_title(most_by_correct(11, False), "Most incorrect sugar beat")