In [8]:
from fastai.vision.all import *
from fastai.vision.data import DataBlock
path = untar_data(URLs.IMAGENETTE)

In [22]:
from fastai.vision.all import *

In [21]:
!pip install -Uqq fastai


In [3]:
!export PYTORCH_ENABLE_MPS_FALLBACK=1

In [9]:
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [7]:
!pytorch --version

/bin/bash: pytorch: command not found


In [11]:
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    item_tfms=Resize(460),
    batch_tfms=aug_transforms(size=224,min_scale=0.75),
)

dls = dblock.dataloaders(path, bs=64)

### -- BASELINE --

In [7]:
from fastai.losses import CrossEntropyLossFlat


model = xresnet50(n_out=dls.c)
learn = Learner(
    dls, 
    model, 
    loss_func=CrossEntropyLossFlat(),
    metrics=accuracy
)

#### [X] -- I AM GPU POOR BRO --

In [8]:
# @audit-ok : 5 epoch is + 6 hours!  FIX THIS SOMEHOW!
"""
epoch     train_loss  valid_loss  accuracy  time    
0         1.636237    2.114159    0.454070  4:09:37                                           
1         1.252930    1.402056    0.558252  1:15:53                                          
2         0.959257    1.068307    0.677745  1:49:34                                          
3         0.732410    0.760982    0.759895  13:54:36                                           
4         0.587539    0.550948    0.821509  1:04:46      
"""
learn.fit_one_cycle(5, 3e-3)

epoch,train_loss,valid_loss,accuracy,time


KeyboardInterrupt: 

### -- NORMALIZATION --

In [39]:
# one_batch() method retrieves a single batch from the DataLoader
# returned batch consists of a tuple, where
# - (x) is a batch of input data
# - (y) is a batch of labels
x,y = dls.one_batch()

# Computing the mean and standard deviation of the input data (x)
# Specifying dim=[0,2,3], you're asking PyTorch to compute the mean and standard  
# deviation across : 
# - batch, height, and width dimensions
# - separately for each color channel
# This will result in a mean and standard deviation for each color channel of  
# the images in the batch.
x.mean(dim=[0,2,3]), x.std(dim=[0,2,3])

# (TensorImage([0.4669, 0.4457, 0.4136]), TensorImage([0.2971, 0.2916, 0.3091]))

# Explain this result ^
# MEAN VALUE : on average, 
# - the red channel has a value of 0.4669, 
# - the green channel a value of 0.4457, and 
# - the blue channel a value of 0.4136.
# STANDARD DEVIATION :
# - the red channel has a standard deviation of 0.2971,
# - the green channel a standard deviation of 0.2916, and
# - the blue channel a standard deviation of 0.3091.

# These statistics can give you some insight into the characteristics of your  
# image dataset. 
# For example, if the means are very different between channels, 
# - that might indicate that certain colors are more dominant in your images. 
# Similarly, a high standard deviation means 
# - that the values vary a lot from the mean, 
# - while a low standard deviation means that the values are generally close to 
# the mean

(TensorImage([-0.0755,  0.2838,  0.4788]),
 TensorImage([1.1152, 1.0721, 1.2373]))

#### [X] -- get_dls(batch_size, resolution) --

In [41]:

def get_dls(batch_size, resolution):
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_items=get_image_files,
        get_y=parent_label,
        item_tfms=Resize(460),
        batch_tfms=[
            *aug_transforms(size=resolution, min_scale=0.75),
            Normalize.from_stats(*imagenet_stats)
        ]
    )

    return dblock.dataloaders(path, batch_size=batch_size)

dls = get_dls(64, 224)
x,y = dls.one_batch()
x.mean(dim=[0,2,3]), x.std(dim=[0,2,3])

# (TensorImage([-0.1231, -0.0449,  0.0777]), TensorImage([1.1290, 1.1470, 1.2299]))
# Explain what happened after normalization

# After normalization, you get (TensorImage([-0.1231, -0.0449,  0.0777]), 
# TensorImage([1.1290, 1.1470, 1.2299])). The goal of normalization is to 
# adjust the values of an array so they share a common scale, without 
# distorting differences in the ranges of values or losing information. 
# In machine learning, normalization is a common step as it can make training 
# less sensitive to the scale of features, so we can better solve for 
# coefficients.

# In this case, the normalization process has adjusted the mean and standard 
# deviation of your images. The new means are close to 0, and the new standard 
# deviations are close to 1. This is typically the goal of normalization in a 
# machine learning context: to shift the distribution of each feature to have a 
# mean of 0 and a standard deviation of 1. This helps to ensure that all 
# features have the same scale and the model does not become biased or overly 
# sensitive to features with larger scales.

(TensorImage([0.0573, 0.1165, 0.2405]), TensorImage([1.1963, 1.1895, 1.2938]))

In [42]:
model = xresnet50()
learn = Learner(
    dls,
    model,
    loss_func=CrossEntropyLossFlat(),
    metrics=accuracy
)


#### [X] -- GPU POOR AM CRY --

In [None]:
# @audit-ok : 5 epoch is + 6 hours!  FIX THIS SOMEHOW!
"""
epoch     train_loss  valid_loss  accuracy  time    
0         1.663105    2.947168    0.370052  2:50:35                                          
1         1.251406    1.662431    0.453697  1:11:42                                          
2         0.947334    0.836982    0.738984  58:35                                          
3         0.740129    0.654092    0.799104  1:01:49                                        
4         0.607023    0.568175    0.824496  56:57  
"""
learn.fit_one_cycle(5, 3e-3)

In [23]:
# Training at a much lower resolution than 224 x 224
dls = get_dls(12, 32) # @audit why batch size 128 instead of 64?

learn = Learner(
    dls,
    xresnet50(n_out=dls.c), # @audit ... explain?
    loss_func=CrossEntropyLossFlat(),
    metrics=accuracy
)

# @audit : Why are we only doing 4 instead of 5 epochs?
# - Is it because the smaller resolution is for more basic features?
# - And epoch 5 is for label correlated features?
learn.fit_one_cycle(4, 3e-3)

epoch     train_loss  valid_loss  accuracy  time    
0         2.192905    4.081844    0.252427  06:01                                          
1         1.530551    1.461747    0.522405  04:42                                          
2         1.210109    1.178366    0.629574  04:45                                          
3         1.161046    1.072473    0.663928  04:42                                          


In [46]:
def train_model(batch_size, resolution, epochs):
    """_summary_
    Encapsulates the training process for the model.

    Args:
        batch_size (_type_): _description_
        resolution (_type_): _description_
        epochs (_type_): _description_
    """
    dls = get_dls(batch_size, resolution)
    learn = Learner(
        dls,
        xresnet50(n_out=dls.c), # @audit: explain??
        loss_func=CrossEntropyLossFlat(),
        metrics=accuracy
    )

    learn.fit_one_cycle(epochs, 3e-3)

In [47]:
train_model(32, 64, 5)

epoch     train_loss  valid_loss  accuracy  time    
0         1.679859    2.173227    0.441748  18:23                                          
1         1.373671    1.227319    0.600821  1:50:47                                           
Epoch 3/5 : |██████████------------------------------| 26.65% [89/334 04:17<11:48 1.1863]

KeyboardInterrupt: 

In [24]:
learn.dls = get_dls(12, 64)

# fine tuning only the last epoch 5
# - effectively freezing the first 4 epochs
# - prevents forgetting the earlier base features
learn.fine_tune(5, 1e-3)

epoch     train_loss  valid_loss  accuracy  time    
0         1.098922    1.004947    0.684839  08:00                                          
epoch     train_loss  valid_loss  accuracy  time    
0         0.941069    0.900726    0.706497  07:56                                          
1         0.956869    0.816833    0.733010  07:10                                          
2         0.932111    0.747074    0.761016  06:25                                          
3         0.830758    0.703328    0.768484  06:14                                          
4         0.787315    0.680215    0.778566  06:15                                          


In [25]:
preds, targs = learn.get_preds()
accuracy(preds, targs).item()

                                                                                

0.7785661220550537

In [29]:
accuracy(preds, targs).item()

0.7785661220550537

In [30]:
preds_tta, targs_tta = learn.tta()
accuracy(preds_tta, targs_tta).item()

epoch     train_loss  valid_loss  accuracy  time    
Epoch 1/4 :                                                                                

0.7979835867881775

### -- MIXUP --

In [None]:
# Mixup manual code here

In [34]:
model = xresnet50(n_out=dls.c)
learn = Learner(
    dls,
    model,
    loss_func=CrossEntropyLossFlat(),
    metrics=accuracy,
    cbs=MixUp
)

learn.fit_one_cycle(5, 3e-3)

epoch     train_loss  valid_loss  accuracy  time    
0         2.305844    2.109548    0.328977  04:44                                          
1         1.787737    1.607176    0.491038  04:47                                          
2         1.668271    1.575980    0.522031  04:42                                          
3         1.525355    1.190362    0.623973  52:01                                           
4         1.440893    1.248785    0.616878  05:58                                          


### -- LABEL SMOOTHING --

In [36]:
model = xresnet50(n_out=dls.c)
learn = Learner(
    dls,
    model,
    loss_func=LabelSmoothingCrossEntropyFlat(),
    metrics=accuracy
)

learn.fit_one_cycle(5, 3e-3)

epoch     train_loss  valid_loss  accuracy  time    
0         2.143363    1.892292    0.442494  04:50                                          
1         1.658357    1.875266    0.495146  04:44                                          
2         1.560091    1.522520    0.582898  04:42                                          
3         1.394092    1.287970    0.679238  04:41                                          
4         1.335982    1.292297    0.692681  04:42                                          


### -- END OF CHAPTER QUIZ --

1 - What is the difference between ImageNet and Imagenette?  When is it better 
to experiment on one vs the other

- Imagenette is a fastai curated subset of ImageNet : 10 vs 1k categories
- Imagenette is useful for model engineering, and drafting out hypothesis.
ImageNet on the other hand is much larger, so is more suitable for Product 
Engineer, where ACCURACY in OUTPUT is prioritized over SPEED of ITERATION
- Fundamentally because Imagenette is smaller, it will not perform as well
as ImageNet on a broader range of tasks.

2 - What is Normalization?

- Normalization is standardizing input data to improve training effectiveness.
Training over a data collection with the SAME unit scale is easier than a data
collection with VARIED unit scale.  The model can focus on learning the features
of the data collection as opposed to learning the feature AND the varied scale
of the data collection.
- Typically this achieved by preprocessing the data set so that there's a mean
of 0 and a standard deviation of 1
    - mean == average essentially
        - we are trying to find the offset between the item value and the
        average of the full dataset (items_summed/num_of_items)
    - standard deviation
        - measure of how much each item value is varied around the mean
    - 6+5+4
        - mean = 5 (15/3)
        - deviation = 1

3 - Why didn't we have to care about normalization when using a pretrained 
model?

- We didn't have to care about normalization on previous pretrained models
because we were using fastai learner models.  And those models implicitly have
been pretrained on ImageNet with certain statistics in Normalize.  Fastai has
all info needed to auto normalize for us.
- However if we start from a pretrained data, we should normalize our data
to using the same statistics to match the pretrained model's data.  This ensures
consistency and performance.  Because then the model is learning the actual
features without getting poisoned by noise from mixing normalized and 
unnormalized values.

