In [None]:
!pip install -Uqq fastbook 
import fastbook 
fastbook.setup_book()

In [None]:
from fastai.vision.all import * 
from fastbook import * 

[Further Research](https://github.com/fastai/fastbook/blob/master/04_mnist_basics.ipynb)

1. Create your own implementation of `Learner` from scratch, based on the training loop shown in this chapter.

In [None]:
from fastai.vision.all import *
from fastbook import * 


def mnist_loss(predictions, targets):
    """Mean distance between sigmoid-squashed predictions and binary targets.

    Args:
        predictions: Tensor of raw model outputs (any real values).
        targets: Tensor broadcastable against `predictions`; entries equal
            to 1 mark the positive class, everything else the negative class.

    Returns:
        Scalar tensor: the mean of `1 - p` where the target is 1 and of `p`
        elsewhere, with `p = sigmoid(predictions)`.
    """
    probabilities = torch.sigmoid(predictions)
    is_positive = targets == 1
    # Positive items are penalised for being far from 1, the others for
    # being far from 0.
    per_item = torch.where(is_positive, 1 - probabilities, probabilities)
    return per_item.mean()


def build_dset(path_three, path_seven):
    """Build a labelled dataset from two image directories.

    Args:
        path_three: Directory whose images receive label 1.
        path_seven: Directory whose images receive label 0.

    Returns:
        Whatever `_build_dset` returns for the two loaded image tensors.
    """
    # Load the first directory before the second, then hand both over to
    # the tensor-level builder.
    return _build_dset(
        _build_tensor_from_image(path_three),
        _build_tensor_from_image(path_seven),
    )


def _build_dset(threes, sevens):
    x = torch.cat([threes, sevens])
    x = x.view(-1, 28 * 28)
    y = tensor([1] * len(threes) + [0] * len(sevens))
    y = y.unsqueeze(1)
    return list(zip(x, y))
    
    
def _build_tensor_from_image(path):
    """Load every image listed under `path` into one float tensor.

    Args:
        path: Directory-like object exposing `ls()`; its entries are opened
            as images in sorted order.

    Returns:
        The stacked images as a float tensor with values divided by 255.
    """
    image_paths = path.ls().sorted()
    stacked = torch.stack([tensor(Image.open(p)) for p in image_paths])
    # Convert to float and divide by 255.
    return stacked.float() / 255


def batch_accuracy(xb, yb):
    """Fraction of a batch classified correctly at a 0.5 threshold.

    Args:
        xb: Tensor of raw model outputs for the batch (despite the name,
            these are predictions: a sigmoid is applied to them here).
        yb: Tensor of targets compared element-wise with the thresholded
            predictions.

    Returns:
        Scalar float tensor: the mean of the element-wise matches.
    """
    predicted_positive = torch.sigmoid(xb) > 0.5
    matches = predicted_positive == yb
    return matches.float().mean()

    
class BasicOptim:
    """Plain gradient-descent optimizer: `param -= grad * learning_rate`.

    Args:
        params: Iterable of parameters to update. It is materialised into a
            list so that a generator (e.g. `model.parameters()`) can be
            walked on every step.
        learning_rate: Multiplier applied to each gradient.
    """

    def __init__(self, params, learning_rate):
        self.params = list(params)
        self.learning_rate = learning_rate

    def step(self, *args, **kwargs):
        """Apply one update to every parameter that has a gradient.

        Extra positional/keyword arguments are accepted and ignored.
        """
        for param in self.params:
            # A parameter has no gradient when it did not take part in the
            # last backward pass, or right after `zero_grad()`; skip it
            # instead of failing on `None.data`.
            if param.grad is None:
                continue
            param.data -= param.grad.data * self.learning_rate

    def zero_grad(self, *args, **kwargs):
        """Drop the stored gradient of every parameter.

        Extra positional/keyword arguments are accepted and ignored.
        """
        for param in self.params:
            param.grad = None
            
            
class CustomLearner:
    """Minimal training loop tying together data, a model, an optimizer and a loss.

    Args:
        dls: Indexable pair of loaders; `dls[0]` yields training batches and
            `dls[1]` validation batches, each batch being an `(xb, yb)` pair.
        model: Callable mapping a batch of inputs to predictions.
        opt: Optimizer exposing `step()` and `zero_grad()`.
        loss_func: Callable `(predictions, targets)` whose result supports
            `backward()`.
        metrics: Callable `(predictions, targets)` returning one tensor
            score per validation batch.
    """

    def __init__(self, dls, model, opt, loss_func, metrics=accuracy):
        self.train_dl = dls[0]
        self.valid_dl = dls[1]
        self.model = model
        self.opt = opt
        self.loss_func = loss_func
        self.metrics = metrics

    def fit(self, n_epochs, learning_rate=None):
        """Train for `n_epochs`, printing the validation score after each one.

        Args:
            n_epochs: Number of passes over the training loader.
            learning_rate: When given, it is applied to the optimizer before
                training starts; when omitted, the optimizer keeps the rate
                it was created with.
        """
        if learning_rate is not None:
            self._set_learning_rate(learning_rate)
        self.train_model(n_epochs)

    def _set_learning_rate(self, learning_rate):
        """Push `learning_rate` into the optimizer, whichever form it stores it in."""
        if hasattr(self.opt, "learning_rate"):
            # Optimizers that keep the rate in a `learning_rate` attribute.
            self.opt.learning_rate = learning_rate
        elif hasattr(self.opt, "param_groups"):
            # `torch.optim`-style optimizers keep one "lr" per parameter group.
            for group in self.opt.param_groups:
                group["lr"] = learning_rate
        # Any other optimizer is left untouched.

    def train_model(self, n_epochs):
        """Run `n_epochs` training epochs, printing the validation score of each."""
        for _ in range(n_epochs):
            self.train_epoch()
            print(self.validate_epoch())

    def train_epoch(self):
        """Do one optimizer update per training batch."""
        for xb, yb in self.train_dl:
            self.calc_grad(xb, yb)
            self.opt.step()
            # Clear gradients so the next batch does not accumulate onto them.
            self.opt.zero_grad()

    def calc_grad(self, xb, yb):
        """Compute the loss on one batch and back-propagate it."""
        predictions = self.model(xb)
        loss = self.loss_func(predictions, yb)
        loss.backward()

    def validate_epoch(self):
        """Return the mean per-batch metric on the validation loader, rounded to 4 places."""
        # No gradients are needed for scoring, so skip building the graph.
        with torch.no_grad():
            scores = [self.metrics(self.model(xb), yb) for xb, yb in self.valid_dl]
        return round(torch.stack(scores).mean().item(), 4)

# Fetch the 3s-vs-7s sample of MNIST and keep the path to it.
path = untar_data(URLs.MNIST_SAMPLE)
path.ls()

# Labelled datasets for both splits, built from the "3" and "7" folders
# (training first, then validation).
train_dset, valid_dset = (
    build_dset(path/split/"3", path/split/"7") for split in ("train", "valid")
)

# One loader per split, 256 items per batch.
train_dl, valid_dl = (
    DataLoader(dset, batch_size=256) for dset in (train_dset, valid_dset)
)
dls = DataLoaders(train_dl, valid_dl)

# A single linear layer: 28 * 28 pixel values in, one score out.
linear_model = nn.Linear(28 * 28, 1)

# NOTE(review): the optimizer is created with a learning rate of 1. while
# `fit` below is asked for 0.1 -- confirm which of the two is intended.
opt = BasicOptim(linear_model.parameters(), 1.)

custom_learner = CustomLearner(
    dls,
    linear_model,
    opt=opt,
    loss_func=mnist_loss,
    metrics=batch_accuracy,
)

custom_learner.fit(5, 0.1)

2. Complete all the steps in this chapter using the full MNIST datasets (that is, for all digits, not just 3s and 7s).

In [None]:
from fastai.vision.all import *
from fastbook import *

# first we call `get_image_files`
# to get a list of paths to every image in a given dataset

# we convert each image from this list to a tensor,
# the result is a list of tensors, where each tensor represents an image

# then, we stack all of those tensors together into a single tensor
# (stacking does not average anything: every image is kept as its own entry),
# so if the path was pointing to the `4` dataset,
# this tensor would hold all of the 4s in the dataset

# finally, each element within the tensor is converted to a float
# and divided by 255
# so that the intensity of grayness lies between 0 and 1
# and not between 0 and 255 anymore.

# the `view` method with the -1 and 28 * 28 parameters is used
# to flatten each image from shape `torch.Size([28, 28])`
# to a vector whose shape is `torch.Size([28 * 28])`.

# build_x helps us build the independent variable, `x`,
# i.e. the input image which we are trying to categorize.
def build_x(path):
    """Load every image found under `path` as one row of a float tensor.

    Args:
        path: Directory handed to `get_image_files`; the files it returns
            are read in sorted order.

    Returns:
        Float tensor with one row of 28 * 28 values per image, each value
        divided by 255.
    """
    image_paths = get_image_files(path).sorted()
    pixels = torch.stack([tensor(Image.open(p)) for p in image_paths])
    # Divide by 255 first, then flatten every image into a single row.
    scaled = pixels.float() / 255
    return scaled.view(-1, 28 * 28)


# first we call `get_image_files`
# to get a list of paths to every image in a given dataset

# all of those image paths are located in a given directory
# where the directory represents the category of the image
# so all 4s are located under the `4` directory.
# for all images in a data set (training or testing), we get their label (by inferring which directory they belong to)
# as a result, we have a long list of labels, one for each image

# then we convert this list of labels into a tensor

# each label corresponds to a given x.
# this is why we needed to sort the image paths in both x and y.
# we want to build a dataset with an input image, and its target label.

# `build_y` helps us build the dependent variable,
# i.e. `y`
def build_y(path):
    """Read the integer label of every image found under `path`.

    Args:
        path: Directory handed to `get_image_files`; the files it returns
            are visited in sorted order.

    Returns:
        Tensor of labels, one per image, where each label is the name of
        the image's parent directory converted to `int`.
    """
    image_paths = get_image_files(path).sorted()
    # The parent directory name is the label.
    return tensor([int(p.parent.name) for p in image_paths])

# `build_dl` zips the lists x and y
# it creates a list of tuples
# where the first element of the tuple is x, an input image,
# and the second element is y, the label of x.

# this dataset is then fed into a `DataLoader` with a batch size of 256
# it needs to be shuffled because the image paths are sorted, i.e. grouped by label:
# without shuffling, almost every batch would contain a single digit only.
def build_dl(x, y, batch_size=256, shuffle=True):
    """Wrap paired inputs and labels in a `DataLoader`.

    Args:
        x: Sequence of inputs (iterated item by item).
        y: Sequence of labels, in the same order as `x`.
        batch_size: Number of items per batch. Defaults to 256.
        shuffle: Whether the loader shuffles the items. Defaults to True;
            pass False when the order does not matter, e.g. for validation.

    Returns:
        A `DataLoader` over the list of `(x_item, y_item)` tuples.
    """
    # `zip` stops at the shorter of the two sequences.
    dset = list(zip(x, y))
    return DataLoader(dset, batch_size=batch_size, shuffle=shuffle)

# Fetch the full MNIST dataset (all ten digits) and keep the path to it.
path = untar_data(URLs.MNIST)
path.ls()

# Training inputs and labels, and the loader built from them.
train_x = build_x(path/"training")
train_y = build_y(path/"training")
train_dl = build_dl(train_x, train_y)

# Same steps for the validation data, read from the "testing" folder.
valid_x = build_x(path/"testing")
valid_y = build_y(path/"testing")
valid_dl = build_dl(valid_x, valid_y)

# Bundle both loaders into a single `DataLoaders` instance.
dls = DataLoaders(train_dl, valid_dl)

# A small sequential network: 28 * 28 inputs -> 50 hidden units -> 10 outputs.
simple_net = nn.Sequential(
    nn.Linear(28 * 28, 50),
    nn.ReLU(),
    nn.Linear(50, 10),
)

# Learner configuration:
# - `SGD` as the optimizer;
# - `nn.CrossEntropyLoss` as the loss, because there are more than two
#   categories to predict (the binary `mnist_loss` no longer applies);
# - `accuracy` as the metric;
# - a default learning rate of 0.1.
learner = Learner(
    dls,
    simple_net,
    opt_func=SGD,
    loss_func=nn.CrossEntropyLoss(),
    metrics=accuracy,
    lr=0.1,
)

# Train for 20 epochs.
learner.fit(20)