# Optimizer tweaks

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from exp.nb_08 import *

## Imagenette data

In [3]:
path = datasets.untar_data(datasets.URLs.IMAGENETTE_160)

In [4]:
# Per-item transforms handed to ImageList.from_files; ResizeFixed(128) is
# presumably a fixed 128px resize — confirm in exp.nb_08.
tfms = [make_rgb, ResizeFixed(128), to_byte_tensor, to_float_tensor]
bs = 128  # batch size passed to to_databunch below

# Build the data in four stages: list files -> split -> label -> databunch.
il = ImageList.from_files(path, tfms=tfms)
# NOTE(review): presumably items under a 'val' grandparent folder form the validation set — confirm.
sd = SplitData.split_by_func(il, partial(grandparent_splitter, valid_name='val'))
ll = label_by_func(sd, parent_labeler, proc_y=CategoryProcessor())
# c_in=3 / c_out=10: presumably input channels and number of classes — confirm against to_databunch.
data = ll.to_databunch(bs, c_in=3, c_out=10, num_workers=4)

In [5]:
nfs = [32, 64, 128, 256]

In [6]:
# Callback classes / partials (not instances) handed to get_learn_run via `cbs=`.
cbfs = [partial(AvgStatsCallback, accuracy),
        CudaCallback,
        partial(BatchTransformXCallback, norm_imagenette)]

In [11]:
# NOTE(review): the third positional argument (.4) is presumably the learning rate — confirm against get_learn_run in exp.nb_08.
learn, run = get_learn_run(nfs, data, .4, conv_layer, cbs=cbfs)

In [12]:
run.fit(1, learn)

train: [1.6827741161102063, tensor(0.4204, device='cuda:0')]
valid: [1.45182470703125, tensor(0.5140, device='cuda:0')]


## Refining the optimizer

In [29]:
class Optimizer():
    """Optimizer that updates parameters by running them through stepper functions.

    Parameters are kept in groups. Each group owns its own dict of
    hyper-parameters, created as an independent copy of `defaults`, so one
    group's values can be changed without affecting the others.
    """
    def __init__(self, params, steppers, **defaults):
        """Store the parameter groups, their hyper-parameters and the steppers.

        params:   iterable of parameters, or a list of lists of parameters
                  (one inner list per group). May be a generator.
        steppers: a single callable, an iterable of callables, or None.
        defaults: hyper-parameters copied into every group's dict.
        """
        # `params` might be a generator, so materialise it exactly once.
        self.param_groups = list(params)
        # A flat list of parameters is treated as one single group.
        if not isinstance(self.param_groups[0], list):
            self.param_groups = [self.param_groups]
        # `{**defaults}` creates a fresh dict per group (no shared state).
        self.hypers = [{**defaults} for _ in self.param_groups]
        # Keep the steppers: `step()` reads `self.steppers`, and without this
        # assignment it fails with AttributeError. Normalise to a list so a
        # single callable or None is accepted as well as an iterable.
        if steppers is None:
            steppers = []
        elif callable(steppers):
            steppers = [steppers]
        self.steppers = list(steppers)

    def grad_params(self):
        """Return `(param, hyper)` pairs for every parameter whose `.grad` is not None."""
        return [(p, hyper) for pg, hyper in zip(self.param_groups, self.hypers)
               for p in pg if p.grad is not None]

    def zero_grad(self):
        """Call `detach_()` and then `zero_()` on the gradient of each parameter that has one."""
        for p, _ in self.grad_params():
            p.grad.detach_()
            p.grad.zero_()

    def step(self):
        """Pass each parameter with a gradient to `compose`, together with the
        steppers and that parameter's group hyper-parameters as keyword arguments."""
        for p, hyper in self.grad_params():
            compose(p, self.steppers, **hyper)

In [13]:
# list() applied to a list of lists returns a list with the same nesting (see the output below).
list([[4, 5], [1, 3]])

[[4, 5], [1, 3]]

In [23]:
d = dict(h=10, p=90)

In [24]:
# `{**d}` unpacks d into a brand-new dict on every iteration, so the list
# holds three separate dicts rather than three references to `d`.
ds = [{**d} for _ in range(3)]

In [25]:
ds

[{'h': 10, 'p': 90}, {'h': 10, 'p': 90}, {'h': 10, 'p': 90}]

In [26]:
ds[0]

{'h': 10, 'p': 90}

In [27]:
# Change only the first dict; the next cell's output shows the other two still have p == 90.
ds[0]['p'] = 101

In [28]:
ds

[{'h': 10, 'p': 101}, {'h': 10, 'p': 90}, {'h': 10, 'p': 90}]