In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
#export
from exp.nb_03Magda import *
from exp.nb_04Magda_corrected import *
from exp.nb_05bMagda import *
from exp.nb_06Magda import *

In [3]:
# Load MNIST: get_mnist() returns six values, unpacked here as inputs/labels for the
# train, validation and test splits (get_mnist comes from the imported exp modules).
train_x, train_y, valid_x, valid_y, test_x, test_y = get_mnist()

In [4]:
# Replace the three input sets with the outputs of norm_all().
# NOTE(review): presumably normalizes all splits with the training statistics — confirm in the exp module.
train_x, valid_x, test_x = norm_all(train_x, valid_x, test_x)

In [5]:
# Bundle the training and validation data for the learners below.
# NOTE(review): the trailing 5000 is presumably the batch size — confirm against DataBunch.
mnist_bunch = DataBunch(train_x, train_y, valid_x, valid_y, 5000)

## Manual monitoring

In [None]:
class SequentialModel(nn.Module):
    """Run a sequence of layers in order and log activation statistics per layer.

    After every layer call in `forward`, the mean and standard deviation of that
    layer's output are appended to `self.means[i]` / `self.stds[i]`, where `i` is
    the layer's position in `layers`.

    Args:
        layers: iterable of `nn.Module` objects, applied in iteration order.
    """
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        # Size the logs from the registered ModuleList rather than from `layers`:
        # `layers` may be a one-shot iterable (e.g. a generator) that nn.ModuleList
        # has already consumed, which would leave the logs empty and make
        # `forward` fail with an IndexError on the first layer.
        self.means = [[] for _ in self.layers]
        self.stds = [[] for _ in self.layers]

    def forward(self, x):
        """Apply each layer in turn, recording mean/std of every intermediate output.

        Returns the output of the last layer (or `x` unchanged if there are no layers).
        """
        for lidx, layer in enumerate(self.layers):
            x = layer(x)
            self.means[lidx].append(x.mean().item())
            self.stds[lidx].append(x.std().item())
        return x


In [None]:
class LearnerSequential(Learner):
    """Learner whose model is a `SequentialModel` built from a caller-supplied layer list.

    Args:
        data_bunch: forwarded unchanged to `Learner.__init__`.
        callback_list: callbacks forwarded to `Learner.__init__`; `None` means no callbacks.
        layers: layers handed to `SequentialModel`; `None` means an empty model.
    """
    def __init__(self, data_bunch, callback_list=None, layers=None):
        # `None` sentinels replace the former mutable `[]` defaults, which are
        # created once at definition time and shared by every instance.
        # `self.layers` is assigned before the parent constructor runs, as in the
        # original; presumably `Learner.__init__` calls `_get_model` — confirm in Learner.
        self.layers = [] if layers is None else layers
        super().__init__(data_bunch, [] if callback_list is None else callback_list)

    def _get_model(self, **kwargs):
        """Build `self.model` from `self.layers`; extra keyword arguments are ignored."""
        self.model = SequentialModel(self.layers)

In [None]:
# Callbacks handed to the learner, in the order they are listed.
callback_list = [
    CudaCallback(torch.device('cuda')),
    ReshapeMnist(),
    LossCallback(),
    AccuracyCallback(),
    ParamScheduler('lr', 'fixed_sched', 0.5),
]
# Four stride-2 convolutions, each followed by ReLU, then global average
# pooling, flattening and a 10-way linear head.
layer_list = [
    nn.Conv2d(1, 8, 5, padding=2, stride=2),
    nn.ReLU(),
    nn.Conv2d(8, 16, 3, padding=1, stride=2),
    nn.ReLU(),
    nn.Conv2d(16, 32, 3, padding=1, stride=2),
    nn.ReLU(),
    nn.Conv2d(32, 32, 3, padding=1, stride=2),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Flatten(),
    nn.Linear(32, 10),
]
learner_seq = LearnerSequential(mnist_bunch, callback_list, layer_list)

In [None]:
# Train the manually monitored model; this fills learner_seq.model.means / .stds.
# NOTE(review): 10 is presumably the number of epochs — confirm against Learner.fit.
learner_seq.fit(10)

In [None]:
# One labelled curve per layer: mean of the layer output at every forward call.
for lidx, layer_means in enumerate(learner_seq.model.means):
    plt.plot(layer_means, label=lidx)
plt.legend()

In [None]:
# One labelled curve per layer: std of the layer output at every forward call.
for lidx, layer_stds in enumerate(learner_seq.model.stds):
    plt.plot(layer_stds, label=lidx)
plt.legend()

## PyTorch hooks

In [None]:
from functools import partial
class LearnerCnn(Learner):
    """Learner with a small CNN whose layers log output statistics via forward hooks.

    `self.means[i]` / `self.stds[i]` collect the mean and standard deviation of
    the output of the i-th module of `self.model`, one entry per forward call.
    """
    def _get_model(self, num_out=10):
        """Build the network and register one stats-recording forward hook per layer."""
        conv_stages = [
            nn.Conv2d(1, 8, 5, padding=2, stride=2), nn.ReLU(),
            nn.Conv2d(8, 16, 3, padding=1, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, 3, padding=1, stride=2), nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1, stride=2), nn.ReLU(),
        ]
        head = [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(32, num_out)]
        self.model = nn.Sequential(*conv_stages, *head)

        n_layers = len(self.model)
        self.means = [[] for _ in range(n_layers)]
        self.stds = [[] for _ in range(n_layers)]
        for position, module in enumerate(self.model):
            # Bind the layer index now; the hook is then called with
            # (module, inputs, output) on every forward pass of that module.
            stats_hook = partial(self.append_stats, position)
            module.register_forward_hook(stats_hook)

    def append_stats(self, idx, mod, ins, outs):
        """Forward-hook body: log the mean and std of `outs` under layer index `idx`."""
        mean_log = self.means[idx]
        mean_log.append(outs.mean().item())
        std_log = self.stds[idx]
        std_log.append(outs.std().item())

In [None]:
# Build the hook-instrumented CNN learner with the basic callback set.
device = torch.device('cuda')
cb_list = [
    CudaCallback(device),
    ReshapeMnist(),
    LossCallback(),
    AccuracyCallback(),
]
# cb_list.append(CombineScheduler('lr', [0.3, 0.7], ['cosine_sched', 'cosine_sched'], 0.01, 0.5, 0.01))
learner_cnn = LearnerCnn(mnist_bunch, cb_list)

In [None]:
# Train; the forward hooks registered in LearnerCnn._get_model fill learner_cnn.means / .stds.
learner_cnn.fit(10)

In [None]:
# One curve per layer: output means collected by the forward hooks.
for layer_means in learner_cnn.means:
    plt.plot(layer_means)

In [None]:
# One curve per layer: output standard deviations collected by the forward hooks.
for layer_stds in learner_cnn.stds:
    plt.plot(layer_stds)

## Inspect weights and gradients

In [None]:
#export
class WeightsMonitoringCallback(Callback):
    """Record per-layer statistics of weights and weight gradients before each optimizer step.

    Results live in `self.learner.metrics` under the keys 'weights_mean',
    'weights_std', 'weights_grad_mean' and 'weights__grad_std'. Each value is a
    list with one sub-list per direct child of the model; children without a
    `weight` keep an empty sub-list.
    """
    # NOTE(review): 'weights__grad_std' has a double underscore, unlike the other
    # keys. It is kept unchanged so existing readers of the metric do not break.

    def fit_begin(self):
        """Reset all four metrics to one empty series per direct child of the model."""
        self.learner.metrics['weights_mean'] = [[] for _ in self.learner.model.children()]
        self.learner.metrics['weights_std'] = [[] for _ in self.learner.model.children()]
        self.learner.metrics['weights_grad_mean'] = [[] for _ in self.learner.model.children()]
        self.learner.metrics['weights__grad_std'] = [[] for _ in self.learner.model.children()]

    def before_optim_step(self):
        """Append the current weight and gradient mean/std for every child that has a weight."""
        metrics = self.learner.metrics
        for lidx, layer in enumerate(self.learner.model.children()):
            param = getattr(layer, 'weight', None)
            if param is None:
                # Child without a weight (or with weight set to None): nothing to record.
                continue

            values = param.detach()
            metrics['weights_mean'][lidx].append(values.mean().item())
            metrics['weights_std'][lidx].append(values.std().item())

            # Read .grad from the parameter itself. The tensor returned by detach()
            # has .grad == None, so going through it raised AttributeError, which
            # the former `except AttributeError: pass` swallowed — gradient
            # statistics were never recorded.
            grad = param.grad
            if grad is None:
                # No gradient has been computed for this weight; skip gradient stats.
                continue
            metrics['weights_grad_mean'][lidx].append(grad.mean().item())
            metrics['weights__grad_std'][lidx].append(grad.std().item())

In [None]:
# Same learner as before, now with WeightsMonitoringCallback added to the callbacks.
device = torch.device('cuda')
cb_list = [
    CudaCallback(device),
    ReshapeMnist(),
    LossCallback(),
    AccuracyCallback(),
    WeightsMonitoringCallback(),
]
# cb_list.append(CombineScheduler('lr', [0.3, 0.7], ['cosine_sched', 'cosine_sched'], 0.01, 0.5, 0.01))
learner_cnn = LearnerCnn(mnist_bunch, cb_list)

In [None]:
# Train with weight monitoring; WeightsMonitoringCallback fills learner_cnn.metrics['weights_*'].
learner_cnn.fit(5)

In [None]:
# One curve per model child: mean of its weights before each optimizer step.
for mean_series in learner_cnn.metrics['weights_mean']:
    plt.plot(mean_series)

In [None]:
# One curve per model child: std of its weights before each optimizer step.
for std_series in learner_cnn.metrics['weights_std']:
    plt.plot(std_series)

## Hook class

In [None]:
# Fresh learner for the Hook experiments; reuses the cb_list defined in an earlier cell.
learner_cnn = LearnerCnn(mnist_bunch, cb_list)

In [6]:
#export
class Hook():
    """Register a forward hook on `layer` and keep the handle so it can be removed.

    The hook is removed explicitly through `remove()` and also when the `Hook`
    object is finalized (`__del__`).

    Args:
        layer: module exposing `register_forward_hook`.
        hook_func: callable invoked as `hook_func(module, inputs, outputs)`.
    """
    def __init__(self, layer, hook_func):
        self.hook = layer.register_forward_hook(hook_func)

    def __del__(self):
        self.remove()

    def remove(self):
        """Detach the hook from its layer."""
        # getattr guard: __del__ also runs when __init__ failed before
        # `self.hook` was assigned, in which case there is nothing to remove.
        handle = getattr(self, 'hook', None)
        if handle is not None:
            handle.remove()

    @staticmethod
    def forward_stats(self, inputs, outputs):
        """Forward-hook function that logs output statistics on the hooked module.

        Being a static method used as a hook, `self` here is the *module* the hook
        is attached to, not a `Hook` instance. The mean and standard deviation of
        `outputs` are appended to `self.forward_stats['means']` and
        `self.forward_stats['stds']`, creating the dict on first use.
        """
        # The original tested for 'output_stats' and created 'forward_stats', then
        # appended to the never-created 'output_stats' (AttributeError on the
        # first forward pass). Use the one attribute that readers of the stats rely on.
        if not hasattr(self, 'forward_stats'):
            self.forward_stats = {'means': [], 'stds': []}
        self.forward_stats['means'].append(outputs.mean().item())
        self.forward_stats['stds'].append(outputs.std().item())

In [None]:
# Attach a stats-recording Hook to every direct child of the model; keep the Hook
# objects referenced, since a Hook removes itself when it is finalized.
hooks = [Hook(x, Hook.forward_stats) for x in learner_cnn.model.children()]

In [None]:
# Display the list of Hook objects created above.
hooks

In [None]:
# Train with the Hook objects attached; each forward pass appends to layer.forward_stats.
learner_cnn.fit(10)

In [None]:
# One labelled curve per layer: output means recorded by Hook.forward_stats.
for lidx, layer in enumerate(learner_cnn.model.children()):
    plt.plot(layer.forward_stats['means'], label=lidx)
# Build the legend once, after all labelled curves exist, instead of on every iteration.
plt.legend()

### Hooks callback

In [7]:
#export
class HooksCallback(Callback):
    """Attach `Hook.forward_stats` hooks to the model for the duration of a fit."""

    def fit_begin(self):
        """Create one `Hook` per direct child of the learner's model."""
        registered = []
        for child in self.learner.model.children():
            registered.append(Hook(child, Hook.forward_stats))
        # Assigned only once all hooks exist, as with the original comprehension.
        self.hooks = registered

    def fit_end(self):
        """Remove every hook created in `fit_begin`."""
        for registered_hook in self.hooks:
            registered_hook.remove()

In [9]:
# Learner whose stats hooks are managed by HooksCallback (added as the last callback).
device = torch.device('cuda')
cb_list = [
    CudaCallback(device),
    ReshapeMnist(),
    LossCallback(),
    AccuracyCallback(),
    HooksCallback(),
]
learner_cnn = LearnerCnn(mnist_bunch, cb_list)

In [10]:
# Train with HooksCallback: hooks are created in fit_begin and removed in fit_end.
learner_cnn.fit(5)

train:  2.294724702835083 validation:  2.294064521789551
train:  2.262110471725464 validation:  2.2482378482818604
train:  2.2525384426116943 validation:  2.226407527923584
train:  2.3146629333496094 validation:  2.3179354667663574
train:  2.3042073249816895 validation:  2.3037664890289307
Final accuracy: 0.106400
<torch.utils.hooks.RemovableHandle object at 0x7f9013db07d0> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013db0810> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75a10> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75a90> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75b50> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75c50> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75d10> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75dd0> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d75e90> removing
<torch.utils.hooks.RemovableHandle object at 0x7f9013d

In [12]:
# Print the recorded statistics of the first child only (nothing is printed if there are none).
first_layer = next(learner_cnn.model.children(), None)
if first_layer is not None:
    print(first_layer.forward_stats)

{'means': [0.03696437180042267, 0.037063874304294586, 0.03648197650909424, 0.0361950546503067, 0.037466954439878464, 0.03745844215154648, 0.038411397486925125, 0.03806674852967262, 0.038821298629045486, 0.03871595114469528, 0.03770044445991516, 0.03909255191683769, 0.0406080037355423, 0.040049873292446136, 0.04147544503211975, 0.0454460047185421, 0.04423719644546509, 0.04502612352371216, 0.046562690287828445, 0.04781987518072128, 0.052782557904720306, 0.050199802964925766, 0.053444940596818924, 0.056484200060367584, 0.06119702011346817, 0.06612163782119751, 0.07296962291002274, 0.07601623237133026, 0.08375708758831024, 0.09531799703836441, 0.05292227119207382, 0.05444929003715515, 0.05246994271874428, 0.0566687174141407, 0.06018519401550293, 0.06624256819486618, 0.0739092156291008, 0.07959616929292679, 0.09257527440786362, 0.11484327167272568, 0.10340968519449234, -0.169892355799675, -0.16700275242328644, -0.15564754605293274, -0.16684943437576294, -0.16607481241226196, -0.169820949435

In [13]:
#export
from IPython.display import display, Javascript
def nb_auto_export():
    """Save the current notebook and run `notebook2script.py` on it from the kernel.

    Displays a JavaScript snippet that uses the front-end `IPython.notebook`
    object to save the notebook and then asks the notebook's kernel to execute
    `!python notebook2script.py` followed by the notebook's name.
    """
    export_js = """{
const ip = IPython.notebook
if (ip) {
    ip.save_notebook()
    console.log('a')
    const s = `!python notebook2script.py ${ip.notebook_name}`
    if (ip.kernel) { ip.kernel.execute(s) }
}
}"""
    snippet = Javascript(export_js)
    display(snippet)

In [None]:
# Save this notebook and export it through notebook2script.py (see nb_auto_export).
nb_auto_export()