In [0]:
# Mount Google Drive inside the Colab VM so the notebooks folder is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import sys
# Make the Drive notebooks folder importable (the `exp` package is imported from here below).
# Single leading slash: a path starting with '//' is implementation-defined under POSIX.
sys.path.append('/content/gdrive/My Drive/Colab Notebooks/')

In [0]:
#export
from exp.nb_07 import *

In [0]:
# Load the train/validation splits, then normalise both inputs with normalize_to
# (presumably using the training-set statistics -- confirm in exp.nb_07).
x_train,y_train,x_valid,y_valid = get_data()
x_train,x_valid = normalize_to(x_train,x_valid)

train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

nh = 50
bs = 512
# Number of classes, taken as (largest label value + 1).
c = y_train.max().item()+1
loss_func = F.cross_entropy

# Bundle the data loaders built with batch size `bs` together with the class count.
data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [0]:
# Batch transform built by view_tfm(1,28,28); presumably it views each input
# as a 1x28x28 image -- confirm against view_tfm in exp.nb_07.
mnist_view = view_tfm(1,28,28)

# Callback factories handed to get_learn_run for every run in this notebook.
cbfs = [
  Recorder,
  partial(AvgStatsCallback,accuracy),
  CudaCallback,
  partial(BatchTransformXCallback, mnist_view),
]

In [0]:
# Output-channel counts of the successive ConvLayer blocks (see the model repr printed below).
nfs = [8, 16, 32, 64, 64]

In [0]:
class ConvLayer(nn.Module):
  """A Conv2d followed by a GeneralRelu, exposing `weight` and `bias` like a plain layer.

  `weight` is the convolution's weight. `bias` is NOT the convolution's bias:
  it is the negated `sub` attribute of the GeneralRelu, so `layer.bias -= x`
  increases `relu.sub` by `x`.
  """

  def __init__(self,ni,nf,ks=3,stride=2,sub=0.,**kwargs):
    """Build the conv (ni -> nf channels, padding ks//2) and the activation.

    `sub` and any extra keyword arguments are forwarded to GeneralRelu.
    """
    super().__init__()
    self.conv = nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=True)
    self.relu = GeneralRelu(sub=sub, **kwargs)

  def forward(self,x):
    """Apply the convolution, then the activation."""
    conv_out = self.conv(x)
    return self.relu(conv_out)

  @property
  def bias(self):
    """Negated `relu.sub` (presumably the amount GeneralRelu subtracts -- confirm)."""
    return -self.relu.sub

  @bias.setter
  def bias(self,v):
    # Store with the sign flipped so the getter above round-trips.
    self.relu.sub = -v

  @property
  def weight(self):
    """The underlying convolution's weight tensor."""
    return self.conv.weight

In [0]:
# Build the learner/runner pair from the filter sizes, data, ConvLayer and callbacks.
# NOTE(review): 0.6 is presumably the learning rate -- confirm against get_learn_run.
learn,run = get_learn_run(nfs,data,0.6,ConvLayer, cbs=cbfs)

In [8]:
# Baseline fit, run before the LSUV cells below; the output shows two train/valid rounds.
run.fit(2,learn)

train: [1.7463371040894813, tensor(0.3955, device='cuda:0')]
valid: [0.52816435546875, tensor(0.8336, device='cuda:0')]
train: [0.36176760172106553, tensor(0.8899, device='cuda:0')]
valid: [0.18754342041015626, tensor(0.9429, device='cuda:0')]


In [0]:
# Call get_learn_run again with the same arguments so the cells below do not reuse the
# learner fitted above (presumably this yields a fresh, untrained model -- confirm).
learn,run = get_learn_run(nfs,data,0.6,ConvLayer, cbs=cbfs)

In [0]:
#export
def get_batch(dl,run):
  """Pull the first batch from `dl` and pass it through the runner's 'begin_batch' stage.

  The batch is stored on `run` as `xb`/`yb`, every callback in `run.cbs` is
  attached to `run`, and `run('begin_batch')` is invoked. The returned pair is
  read back from `run`, so it reflects anything that stage changed.
  """
  first_x, first_y = next(iter(dl))
  run.xb = first_x
  run.yb = first_y
  for callback in run.cbs:
    callback.set_runner(run)
  run('begin_batch')
  return run.xb, run.yb

In [0]:
# One training batch, taken after the runner's 'begin_batch' callbacks have run on it.
xb,yb=get_batch(data.train_dl,run)

In [0]:
#export
def find_modules(m,cond):
  """Return a flat list of the modules under `m` (inclusive) for which `cond` is true.

  The search walks `m.children()` recursively and does not descend into a
  module once it matches, so nested matches inside a match are not returned.
  """
  if cond(m): return [m]
  found = []
  for child in m.children():
    found.extend(find_modules(child,cond))
  return found

def is_lin_layer(l):
  """Return True when `l` is an instance of Conv1d, Conv2d, Conv3d, Linear or ReLU."""
  # NOTE(review): nn.ReLU is an activation, not a linear layer; it is kept here to
  # preserve existing behaviour -- confirm whether callers rely on it.
  return isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.Linear, nn.ReLU))

In [0]:
# Every ConvLayer in the model, in the order find_modules encounters them.
mods = find_modules(learn.model, lambda o:isinstance(o,ConvLayer))

In [14]:
# Display the collected layers.
mods

[ConvLayer(
   (conv): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 )]

In [0]:
#export
def append_stat(hook,mod,inp,outp):
  """Hook function: store the mean and std of a module's output on `hook`.

  Exported because the exported `lsuv_module` passes this function to `Hook`.
  `mod` and `inp` are accepted but unused. `hook.mean` and `hook.std` are
  overwritten with Python floats on every call.
  """
  d=outp.data
  hook.mean,hook.std = d.mean().item(),d.std().item()

In [0]:
# Module-level handle to the model after .cuda(); lsuv_module below reads this global.
mdl = learn.model.cuda()

In [17]:
# Attach append_stat to every collected layer, run one forward pass, then print
# the output mean/std recorded for each layer.
with Hooks(mods,append_stat) as hooks:
  mdl(xb)
  for hook in hooks:
    print(hook.mean,hook.std)

0.459943950176239 0.9293095469474792
0.3730125427246094 0.7919470071792603
0.35924965143203735 0.6614606380462646
0.34195855259895325 0.5717957615852356
0.2402404397726059 0.3327358067035675


In [0]:
#export 
def lsuv_module(m,xb,model=None,tol=1e-3):
  """Shift and rescale layer `m` until its output on `xb` has mean ~0, then std ~1.

  m:     layer exposing a settable `bias` and a `weight` tensor (e.g. ConvLayer).
  xb:    input batch fed through the whole model on every iteration.
  model: model containing `m`. When omitted, the module-level `mdl` is used,
         which keeps existing two-argument calls working; pass it explicitly
         when calling from outside the notebook, where `mdl` is not defined.
  tol:   tolerance for |mean| and |std - 1|.

  Returns the last recorded (mean, std) of `m`'s output. The weight rescale
  runs after the bias shift and the mean is not re-centred afterwards, so the
  returned mean is not guaranteed to be within `tol` of zero.
  """
  if model is None: model = mdl
  h = Hook(m,append_stat)
  try:
    # Each loop test runs a forward pass; append_stat then refreshes h.mean / h.std
    # (presumably Hook registers it as a forward hook on `m` -- confirm).
    while model(xb) is not None and abs(h.mean) > tol: m.bias -= h.mean
    while model(xb) is not None and abs(h.std-1) > tol: m.weight.data /= h.std
  finally:
    # Always detach the hook, even if a forward pass raises.
    h.remove()
  return h.mean,h.std

In [19]:
# Initialise the layers one at a time, in the order they were collected,
# printing the (mean, std) each call returns.
for m in mods:
  print(lsuv_module(m,xb))

(0.034986890852451324, 1.0)
(0.09375637024641037, 1.0)
(0.14342403411865234, 1.0)
(0.15085163712501526, 1.0)
(0.31061404943466187, 0.9999998807907104)


In [20]:
# Same fit call as the baseline above, now on the LSUV-initialised model, timed for reference.
%time run.fit(2,learn)

train: [0.524281806552533, tensor(0.8320, device='cuda:0')]
valid: [0.1550531005859375, tensor(0.9514, device='cuda:0')]
train: [0.1099058819800308, tensor(0.9664, device='cuda:0')]
valid: [0.0853405029296875, tensor(0.9740, device='cuda:0')]
CPU times: user 1.85 s, sys: 373 ms, total: 2.23 s
Wall time: 2.24 s


In [0]:
# Run notebook2script on this notebook (presumably writes the `#export` cells to a module -- confirm).
!python notebook2script.py 07a_lsuv.ipynb