In [1]:
from fastai.vision import *
from fastai.callbacks import *

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [3]:
import sys
sys.path.append('../')

from fasterai.bn_folder import *

In [4]:
# Fetch the Imagenette-160 archive through fastai's `untar_data`; `path` is the
# folder handed to `ImageList.from_folder` in the next cell.
path = untar_data(URLs.IMAGENETTE_160)

In [5]:
# Build the DataBunch shared by every Learner in this notebook:
#   - images are read from `path` and split on the 'train' / 'val' sub-folders,
#   - labels come from the folder names,
#   - the default `get_transforms()` set is applied at size 128,
#   - batches hold 64 items and are normalised with `imagenet_stats`.
data = (ImageList.from_folder(path)
                .split_by_folder(train='train', valid='val')
                .label_from_folder()
                .transform(get_transforms(), size=128)
                .databunch(bs=64)
                .normalize(imagenet_stats))

In [6]:
def count_parameters(model):
    """Print the number of trainable parameters held by `model`.

    Walks `model.parameters()` and adds up `numel()` for every parameter whose
    `requires_grad` flag is set; parameters with the flag cleared are ignored.
    The total is printed with thousands separators. Nothing is returned.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue  # skip parameters that are not trainable
        trainable_total += param.numel()
    print(f'Total parameters : {trainable_total:,}')

## VGG16

In [7]:
# VGG16 with batch norm and a 10-class head. No `pretrained` argument is passed,
# so the library default applies. Accuracy is the only tracked metric.
learn = Learner(data, models.vgg16_bn(num_classes=10), metrics=[accuracy])

In [9]:
# Train for 5 epochs with the one-cycle policy, passing 1e-3 as the learning rate.
learn.fit_one_cycle(5, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.97284,2.065817,0.362803,00:31
1,1.85823,1.861822,0.368408,00:31
2,1.499569,1.655234,0.482803,00:31
3,1.271293,1.074006,0.650955,00:31
4,1.051444,0.927527,0.690701,00:31


In [10]:
# Baseline validation loss / accuracy of the un-folded model; the folded model
# is validated further down and should reproduce these numbers.
learn.validate()

[0.9275266, tensor(0.6907)]

In [11]:
# Keep a handle on the trained network, switched to eval mode, for timing and folding.
model = learn.model.eval()

In [12]:
# Grab one batch; only the first item `x[0]` is used as input by the timing cells.
x,y = data.one_batch()

In [13]:
%%timeit
# Single-image forward pass through the original model (`[None]` adds the batch axis).
# NOTE(review): the timed statement also includes the `.cuda()` copy of the input,
# and there is no `torch.cuda.synchronize()` / `torch.no_grad()`. CUDA work can
# return before the GPU has finished, so treat the figure as indicative only.
model(x[0][None].cuda())

2.86 ms ± 326 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
# Layer-by-layer summary of the original model; BatchNorm2d rows are present here.
learn.summary()

VGG
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [64, 128, 128]       1,792      True      
______________________________________________________________________
BatchNorm2d          [64, 128, 128]       128        True      
______________________________________________________________________
ReLU                 [64, 128, 128]       0          False     
______________________________________________________________________
Conv2d               [64, 128, 128]       36,928     True      
______________________________________________________________________
BatchNorm2d          [64, 128, 128]       128        True      
______________________________________________________________________
ReLU                 [64, 128, 128]       0          False     
______________________________________________________________________
MaxPool2d            [64, 64, 64]         0          False     
__________________________________________________________

In [15]:
# Trainable-parameter count before folding.
count_parameters(model)

Total parameters : 134,309,962


In [16]:
# Fresh VGG16-BN instance with the same head size, to receive a copy of the trained weights.
new_model = models.vgg16_bn(num_classes=10)

In [17]:
# Copy the trained weights from `model` into the fresh instance.
new_model.load_state_dict(model.state_dict())

<All keys matched successfully>

In [18]:
# Switch the copy to eval mode, then fold its batch-norm layers with
# `bn_folding_model` (from `fasterai.bn_folder`) and move the result to the GPU.
# NOTE(review): presumably folding needs eval mode so BN uses its running
# statistics — confirm against `fasterai.bn_folder`.
new_model.eval()
folded_model = bn_folding_model(new_model).cuda()

In [19]:
%%timeit
# Same single-image forward pass as the baseline timing, now on the folded model.
# NOTE(review): like the baseline, this includes the `.cuda()` input copy and has
# no `torch.cuda.synchronize()`, so compare the two figures with care.
folded_model(x[0][None].cuda())

2.43 ms ± 133 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
# Trainable-parameter count after folding, to compare with the count of `model` above.
count_parameters(folded_model)

Total parameters : 134,301,514


In [21]:
# Wrap the folded model in a Learner so `validate()` / `summary()` can be reused;
# it is not trained in the cells shown.
folded_learner = Learner(data, folded_model, metrics=[accuracy])

In [22]:
# Sanity check: loss and accuracy should match the un-folded baseline.
folded_learner.validate()

[0.9275267, tensor(0.6907)]

In [23]:
# Summary of the folded model, for comparison with the original summary above.
folded_learner.summary()

VGG
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [64, 128, 128]       1,792      True      
______________________________________________________________________
ReLU                 [64, 128, 128]       0          False     
______________________________________________________________________
Conv2d               [64, 128, 128]       36,928     True      
______________________________________________________________________
ReLU                 [64, 128, 128]       0          False     
______________________________________________________________________
MaxPool2d            [64, 64, 64]         0          False     
______________________________________________________________________
Conv2d               [128, 64, 64]        73,856     True      
______________________________________________________________________
ReLU                 [128, 64, 64]        0          False     
__________________________________________________________

We have removed the computation of the BN layers, which is why we gain some computation time, but we haven't removed the parameters from the model: the BN parameters are still held in memory. This is tricky to do because the model is built sequentially, so the layer indices no longer correspond. It is easier to do on ResNets because each layer is named.

## ResNet50

In [24]:
# ResNet-50 with a 10-class head. This rebinds `learn`, replacing the VGG learner.
learn = Learner(data, models.resnet50(num_classes=10), metrics=[accuracy])

In [25]:
# Same schedule as the VGG run: 5 one-cycle epochs with a learning rate of 1e-3.
learn.fit_one_cycle(5, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,2.090443,2.62319,0.243057,00:19
1,1.671145,1.487767,0.515414,00:19
2,1.290635,1.212664,0.616561,00:20
3,1.021132,0.9505,0.69172,00:20
4,0.815341,0.790772,0.749045,00:19


In [26]:
# Baseline validation loss / accuracy for the ResNet, compared against the folded variants below.
learn.validate()

[0.7907724, tensor(0.7490)]

In [27]:
# Rebind `model` to the trained ResNet, in eval mode.
model = learn.model.eval()

In [28]:
# Draw a new batch, replacing the `x`, `y` used in the VGG section; `x[0]` feeds the timing cells below.
x,y = data.one_batch()

In [29]:
%%timeit
# Single-image forward pass through the original ResNet.
# NOTE(review): includes the `.cuda()` input copy and has no `torch.cuda.synchronize()`,
# so the figure is indicative only.
model(x[0][None].cuda())

6.07 ms ± 129 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [30]:
# Trainable-parameter count of the original ResNet.
count_parameters(model)

Total parameters : 23,528,522


In [31]:
# NOTE(review): star import in the middle of the notebook. The bare `resnet50`
# called in the next cell presumably comes from this module — confirm. A later
# star import from `folded_resnet_nobn` may rebind the same name, so cell order matters.
from fasterai.resnet.folded_resnet import *

In [32]:
# Bare `resnet50` rather than the `models.resnet50` used to build `learn`.
# Presumably this is the variant exported by `fasterai.resnet.folded_resnet` — TODO confirm.
new_model = resnet50(num_classes=10)

In [33]:
# Copy the trained ResNet weights into the new instance (default strict key matching).
new_model.load_state_dict(model.state_dict())

<All keys matched successfully>

In [34]:
# Same folding step as in the VGG section: eval mode, fold, move to the GPU.
# This rebinds `folded_model`, replacing the folded VGG.
new_model.eval()
folded_model = bn_folding_model(new_model).cuda()

In [35]:
%%timeit
# Single-image forward pass through the folded ResNet.
# NOTE(review): includes the `.cuda()` input copy and has no `torch.cuda.synchronize()`,
# so compare with the baseline timing with care.
folded_model(x[0][None].cuda())

4.34 ms ± 1.23 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [36]:
# NOTE(review): this counts `new_model`, whereas the matching VGG cell counts
# `folded_model`. The total printed here equals the pre-folding total, so
# `folded_model` may have been intended — confirm.
count_parameters(new_model)

Total parameters : 23,528,522


In [37]:
# Wrap the folded ResNet in a Learner to reuse `validate()`; this rebinds `folded_learner`.
folded_learner = Learner(data, folded_model, metrics=[accuracy])

In [38]:
# Sanity check: loss and accuracy should match the ResNet baseline.
folded_learner.validate()

[0.79077244, tensor(0.7490)]

In [39]:
# NOTE(review): second mid-notebook star import. It presumably rebinds `resnet50`
# to a BN-free architecture, shadowing the `folded_resnet` import above — confirm.
# Re-running earlier cells after this one would then build a different model.
from fasterai.resnet.folded_resnet_nobn import *

In [40]:
# Instantiate the architecture that will hold the folded weights.
# `resnet50` presumably resolves to the `folded_resnet_nobn` variant imported just above — TODO confirm.
final_model = resnet50(num_classes=10)

In [41]:
# Load the folded weights into `final_model`. `strict=False` relaxes key matching
# between the two state dicts; the recorded result reports that all keys matched.
final_model.load_state_dict(folded_model.state_dict(),strict=False)

<All keys matched successfully>

In [42]:
# Trainable-parameter count of `final_model`, to compare with the ResNet counts above.
count_parameters(final_model)

Total parameters : 23,501,962


In [43]:
# Wrap `final_model` in a Learner to reuse `validate()`; it is not trained in the cells shown.
final_learner = Learner(data, final_model, metrics=[accuracy])

In [44]:
%%timeit
# Single-image forward pass through `final_model`.
# NOTE(review): `final_model` is never explicitly moved to the GPU or put in eval
# mode in the visible cells. This presumably relies on `Learner(...)` above having
# moved it to the data's device — confirm. As with the other timings, the `.cuda()`
# input copy is included and there is no `torch.cuda.synchronize()`.
final_model(x[0][None].cuda())

4.29 ms ± 10.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [45]:
# Sanity check: loss and accuracy should still match the ResNet baseline.
final_learner.validate()

[0.79077244, tensor(0.7490)]

In [47]:
# Summary of `learn`, i.e. the original ResNet Learner (not `final_learner`); BatchNorm2d rows are present.
learn.summary()

ResNet
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [64, 64, 64]         9,408      True      
______________________________________________________________________
BatchNorm2d          [64, 64, 64]         128        True      
______________________________________________________________________
ReLU                 [64, 64, 64]         0          False     
______________________________________________________________________
MaxPool2d            [64, 32, 32]         0          False     
______________________________________________________________________
Conv2d               [64, 32, 32]         4,096      True      
______________________________________________________________________
BatchNorm2d          [64, 32, 32]         128        True      
______________________________________________________________________
Conv2d               [64, 32, 32]         36,864     True      
_______________________________________________________