In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import torch
from torch import *

# Allow the notebook to run on a machine without CUDA (e.g. a Mac): this
# replacement ignores its arguments and hands the object back unchanged.
def cuda(self, device=None, non_blocking=False):
    """No-op stand-in for a ``.cuda()`` method: return ``self`` as-is."""
    return self
torch.Tensor.cuda = cuda

In [3]:
from lib.callbacks import *
from lib.model import *
from lib.data import *
from lib.optimizers import *
from pathlib import Path

## Create the base model on Imagenette

In [4]:
# Alternative dataset, currently disabled:
#path = datasets.untar_data(datasets.URLs.IMAGEWOOF_160)
# Local Imagenette directory. `path` is also used further down as the location
# passed to save_model / load_model.
path = untar_data(datasets.URLs.IMAGENETTE_160)
path

PosixPath('/Users/kasparlund/.fastai/data/imagenette-160')

In [5]:
size = 128   # size argument handed to the crop transforms below
bs   = 64    # training batch size; the validation loader uses bs*2

# Training items get a random resized crop and a random flip; validation items a center crop.
tfms     = [make_rgb, RandomResizedCrop(size, scale=(0.35,1)), PilRandomFlip(), to_byte_tensor, to_float_tensor]
val_tfms = [make_rgb, CenterCrop(size), to_byte_tensor, to_float_tensor]
files    = ImageList.from_files(path, tfms=tfms)

# Split with grandparent_splitter (valid_name='val') and label with parent_labeler.
# presumably both work on the folder names in each file's path -- TODO confirm in lib.data
sd       = SplitData.split_by_func(files, partial(grandparent_splitter, valid_name='val'))
data     = label_train_valid_data(sd, parent_labeler, proc_y=CategoryProcessor())
# The list was built with the training transforms; swap in the validation ones for the validation items.
data.valid.x.tfms  = val_tfms

# Class count taken as (largest training label + 1).
# assumes labels are consecutive integers starting at 0 -- TODO confirm
imagenette_features = max(data.train.y)+1
print(f"number of training, validation images: {len(data.train)},  {len(data.valid)}")
print(f"imagenette_features:{imagenette_features}")

# Only the training loader shuffles.
train_dl,valid_dl = ( DataLoader(data.train, batch_size=bs,   num_workers=4, shuffle=True),
                      DataLoader(data.valid, batch_size=bs*2, num_workers=4))
databunch = DataBunch(train_dl, valid_dl, c_in=3, c_out=imagenette_features)

number of training, validation images: 12894,  500
imagenette_features:10


In [6]:
# Output channels of the four conv layers (they match the Conv2d lines in the model summary below).
layers_sizes = [64,64,128,256]
# Factory for a single conv layer: stride 2, batchnorm on, zero_bn off, in-place ReLU as activation.
layer = partial( conv_layer, stride=2, bn=True, zero_bn=False, act=partial(torch.nn.ReLU,inplace=True) )
model = get_cnn_model(layers_sizes, databunch.c_in, databunch.c_out, layer)
init_cnn( model )

# Callback factories shared by every training run in this notebook.
cbfs_base = [TrainableModelCallback, TrainEvalCallback, OptimizerCallback, 
#        partial(ParamScheduler, 'lr', sched),
        partial(BatchTransformXCallback, norm_imagenette),
#        partial(MixUp,α=0.4),
        
        #CudaCallback,
        ProgressCallback,
       ]
# Training runs additionally record stats and report accuracy.
cbfs = cbfs_base.copy() + [Recorder, partial(AvgStatsCallback,[accuracy])]
# Variant with LR_Finder; not used in the cells visible in this notebook.
cbfs_lr_Finder = cbfs_base.copy() + [LR_Finder]

# Schedule object passed as the first argument to Adam(...) in the fit cells.
# presumably two cosine phases covering 30% / 70% of training -- TODO confirm in lib.optimizers
sched = combine_scheds([0.3, 0.7], [sched_cos(0.3, 0.6), sched_cos(0.6, 0.2)]) 

In [7]:
# Fetch one batch (with the norm_imagenette batch transform) and print each module with its output shape.
xb,_ = getFirstbatch( model, databunch, partial(BatchTransformXCallback, tfm = norm_imagenette))
model_summary(model, xb, find_all=True, print_mod=True)


model_summary

Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
torch.Size([64, 64, 64, 64]), requires_grad:True

Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 64, 32, 32]), requires_grad:True

Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 128, 16, 16]), requires_grad:True

Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 256, 8, 8]), requires_grad:True

Linear(in_features=256, out_features=10, bias=True)
torch.Size([64, 10]), requires_grad:True


In [8]:
# Train the Imagenette base model for 3 epochs with label-smoothing cross entropy.
learn = Learner( model, databunch, loss_func=LabelSmoothingCrossEntropy())
%time learn.fit(3, opt=Adam(sched,max_lr=3e-4, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.995635,0.352877,1.765346,0.472,01:59
1,1.76727,0.469598,1.662404,0.526,01:58
2,1.668685,0.521405,1.615334,0.54,02:07


CPU times: user 28min 27s, sys: 5min 18s, total: 33min 45s
Wall time: 6min 5s


In [9]:
# Persist the trained base model under `path`; it is reloaded with load_model(path, ...) in the transfer-learning section.
save_model(path, learn.model)

## Train on the Pets dataset

In [10]:
# Local directory of the Oxford-IIIT Pet dataset (see the listing in the next cell).
pets = datasets.untar_data(datasets.URLs.PETS)

In [11]:
# Peek at the top-level entries of the dataset directory.
list(pets.iterdir())[:10]

[PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/annotations')]

In [12]:
# The image files live in the 'images' sub-directory; file names have the form <label>_<number>.jpg.
pets_path = pets/'images'
list(pets_path.iterdir())[:10]

[PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Egyptian_Mau_167.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/pug_52.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/basset_hound_112.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_193.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/shiba_inu_122.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_53.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Birman_167.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/leonberger_6.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_47.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/shiba_inu_136.jpg')]

In [13]:
import re
def random_splitter(fn, p_valid):
    """Randomly decide whether an item goes to the validation set.

    Args:
        fn: the item being split; ignored, present only so the function fits the
            splitter call signature.
        p_valid: probability (0..1) of assigning the item to validation.

    Returns:
        True when a fresh uniform draw from [0, 1) is below ``p_valid``.
        The draw is unseeded here, so the resulting split differs between runs.
    """
    # Import locally: `random` is not imported in any visible cell and would
    # otherwise resolve only if one of the star imports happens to export it.
    import random
    return random.random() < p_valid
def pet_labeler(fn):
    """Derive the class label from a Pets file name of the form ``<label>_<number>.jpg``.

    Args:
        fn: path-like object exposing ``.name`` (e.g. ``pathlib.Path``).

    Returns:
        Everything before the final ``_<digits>.jpg``, e.g. ``'Egyptian_Mau'``
        for ``Egyptian_Mau_167.jpg``.

    Raises:
        IndexError: if the file name does not follow the expected pattern.
    """
    # The dot before "jpg" is escaped so that only a literal "." is accepted;
    # unescaped, it would match any character (e.g. "pug_52xjpg").
    match = re.match(r'^(.*)_\d+\.jpg$', fn.name)
    if match is None:
        raise IndexError(f"cannot derive a pet label from file name {fn.name!r}")
    return match.group(1)

# Build the Pets image list, reusing the training transforms (`tfms`) defined for Imagenette.
files = ImageList.from_files(pets_path, tfms=tfms)
# Send each file to validation with probability 0.1; no seed is set in the visible cells,
# so the split is presumably different on every run.
sd    = SplitData.split_by_func(files, partial(random_splitter, p_valid=0.1))

# Keep a handle on the processor so its vocab can be inspected afterwards.
proc  = CategoryProcessor()
data  = label_train_valid_data(sd, pet_labeler, proc_y=proc)
data.valid.x.tfms = val_tfms

# Class count taken from the label vocabulary.
pets_features     = len(proc.vocab)
print(f"number of training, validation images: {len(data.train)},  {len(data.valid)}")
print(f"pets_features:{pets_features}")

# NOTE: rebinds train_dl / valid_dl / databunch; from here on they refer to the Pets data.
train_dl,valid_dl = ( DataLoader(data.train, batch_size=bs,   num_workers=4, shuffle=True),
                      DataLoader(data.valid, batch_size=bs*2, num_workers=4))
databunch = DataBunch(train_dl, valid_dl, c_in=3, c_out=pets_features)

number of training, validation images: 6661,  729
pets_features:37


In [25]:
# List the class names collected by the category processor.
print(f"categories:\n{ ', '.join(proc.vocab) }")

categories:
Egyptian_Mau, pug, basset_hound, Siamese, shiba_inu, Birman, leonberger, saint_bernard, Abyssinian, miniature_pinscher, wheaten_terrier, scottish_terrier, pomeranian, german_shorthaired, english_setter, Sphynx, newfoundland, British_Shorthair, boxer, great_pyrenees, samoyed, Russian_Blue, japanese_chin, Ragdoll, english_cocker_spaniel, Maine_Coon, havanese, Bengal, american_pit_bull_terrier, keeshond, american_bulldog, chihuahua, Bombay, staffordshire_bull_terrier, beagle, Persian, yorkshire_terrier


In [26]:
# Baseline: train from scratch. Build a fresh, newly initialised model sized for the Pets data bunch.
model = get_cnn_model(layers_sizes, databunch.c_in, databunch.c_out, layer)
init_cnn( model )

In [27]:
# Summarise the from-scratch model created in the previous cell.
# Use `model`, not `learn.model`: in a top-to-bottom run `learn` still wraps
# the Imagenette base model at this point and is only rebuilt in the next cell.
xb,_ = getFirstbatch( model, databunch, partial(BatchTransformXCallback, tfm = norm_imagenette))
model_summary(model, xb, find_all=True, print_mod=True)

model_summary

Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
torch.Size([64, 64, 64, 64]), requires_grad:True

Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 64, 32, 32]), requires_grad:True

Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 128, 16, 16]), requires_grad:True

Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 256, 8, 8]), requires_grad:True

Linear(in_features=256, out_features=37, bias=True)
torch.Size([64, 37]), requires_grad:True


In [None]:
# Baseline run: train the from-scratch Pets model with the same optimizer settings as the base model.
learn = Learner( model, databunch, loss_func=LabelSmoothingCrossEntropy() )
# Same callback list as the one built in the model-setup cell.
cbfs  = cbfs_base.copy() + [Recorder, partial(AvgStatsCallback,[accuracy])]
# NOTE(review): 2 epochs are requested but the recorded output below shows only one row.
%time learn.fit(2, opt=Adam(sched,max_lr=3e-4, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.534791,0.060201,3.464581,0.101509,01:03


## Use the pretrained Imagenette model for training with gradual unfreezing

In [136]:
# `path` is still the Imagenette directory where save_model stored the base model.
print(f"path to pretrained model:{path}")

path to pretrained model:/Users/kasparlund/.fastai/data/imagenette-160


In [147]:
# Rebuild the base architecture with the Imagenette class count and load the weights
# stored earlier with save_model(path, ...). `path` points at the Imagenette
# directory; the Imagewoof line near the top of the notebook is commented out.
model = get_cnn_model(layers_sizes, 3, imagenette_features, layer)
load_model(path, model)

In [148]:
def adapt_model(model, data, norm):
    """Replace the head of a pretrained model with a new one sized for ``data``.

    The model is cut just before its first direct child that is an
    ``nn.AdaptiveAvgPool2d``. The part in front of the cut is kept (sharing its
    modules with ``model``) and followed by ``AdaptiveConcatPool2d``, ``Flatten``
    and a new ``nn.Linear`` with ``data.c_out`` outputs.

    Args:
        model: sliceable module (e.g. ``nn.Sequential``) with an
            ``nn.AdaptiveAvgPool2d`` among its direct children.
        data: data bunch handed to ``getFirstbatch``; must expose ``c_out``.
        norm: batch transform given to ``BatchTransformXCallback`` when the
            probe batch is fetched.

    Returns:
        ``nn.Sequential(body, AdaptiveConcatPool2d(), Flatten(), nn.Linear(ni*2, data.c_out))``
        where ``ni`` is the channel count produced by the body.

    Raises:
        StopIteration: if ``model`` has no ``nn.AdaptiveAvgPool2d`` child.
    """
    # Keep everything in front of the pooling layer, dropping the old pooling + classifier head.
    cut   = next( i for i,o in enumerate(model.children()) if isinstance(o,nn.AdaptiveAvgPool2d) )
    m_cut = model[:cut]

    xb,_  = getFirstbatch( model, data, partial(BatchTransformXCallback, tfm = norm))

    # Probe the body's output channel count with one batch. Run the probe in eval
    # mode and without autograd so it neither updates BatchNorm running statistics
    # of the pretrained layers nor builds a graph; then restore every module's mode.
    modes = [(m, m.training) for m in m_cut.modules()]
    m_cut.eval()
    try:
        with torch.no_grad():
            ni = m_cut(xb).shape[1]
    finally:
        for m, was_training in modes:
            m.training = was_training

    # The new linear layer takes ni*2 inputs.
    # presumably AdaptiveConcatPool2d concatenates two pooled tensors -- TODO confirm in lib.model
    m_new = nn.Sequential(
        m_cut,
        AdaptiveConcatPool2d(),
        Flatten(),
        nn.Linear(ni*2, data.c_out))
    return m_new

# Swap the Imagenette head for a Pets head.
# NOTE(review): this rebinds `model`, so re-running the cell presumably requires
# re-running the load cell first -- the adapted model no longer has the original head.
model = adapt_model(model, databunch, norm=norm_imagenette)

In [149]:
# Output shapes of the adapted model; the final layer now has one output per Pets class.
xb,_ = getFirstbatch( model, databunch, partial(BatchTransformXCallback, tfm = norm_imagenette))
model_summary(model, xb, find_all=True, print_mod=False)


model_summary
torch.Size([64, 64, 64, 64]), requires_grad=True
torch.Size([64, 64, 32, 32]), requires_grad=True
torch.Size([64, 128, 16, 16]), requires_grad=True
torch.Size([64, 256, 8, 8]), requires_grad=True
torch.Size([64, 37]), requires_grad=True


In [151]:
def set_grad(module, requires_grad, train_bn=False):
    """Set ``requires_grad`` on the parameters owned directly by ``module``.

    Only the module's own parameters are touched (``recurse=False``); parameters
    of sub-modules are left alone. ``train_bn`` is accepted so the signature
    matches what the callers pass, but it is currently not used: an earlier
    special case for Linear / BatchNorm layers was disabled.
    """
    own_params = module.parameters(recurse=False)
    for param in own_params:
        param.requires_grad_(requires_grad)
 
def change_requires_grad(module:nn.Module, requires_grad, train_bn):
    """Apply ``set_grad`` to ``module`` and, recursively, to every descendant module.

    Objects that are not ``nn.Module`` instances are ignored.
    """
    if not isinstance(module, nn.Module):
        return
    set_grad(module, requires_grad, train_bn)
    for child in module.children():
        change_requires_grad(child, requires_grad, train_bn)
def freeze( model, train_bn=False ):
    """Leave only the new head trainable.

    First every parameter of ``model`` is switched to ``requires_grad=True``,
    then everything under ``model[0]`` (treated as the pretrained part) is
    switched to ``requires_grad=False``. ``train_bn`` is only passed through.
    """
    print("setting all to requires_grad=True")
    change_requires_grad(model, True, train_bn)

    print("\nsetting pretrained to requires_grad=False")
    pretrained = model[0]
    change_requires_grad(pretrained, False, train_bn)
def unfreeze( model, train_bn=False ):
    """Switch every parameter of ``model`` to ``requires_grad=True``.

    ``train_bn`` is only passed through to ``change_requires_grad``.
    """
    print("setting all to requires_grad=True")
    change_requires_grad(model, True, train_bn)
    
def print_requires_grad(module:nn.Module):
    """Print the module tree and the ``requires_grad`` flag of each leaf module's parameters.

    Every module's type is printed. Parameters are listed only for modules
    without children. Objects that are not ``nn.Module`` instances print nothing.
    """
    if not isinstance(module, nn.Module):
        return

    print(f"\nmodule:{type(module)}")
    for child in module.children():
        print_requires_grad(child)

    is_leaf = len(list(module.children()))==0
    if is_leaf:
        for param in module.parameters():
            print(f"{type(param)} requires_grad:{param.requires_grad}")


In [152]:
# Freeze the pretrained body, then confirm the flags in two ways.
freeze(model)
model_summary(model, xb, find_all=True, print_mod=False)

print("\nprint_requires_grad")
# Inspect `model` itself: in a top-to-bottom run `learn` is only rebuilt around
# the adapted model in the next cell, so `learn.model` would still be the
# from-scratch network here.
print_requires_grad(model)

setting all to requires_grad=True

setting pretrained to requires_grad=False

model_summary
torch.Size([64, 64, 64, 64]), requires_grad=False
torch.Size([64, 64, 32, 32]), requires_grad=False
torch.Size([64, 128, 16, 16]), requires_grad=False
torch.Size([64, 256, 8, 8]), requires_grad=False
torch.Size([64, 37]), requires_grad=True

print_requires_grad

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:False

module:<class 'torch.nn.modules.batchnorm.BatchNorm2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:False
<class 'torch.nn.parameter.Parameter'> requires_grad:False

module:<class 'torch.nn.modules.activation.ReLU'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requ

In [150]:
# Wrap the adapted model in a new Learner; from here on `learn.model` is passed to unfreeze / print_requires_grad.
cbfs  = cbfs_base.copy() + [Recorder, partial(AvgStatsCallback,[accuracy])]
learn = Learner( model, databunch, loss_func=LabelSmoothingCrossEntropy() )

In [153]:
# Stage 1: train for 3 epochs after freeze(model) was applied above, using a higher max_lr (2e-3) than the from-scratch runs.
%time learn.fit(3, opt=Adam(sched,max_lr=2e-3, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.630612,0.050097,3.512664,0.071923,00:28
1,3.41465,0.094645,3.396963,0.102351,00:28
2,3.31476,0.128544,3.373232,0.105118,00:28


CPU times: user 6min 21s, sys: 1min 20s, total: 7min 41s
Wall time: 1min 25s


In [154]:
# Make every layer trainable again, then confirm the flags in two ways.
# NOTE(review): this cell mixes `learn.model` and `model`; presumably they are the
# same object because `learn` was built from `model` -- TODO confirm in Learner.
unfreeze(learn.model)
#print_requires_grad(learn.model)
#print("")
model_summary(model, xb, find_all=True, print_mod=False)

print("\nprint_requires_grad")
print_requires_grad(learn.model)

setting all to requires_grad=True

model_summary
torch.Size([64, 64, 64, 64]), requires_grad=True
torch.Size([64, 64, 32, 32]), requires_grad=True
torch.Size([64, 128, 16, 16]), requires_grad=True
torch.Size([64, 256, 8, 8]), requires_grad=True
torch.Size([64, 37]), requires_grad=True

print_requires_grad

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True

module:<class 'torch.nn.modules.batchnorm.BatchNorm2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True
<class 'torch.nn.parameter.Parameter'> requires_grad:True

module:<class 'torch.nn.modules.activation.ReLU'>

module:<class 'torch.nn.modules.container.Sequential'>

module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True

module:<class 'torch.nn.modules.ba

In [155]:
# Stage 2: fine-tune all layers with a much lower max_lr (5e-5) and weight decay (1e-6) than stage 1.
# NOTE(review): this call requests 3 epochs but the recorded output below lists 5 rows,
# so the output appears to come from a different execution of this cell.
learn.fit(3, opt=Adam(sched,max_lr=5e-5, moms=(0.85,0.95), max_wd = 1e-6), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.245641,0.149243,3.301595,0.141079,01:10
1,3.211513,0.158542,3.271284,0.149378,01:05
2,3.179389,0.170991,3.249076,0.141079,01:05
3,3.154111,0.177291,3.232889,0.15491,01:05
4,3.143253,0.184791,3.22258,0.157676,01:05


In [70]:
# Full summary (modules and output shapes) of the adapted model; reuses the batch `xb` fetched earlier.
model_summary(model, xb, find_all=True, print_mod=True)


Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
torch.Size([64, 64, 64, 64]), requires_grad:True

Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 64, 32, 32]), requires_grad:True

Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 128, 16, 16]), requires_grad:True

Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
torch.Size([64, 256, 8, 8]), requires_grad:True

Linear(in_features=512, out_features=37, bias=True)
torch.Size([64, 37]), requires_grad:True


In [72]:
# Final check of the requires_grad flags on the model held by the learner.
print_requires_grad(learn.model)

module:<class 'torch.nn.modules.container.Sequential'>
module:<class 'torch.nn.modules.container.Sequential'>
module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True


module:<class 'torch.nn.modules.batchnorm.BatchNorm2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True
<class 'torch.nn.parameter.Parameter'> requires_grad:True


module:<class 'torch.nn.modules.activation.ReLU'>


<class 'torch.nn.parameter.Parameter'> requires_grad:True
<class 'torch.nn.parameter.Parameter'> requires_grad:True
<class 'torch.nn.parameter.Parameter'> requires_grad:True


module:<class 'torch.nn.modules.container.Sequential'>
module:<class 'torch.nn.modules.conv.Conv2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True


module:<class 'torch.nn.modules.batchnorm.BatchNorm2d'>
<class 'torch.nn.parameter.Parameter'> requires_grad:True
<class 'torch.nn.parameter.Parameter'> requires_grad:True


module:<class 'torch.nn.modules.activation.ReLU