In [1]:
# Load IPython's autoreload extension; mode 2 re-imports all modules before each
# cell runs, so edits to the local `lib` package are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import torch
from torch import *

# CPU-only workaround so the notebook can run on the author's Mac
# (presumably a machine without a CUDA device).
def cuda(self, device=None, non_blocking=False):
    """No-op stand-in for ``torch.Tensor.cuda``.

    Both ``device`` and ``non_blocking`` are accepted and ignored; the object
    it is called on is returned unchanged.
    """
    return self


# Patch the Tensor class itself: from here on every `tensor.cuda(...)` call in
# this kernel returns the tensor as-is instead of moving it to a GPU.
setattr(torch.Tensor, "cuda", cuda)

In [3]:
from lib.modelmanager import *
from lib.model import *
from lib.callbacks import *
from lib.data import *
from lib.optimizers import *
from pathlib import Path

## Create the base model on Imagewoof

In [4]:
# Resolve the local directory of the 160px Imagewoof dataset
# (untar_data presumably downloads/unpacks it on first use — confirm in lib.data).
path = datasets.untar_data(datasets.URLs.IMAGEWOOF_160)
#path = untar_data(datasets.URLs.IMAGENETTE_160)
# Display the resolved dataset root.
path

PosixPath('/Users/kasparlund/.fastai/data/imagewoof-160')

In [5]:
# Image side length fed to the network and the training batch size.
size = 128
bs   = 64

# Training pipeline: random resized crop + random flip for augmentation.
# Validation pipeline: deterministic centre crop only.
tfms     = [make_rgb, RandomResizedCrop(size, scale=(0.35,1)), PilRandomFlip(), to_byte_tensor, to_float_tensor]
val_tfms = [make_rgb, CenterCrop(size), to_byte_tensor, to_float_tensor]
files    = ImageList.from_files(path, tfms=tfms)

# Split on the grandparent folder name ('val' marks validation images) and
# label each image with its parent folder name.
sd        = SplitData.split_by_func(files, partial(grandparent_splitter, valid_name='val'))
# Keep a handle on the label processor so the class count can be read from its
# vocabulary, the same way the Pets section does it.
woof_proc = CategoryProcessor()
data      = label_train_valid_data(sd, parent_labeler, proc_y=woof_proc)
data.valid.x.tfms  = val_tfms

# Number of output classes. Reading it from the vocabulary avoids scanning every
# training label and stays correct even if the highest class index were missing
# from the training split (max(y)+1 would undercount in that case).
# NOTE: the variable keeps its historical "imagenette" name because later cells
# reference it, but the dataset loaded above is Imagewoof.
imagenette_features = len(woof_proc.vocab)
print(f"number of training, validation images: {len(data.train)},  {len(data.valid)}")
print(f"imagenette_features:{imagenette_features}")

# Validation uses a doubled batch size; only the training loader is shuffled.
train_dl,valid_dl = ( DataLoader(data.train, batch_size=bs,   num_workers=4, shuffle=True),
                      DataLoader(data.valid, batch_size=bs*2, num_workers=4))
databunch = DataBunch(train_dl, valid_dl, c_in=3, c_out=imagenette_features)

number of training, validation images: 12454,  500
imagenette_features:10


In [6]:
# Output channels of the four stride-2 conv blocks that make up the network body.
layers_sizes = [64, 64, 128, 256]

# Factory for one conv block: stride-2 conv + batch norm + in-place ReLU.
layer = partial(
    conv_layer,
    stride=2,
    bn=True,
    zero_bn=False,
    act=partial(torch.nn.ReLU, inplace=True),
)

# Build the CNN for the current databunch and hand it to the model manager.
mm = CnnModelManager(
    get_cnn_model(layers_sizes, databunch.c_in, databunch.c_out, layer)
)
mm.initialize(is_resnet=False)

# Two-phase cosine schedule passed to the optimizer in the training cells:
# phase 1 covers 30% of training (0.3 -> 0.6), phase 2 the remaining 70% (0.6 -> 0.2).
# NOTE(review): presumably these values scale max_lr — confirm in lib.optimizers.
sched = combine_scheds(
    [0.3, 0.7],
    [sched_cos(0.3, 0.6), sched_cos(0.6, 0.2)],
)

# Callbacks shared by every training run in this notebook.
# Left disabled on purpose: partial(ParamScheduler, 'lr', sched),
# partial(MixUp, α=0.4) and CudaCallback.
cbfs_base = [
    TrainableModelCallback,
    TrainEvalCallback,
    OptimizerCallback,
    partial(BatchTransformXCallback, norm_imagenette),
    ProgressCallback,
]

# Normal training: base callbacks + recorder + loss/accuracy statistics.
cbfs = [*cbfs_base, Recorder, partial(AvgStatsCallback, [accuracy])]
# Learning-rate search: base callbacks + LR finder.
cbfs_lr_Finder = [*cbfs_base, LR_Finder]

In [7]:
# Print one "requires_grad" line per leaf layer (None for parameter-free layers
# such as ReLU, as the output below shows).
mm.grads_summary()
#xb,_ = getFirstbatch( model, databunch, partial(BatchTransformXCallback, tfm = norm_imagenette))
#model_summary(model, xb, only_leaves=True, print_mod=False)
# Print the full module tree of the network.
print(f"\nmodel hierarchy:\n{mm.model}")

requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: None  : AdaptiveAvgPool2d
requires_grad: None  : Lambda
requires_grad: True  : Linear

model hierarchy:
Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (2): Sequential(
    (0): Conv2d(64, 128, kernel_siz

In [8]:
# Wrap the base model and the Imagewoof databunch in a Learner with a
# label-smoothing cross-entropy loss.
learn = Learner( mm.model, databunch, loss_func=LabelSmoothingCrossEntropy())
# Train for 5 epochs with the project's Adam optimizer driven by `sched`;
# %time reports the CPU and wall time of the whole fit.
%time learn.fit(5, opt=Adam(sched,max_lr=3e-4, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,2.201428,0.211338,2.153249,0.232,02:49
1,2.067876,0.280071,2.090931,0.25,02:25
2,2.003838,0.321423,2.038854,0.28,02:37
3,1.954257,0.342219,1.975465,0.3,02:38
4,1.917405,0.363096,1.969318,0.328,02:34


CPU times: user 38min 13s, sys: 8min 29s, total: 46min 43s
Wall time: 13min 5s


In [9]:
# Persist the trained base model under the Imagewoof data directory; the
# transfer-learning section reloads it with mm.load(path).
# NOTE(review): what exactly is written (file name, weights only?) is defined in lib.modelmanager.
mm.save(path)

## Train on the Pets dataset

In [10]:
# Resolve the local root directory of the Oxford-IIIT Pets dataset
# (untar_data presumably downloads/unpacks it on first use — confirm in lib.data).
pets = datasets.untar_data(datasets.URLs.PETS)

In [11]:
# Peek at (up to) the first ten entries of the dataset root.
list(pets.iterdir())[:10]

[PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/annotations')]

In [12]:
# The image files live in the 'images' sub-folder; the class is encoded in each
# file name (e.g. 'Egyptian_Mau_167.jpg'), not in the folder structure.
pets_path = pets/'images'
# Show a sample of file names.
list(pets_path.iterdir())[:10]

[PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Egyptian_Mau_167.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/pug_52.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/basset_hound_112.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_193.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/shiba_inu_122.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_53.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Birman_167.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/leonberger_6.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/Siamese_47.jpg'),
 PosixPath('/Users/kasparlund/.fastai/data/oxford-iiit-pet/images/shiba_inu_136.jpg')]

In [13]:
import re
def random_splitter(fn, p_valid):
    """Randomly decide whether an item goes to the validation set.

    Args:
        fn: the item (file path) being split; not inspected.
        p_valid: probability of returning True.

    Returns:
        True when a fresh draw from ``random.random()`` falls below ``p_valid``.
    """
    draw = random.random()
    return draw < p_valid
def pet_labeler(fn):
    """Extract the breed label from a Pets image file name.

    File names look like ``<breed>_<number>.jpg`` (e.g. ``Egyptian_Mau_167.jpg``);
    everything before the final ``_<number>.jpg`` is the label.

    Args:
        fn: path-like object exposing a ``.name`` attribute (e.g. ``pathlib.Path``).

    Returns:
        The breed part of the file name as a string.

    Raises:
        IndexError: if the file name does not follow the expected pattern. The
            exception type matches what the previous ``findall(...)[0]``
            implementation raised, but now carries the offending name.
    """
    # The dot before 'jpg' is escaped so it only matches a literal '.',
    # not an arbitrary character.
    match = re.match(r'^(.*)_\d+\.jpg$', fn.name)
    if match is None:
        raise IndexError(f"cannot derive a pet label from file name {fn.name!r}")
    return match.group(1)

# Seed the RNG used by random_splitter so the 90/10 train/validation split is
# identical on every run; otherwise the split and every metric below change
# from run to run. Data-loader shuffling and augmentation are not affected.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

# Reuse the training transforms defined for Imagewoof and hold out ~10% of the
# images, chosen at random, for validation.
files = ImageList.from_files(pets_path, tfms=tfms)
sd    = SplitData.split_by_func(files, partial(random_splitter, p_valid=0.1))

# Label each image with the breed parsed from its file name; keep the processor
# so its vocabulary (the list of classes) can be inspected afterwards.
proc  = CategoryProcessor()
data  = label_train_valid_data(sd, pet_labeler, proc_y=proc)
data.valid.x.tfms = val_tfms

# Number of output classes = size of the label vocabulary.
pets_features     = len(proc.vocab)
print(f"number of training, validation images: {len(data.train)},  {len(data.valid)}")
print(f"pets_features:{pets_features}")

# Validation uses a doubled batch size; only the training loader is shuffled.
# NOTE: this rebinds `databunch` from the Imagewoof data to the Pets data.
train_dl,valid_dl = ( DataLoader(data.train, batch_size=bs,   num_workers=4, shuffle=True),
                      DataLoader(data.valid, batch_size=bs*2, num_workers=4))
databunch = DataBunch(train_dl, valid_dl, c_in=3, c_out=pets_features)

number of training, validation images: 6698,  692
pets_features:37


In [14]:
# List the class names collected in the label processor's vocabulary.
print(f"categories:\n{ ', '.join(proc.vocab) }")

categories:
Egyptian_Mau, pug, Siamese, shiba_inu, Birman, leonberger, saint_bernard, Abyssinian, miniature_pinscher, wheaten_terrier, scottish_terrier, pomeranian, german_shorthaired, english_setter, newfoundland, Sphynx, British_Shorthair, Bombay, boxer, great_pyrenees, samoyed, Russian_Blue, Persian, japanese_chin, Ragdoll, english_cocker_spaniel, Maine_Coon, havanese, Bengal, american_pit_bull_terrier, keeshond, american_bulldog, chihuahua, beagle, yorkshire_terrier, staffordshire_bull_terrier, basset_hound


In [15]:
# Baseline: train on Pets from scratch. Build a new network with the same layer
# configuration, sized for the Pets databunch, and initialise it (no pretrained weights are loaded).
mm    = CnnModelManager( get_cnn_model(layers_sizes, databunch.c_in, databunch.c_out, layer) )
mm.initialize(is_resnet=False)

In [16]:
#xb,_ = getFirstbatch( learn.model, databunch, partial(BatchTransformXCallback, tfm = norm_imagenette))
#model_summary(model, xb, only_leaves=True, print_mod=False)
# Print one "requires_grad" line per leaf layer, then the full module tree.
mm.grads_summary()
print(f"model hierarchy:\n{mm.model}")

requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: None  : AdaptiveAvgPool2d
requires_grad: None  : Lambda
requires_grad: True  : Linear
model hierarchy:
Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (2): Sequential(
    (0): Conv2d(64, 128, kernel_size

In [17]:
learn = Learner( mm.model, databunch, loss_func=LabelSmoothingCrossEntropy() )
cbfs  = cbfs_base.copy() + [Recorder, partial(AvgStatsCallback,[accuracy])]
%time learn.fit(2, opt=Adam(sched,max_lr=3e-4, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.522268,0.067931,3.462243,0.073699,01:32
1,3.364242,0.114512,3.40095,0.104046,01:31


CPU times: user 8min 32s, sys: 2min 3s, total: 10min 35s
Wall time: 3min 3s


## Use the pretrained Imagewoof model for training with gradual unfreezing

In [18]:
# Show where the pretrained base model was saved (the Imagewoof data directory).
print(f"path to pretrained model:{path}")

path to pretrained model:/Users/kasparlund/.fastai/data/imagewoof-160


In [19]:
# Load the model pretrained on Imagewoof.
# The network is rebuilt with the Imagewoof class count (imagenette_features),
# not the Pets class count — presumably so its shapes match the saved model.
mm    = CnnModelManager( get_cnn_model(layers_sizes, databunch.c_in, imagenette_features, layer) )
mm.load(path)

# Print the module tree, then one "requires_grad" line per leaf layer.
print(f"model hierarchy:\n{mm.model}")
mm.grads_summary()

model hierarchy:
Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
  )
  (4): AdaptiveAvgPool2d(output_size=1)
  (5): Lambda()
  (6): Linear(in_featur

In [20]:
# Adapt the pretrained network to the Pets databunch.
# NOTE(review): presumably this replaces the classifier head with one sized for
# databunch.c_out; the next summary lists Flatten where Lambda was before —
# confirm the details in lib.modelmanager.
mm.adapt_model(databunch, normalization=norm_imagenette)

In [21]:
# Stage 1 of gradual unfreezing: freeze the pretrained body.
# The summary below shows the Conv2d layers with requires_grad False, while the
# BatchNorm2d layers and the final Linear layer remain trainable.
mm.freeze()
mm.grads_summary()

requires_grad: False : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: False : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: False : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: False : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: None  : AdaptiveAvgPool2d
requires_grad: None  : Flatten
requires_grad: True  : Linear


In [22]:
# Learner around the adapted (frozen-body) model, label-smoothing loss.
learn = Learner(mm.model, databunch, loss_func=LabelSmoothingCrossEntropy())

# Callback set for a normal training run: shared base callbacks plus the
# recorder and loss/accuracy statistics.
cbfs = [*cbfs_base, Recorder, partial(AvgStatsCallback, [accuracy])]

In [23]:
# Train for one epoch with the body frozen, using a comparatively high
# max_lr (1e-2); %time reports CPU and wall time.
%time learn.fit(1, opt=Adam(sched,max_lr=1e-2, moms=(0.85,0.95), max_wd = 1e-4), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.451682,0.09361,3.302769,0.117052,01:15


CPU times: user 2min 58s, sys: 44.5 s, total: 3min 42s
Wall time: 1min 15s


In [24]:
# Stage 2 of gradual unfreezing: make every layer trainable again.
# The summary below shows requires_grad True for all Conv2d, BatchNorm2d and Linear layers.
mm.unfreeze()
mm.grads_summary()

requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: True  : Conv2d
requires_grad: True  : BatchNorm2d
requires_grad: None  : ReLU
requires_grad: None  : AdaptiveAvgPool2d
requires_grad: None  : Flatten
requires_grad: True  : Linear


In [25]:
# Fine-tune the whole network for one epoch, reusing the `learn` object created
# before unfreezing, with a much lower max_lr (5e-5) and weight decay (1e-6)
# than the frozen stage.
learn.fit(1, opt=Adam(sched,max_lr=5e-5, moms=(0.85,0.95), max_wd = 1e-6), cb_funcs=cbfs)

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,3.267389,0.133473,3.271721,0.122832,01:41
