In [1]:
import pytorch_lightning as pl
from src import Model, DataModule

In [2]:
# Edge length shared by the crop transforms (used for both height and width)
# and also stored under the 'size' key.
size = 256

# Single experiment configuration. The same mapping is forwarded to the data
# module (as keyword arguments) and to the model (as one dict), so every key
# below is part of that contract and must keep its exact spelling.
config = dict(
    # optimization
    lr=3e-4,
    optimizer='Adam',
    batch_size=128,
    # data
    extra_data=1,
    subset=0.1,
    num_workers=0,
    pin_memory=True,
    # model
    backbone='efficientnet_b3a',
    pretrained=True,
    unfreeze=0,
    # data augmentation: transform name -> keyword arguments for that transform
    size=size,
    train_trans=dict(
        RandomCrop=dict(height=size, width=size),
        HorizontalFlip={},
        VerticalFlip={},
        Normalize={},
    ),
    val_trans=dict(
        CenterCrop=dict(height=size, width=size),
        Normalize={},
    ),
    # training params
    precision=16,
    max_epochs=50,
    val_batches=5,
    es_start_from=0,
)

In [3]:
# Build the data module. The dataset file name is chosen by the 'extra_data'
# flag, and every config entry is forwarded as a keyword argument.
dm = DataModule(
    file='data_old' if not config['extra_data'] else 'data_extra',
    **config
)

# The model receives the whole config dict as a single positional argument.
model = Model(config)

In [4]:

# Trainer used only for the automatic batch-size search: 'binsearch' asks
# Lightning to look for the largest batch size that runs without failing.
trainer = pl.Trainer(
    auto_scale_batch_size='binsearch',
    gpus=1,
    precision=config['precision'],
    limit_val_batches=config['val_batches'],
)

# Run the tuning routines enabled on the trainer above.
trainer.tune(model, dm)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Training samples:  21642
Validation samples:  5411
Training only on 2165 samples


Batch size 2 succeeded, trying batch size 4
Batch size 4 succeeded, trying batch size 8
Batch size 8 succeeded, trying batch size 16
Batch size 16 succeeded, trying batch size 32
Batch size 32 succeeded, trying batch size 64
Batch size 64 succeeded, trying batch size 128
Batch size 128 failed, trying batch size 96


KeyboardInterrupt: 

In [7]:
# Manually pin the batch size: the automatic search above was interrupted
# after reporting 64 as succeeding and 128 as failing, so a conservative
# value is set by hand.
# NOTE(review): `dm` was constructed earlier from config['batch_size'];
# confirm the data loaders actually pick up this override.
model.hparams.batch_size = 32
# Display the resulting hyperparameters as the cell output.
model.hparams

"backbone":      efficientnet_b3a
"batch_size":    32
"es_start_from": 0
"extra_data":    1
"lr":            0.0003
"max_epochs":    50
"num_workers":   0
"optimizer":     Adam
"pin_memory":    True
"precision":     16
"pretrained":    True
"size":          256
"subset":        0.1
"train_trans":   {'RandomCrop': {'height': 256, 'width': 256}, 'HorizontalFlip': {}, 'VerticalFlip': {}, 'Normalize': {}}
"unfreeze":      0
"val_batches":   5
"val_trans":     {'CenterCrop': {'height': 256, 'width': 256}, 'Normalize': {}}

In [8]:
# Fresh trainer for the learning-rate search; replaces the earlier `trainer`.
# NOTE(review): the next cell calls the LR finder explicitly through
# `trainer.tuner`, so `auto_lr_find` presumably only matters if
# `trainer.tune()` is used instead - confirm whether the flag is needed.
trainer = pl.Trainer(
    auto_lr_find=True,
    gpus=1,
    precision=config['precision'],
    limit_val_batches=config['val_batches'],
)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


In [9]:
# Run the learning-rate range test on the model with the data module and keep
# the returned finder object for later inspection.
# NOTE(review): the recorded run stopped early because the loss diverged;
# check the finder's results before adopting a learning rate from it.
lr_finder = trainer.tuner.lr_find(model, dm)


  | Name     | Type                 | Params
--------------------------------------------------
0 | backbone | EfficientNetFeatures | 10.1 M
1 | head     | Sequential           | 1.9 K 
--------------------------------------------------
10.1 M    Trainable params
0         Non-trainable params
10.1 M    Total params


HBox(children=(FloatProgress(value=0.0, description='Finding best initial lr', style=ProgressStyle(description…






LR finder stopped early due to diverging loss.
