In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier, cut_and_concatenate_model
from model.utils import freeze_layers
from trainer.trainer import get_trainer

from torch_deform_conv.layers import ConvOffset2D
from torch_deform_conv.cnn import get_vgg11_bn_deform

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): a blanket "ignore" also hides deprecation and numerical
# warnings raised by torch / torchvision; consider narrowing the filter to
# specific categories once the run is stable.
import warnings
warnings.filterwarnings("ignore")

# Optional: send everything written with print() to a log file instead of the
# notebook output. Disabled by default; uncomment both lines to enable.
# NOTE(review): the file name mentions resnet34 although the configured
# architecture is 'vgg19_bn' - rename before enabling.
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# Run configuration handed to the trainer. Keys are grouped by concern; the
# frequency settings are given as floats in scientific notation.
config = dict(
    # batching
    train_batch_size=128,
    val_batch_size=128,
    # network
    arch='vgg19_bn',
    pretrained=True,
    # optimisation
    optimizer='Adam',
    learning_rate=1e-3,
    decay_lr_freq=4e4,
    weight_decay=1e-5,
    # checkpoint to resume from (None starts from scratch)
    resume=None,
    # schedule
    start_epoch=0,
    epochs=10,
    # reporting / validation / checkpoint cadence
    print_freq=10,
    validate_freq=1e3,
    save_freq=1e4,
    # best validation top-1 precision seen so far
    best_val_prec1=0,
)

In [3]:
import torchvision.models as models

# ---------------------------------------------------------------------------
# Settings shared by several statements in this cell. Each value used to be
# repeated inline; naming them keeps the datasets, loaders and classifier in
# sync when one of them is changed.
# ---------------------------------------------------------------------------
BSON_FILE_PATH = "/mnt/data/cdiscount/train.bson"
OFFSETS_CSV = "train_offsets.csv"
IMAGE_SIZE = 160                 # passed as `resize` to both datasets
NUM_WORKERS = 6                  # loader worker processes (train and val)
NUM_CLASSES = 5270               # output width of the new classifier
NUM_FROZEN_FEATURE_LAYERS = 40   # `features` children "0".."39" are frozen

# Input width of the replacement classifier. Evaluates to 12800 for
# IMAGE_SIZE = 160, the value that was previously hard-coded.
# NOTE(review): assumes a VGG-style feature extractor (512 output channels,
# five stride-2 max-pools) fed with square IMAGE_SIZE x IMAGE_SIZE inputs -
# revisit if config['arch'] or the dataset's resize behaviour changes.
CLASSIFIER_IN_FEATURES = 512 * (IMAGE_SIZE // 32) ** 2

# get dataset
print('getting dataset...')
# Both splits read the same bson file and offsets table; only the image index
# CSV differs between them.
dataset_kwargs = dict(offsets_csv=OFFSETS_CSV,
                      bson_file_path=BSON_FILE_PATH,
                      with_label=True,
                      resize=IMAGE_SIZE)
train_dataset = get_cdiscount_dataset(images_csv="train_images.csv", **dataset_kwargs)
val_dataset = get_cdiscount_dataset(images_csv="val_images.csv", **dataset_kwargs)

# get data loader
print('getting data loader...')
train_dataloader = DataLoader(train_dataset,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
# NOTE(review): the validation loader is shuffled as well. Kept as-is because
# the trainer may rely on it (e.g. validating on a random subset) - confirm,
# otherwise shuffle=False is the usual choice for evaluation.
val_dataloader = DataLoader(val_dataset,
                            batch_size=config['val_batch_size'],
                            shuffle=True,
                            num_workers=NUM_WORKERS)

# define model
# Only claim "pre-trained" in the log when pretrained weights are requested.
if config['pretrained']:
    print("=> using pre-trained model '{}'".format(config['arch']))
else:
    print("=> creating model '{}'".format(config['arch']))
model = getattr(models, config['arch'])(pretrained=config['pretrained'])

# freeze layers except last block
# The set of frozen child names is built once instead of once per submodule.
frozen_names = {str(i) for i in range(NUM_FROZEN_FEATURE_LAYERS)}
# Architectures without a `features` child are left untouched.
feature_extractor = dict(model.named_children()).get('features')
if feature_extractor is not None:
    for subname, submodule in feature_extractor.named_children():
        if subname not in frozen_names:
            continue
        print(submodule)
        # NOTE(review): this only stops gradient updates. BatchNorm running
        # statistics in these layers can still change while the model is in
        # train mode - confirm that is intended.
        for param in submodule.parameters():
            param.requires_grad = False

# New classification head: a single linear layer over the flattened features.
classifier_layer = [
    torch.nn.Linear(in_features=CLASSIFIER_IN_FEATURES, out_features=NUM_CLASSES)
]
classifier = torch.nn.Sequential(*classifier_layer)

# Attach the new head (presumably -1 drops the backbone's last child, i.e. the
# original classifier - see model.model), then replicate across the visible
# GPUs and move the parameters to CUDA.
model = assemble_model_with_classifier(model, -1, classifier)
model = torch.nn.DataParallel(model).cuda()
print(model)

getting dataset...
getting data loader...
=> using pre-trained model 'vgg19_bn'
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
ReLU (inplace)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
ReLU (inplace)
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
ReLU (inplace)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
ReLU (inplace)
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
ReLU (inplace)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
ReL

In [4]:
# Loss function: multi-class cross-entropy, moved to the GPU.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Build the trainer from the loaders, model, loss and run configuration.
Trainer = get_trainer(
    train_dataloader,
    val_dataloader,
    model,
    criterion,
    config,
)

# Start training; progress is printed by the trainer as it runs.
Trainer.run()

start training
Epoch: [0][0/77359]	Time 20.469 (20.469)	Data 4.199 (4.199)	Loss 8.5981 (8.5981)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/77359]	Time 0.940 (2.701)	Data 0.012 (0.395)	Loss 6.9101 (7.5569)	Prec@1 6.250 (4.830)	Prec@5 14.062 (10.298)
Epoch: [0][20/77359]	Time 0.938 (1.858)	Data 0.012 (0.213)	Loss 6.7617 (7.2380)	Prec@1 9.375 (6.882)	Prec@5 20.312 (14.211)
Epoch: [0][30/77359]	Time 0.939 (1.561)	Data 0.022 (0.149)	Loss 6.1159 (6.9435)	Prec@1 17.969 (8.795)	Prec@5 30.469 (17.692)
Epoch: [0][40/77359]	Time 0.929 (1.409)	Data 0.011 (0.116)	Loss 6.0619 (6.7569)	Prec@1 14.062 (10.194)	Prec@5 27.344 (20.179)
Epoch: [0][50/77359]	Time 0.923 (1.315)	Data 0.014 (0.096)	Loss 6.1037 (6.5852)	Prec@1 14.844 (11.550)	Prec@5 25.781 (22.212)
Epoch: [0][60/77359]	Time 0.940 (1.252)	Data 0.014 (0.083)	Loss 6.0413 (6.4591)	Prec@1 10.938 (12.398)	Prec@5 28.906 (23.770)
Epoch: [0][70/77359]	Time 0.951 (1.208)	Data 0.012 (0.073)	Loss 5.7897 (6.3240)	Prec@1 16.406 (13.567)	Prec@5 2

Process Process-5:
Process Process-6:
Process Process-2:
Process Process-4:
Process Process-3:
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line

KeyboardInterrupt: 