In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier, cut_and_concatenate_model
from model.utils import freeze_layers
from trainer.trainer import get_trainer

from torch_deform_conv.layers import ConvOffset2D
from torch_deform_conv.cnn import get_vgg11_bn_deform

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# Run configuration: read below when building the data loaders and handed
# as a whole to get_trainer().
# NOTE(review): 'arch' is 'resnet50' although the model built later in this
# notebook comes from get_vgg11_bn_deform — confirm which one is intended.
config = dict(
    # mini-batch sizes for the two data loaders
    train_batch_size=256,
    val_batch_size=256,
    # network selection
    arch='resnet50',
    pretrained=True,
    # optimiser settings
    optimizer='Adam',
    learning_rate=1e-3,
    decay_lr_freq=4e4,
    weight_decay=1e-5,
    # checkpoint to resume from (None here) and epoch range
    resume=None,
    start_epoch=0,
    epochs=10,
    # frequencies for printing / validating / saving
    # (units presumably iterations — TODO confirm against the trainer)
    print_freq=10,
    validate_freq=1e3,
    save_freq=1e4,
    # starting value for the best validation precision@1
    best_val_prec1=0,
)

In [4]:
import torchvision.models as models

# Both splits are read from the same BSON file and the same offsets index;
# only the image-list CSV differs between training and validation.
BSON_FILE_PATH = "/mnt/data/cdiscount/train.bson"
OFFSETS_CSV = "train_offsets.csv"
IMAGE_SIZE = 224   # value passed as `resize` to the dataset factory
NUM_WORKERS = 6    # worker processes per DataLoader

# get dataset
print('getting dataset...')
train_dataset = get_cdiscount_dataset(offsets_csv=OFFSETS_CSV,
                                      images_csv="train_images.csv",
                                      bson_file_path=BSON_FILE_PATH,
                                      with_label=True,
                                      resize=IMAGE_SIZE)
val_dataset = get_cdiscount_dataset(offsets_csv=OFFSETS_CSV,
                                    images_csv="val_images.csv",
                                    bson_file_path=BSON_FILE_PATH,
                                    with_label=True,
                                    resize=IMAGE_SIZE)

# get data loader
print('getting data loader...')
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
# Validation is iterated in a fixed order: shuffling adds nothing to an
# evaluation pass and makes successive validation runs harder to compare.
val_dataloader = DataLoader(val_dataset,
                            batch_size=config['val_batch_size'],
                            shuffle=False,
                            num_workers=NUM_WORKERS)

# define model
# print("=> using pre-trained model '{}'".format(config['arch']))
# model = models.__dict__[config['arch']](pretrained=config['pretrained'])

# model = torch.nn.Sequential(*list(model.children())[:-1])
# model.add_module('classifier', torch.nn.Linear(in_features=2048, out_features=5270))
# model = torch.nn.DataParallel(model).cuda()
# print(model)

getting dataset...
getting data loader...
VGG (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU (inplace)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU (inplace)
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (9): ReLU (inplace)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (12): ReLU (inplace)
    (13): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=Tr

In [8]:
# Build the network with torch_deform_conv's get_vgg11_bn_deform factory.
# The pretrained flag is taken from the shared config (currently True)
# instead of being hard-coded, so the config stays the single place to
# toggle it.
model = get_vgg11_bn_deform(pretrained=config['pretrained'])

#freeze_layers(model, 7)

# model = cut_and_concatenate_model(model, -1, ConvOffset2D(2048), 2048, 5270)
# model = torch.nn.DataParallel(model).cuda()

# print(model)

VGG (
  (features): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU (inplace)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU (inplace)
    (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (9): ReLU (inplace)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
    (12): ReLU (inplace)
    (13): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (16): ReLU (inplace)
    (17): Con

In [14]:
# Print every direct sub-module of the model's `features` child, one name
# followed by its repr per entry. Nothing is printed if the model has no
# child called 'features'.
features_block = dict(model.named_children()).get('features')
if features_block is not None:
    for layer_name, layer in features_block.named_children():
        print(layer_name, layer, sep='\n')

0
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
2
ReLU (inplace)
3
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
4
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
5
ReLU (inplace)
6
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
7
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
8
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
9
ReLU (inplace)
10
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
12
ReLU (inplace)
13
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
14
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
15
BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
16
ReLU (inplace)
17
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
18
BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
19
ReLU (inplace)
20
Conv2d(256, 256

In [4]:
# Loss function: cross-entropy, moved to the GPU in place
# (Module.cuda() returns the module itself).
criterion = torch.nn.CrossEntropyLoss()
criterion.cuda()

# Build the project trainer from the loaders, the model, the loss and the
# run configuration.
trainer_args = (train_dataloader, val_dataloader, model, criterion, config)
Trainer = get_trainer(*trainer_args)

# Start the training loop.
Trainer.run()

start training
Epoch: [0][0/38680]	Time 52.296 (52.296)	Data 7.834 (7.834)	Loss 8.6159 (8.6159)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/38680]	Time 2.336 (6.829)	Data 0.011 (0.733)	Loss 7.0755 (7.5434)	Prec@1 11.719 (6.960)	Prec@5 23.047 (13.707)
Epoch: [0][20/38680]	Time 2.366 (4.691)	Data 0.027 (0.391)	Loss 6.3250 (7.1251)	Prec@1 13.281 (8.017)	Prec@5 27.344 (16.964)
Epoch: [0][30/38680]	Time 2.347 (3.933)	Data 0.014 (0.269)	Loss 6.2648 (6.9350)	Prec@1 14.453 (8.745)	Prec@5 28.516 (18.599)
Epoch: [0][40/38680]	Time 2.369 (3.543)	Data 0.011 (0.207)	Loss 5.8797 (6.7134)	Prec@1 17.969 (9.851)	Prec@5 32.812 (20.979)
Epoch: [0][50/38680]	Time 2.355 (3.309)	Data 0.011 (0.169)	Loss 5.7711 (6.5428)	Prec@1 14.453 (10.669)	Prec@5 30.859 (22.541)
Epoch: [0][60/38680]	Time 2.376 (3.151)	Data 0.014 (0.144)	Loss 5.6413 (6.3988)	Prec@1 16.797 (11.488)	Prec@5 30.859 (24.110)
Epoch: [0][70/38680]	Time 2.358 (3.038)	Data 0.012 (0.125)	Loss 5.5684 (6.2747)	Prec@1 16.016 (12.236)	Prec@5 

Process Process-2:
Process Process-6:
Process Process-1:
Process Process-3:
Process Process-4:
Process Process-5:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    

KeyboardInterrupt: 