In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from trainer.trainer import get_trainer

import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from torch_deform_conv.layers import ConvOffset2D
from torch_deform_conv.cnn import get_vgg11_bn, get_vgg11_bn_deform
from torch_deform_conv.utils import transfer_weights

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# Run configuration: a single flat dict handed to get_trainer() further down.
# The exact meaning/units of each entry are defined by the trainer, which is
# not visible in this notebook; the group comments below are reading aids only.
config = dict(
    # batch sizes used when building the train / validation DataLoaders
    train_batch_size=200,
    val_batch_size=200,
    # architecture descriptors
    # NOTE(review): the model cell calls get_vgg11_bn() directly and does not
    # read these two entries -- confirm whether the trainer uses them.
    arch='vgg11_bn',
    pretrained=True,
    # optimisation settings
    optimizer='Adam',
    learning_rate=1e-4,
    decay_lr_freq=4e4,   # float literal, kept as-is
    weight_decay=1e-5,
    # checkpoint to resume from (None here)
    resume=None,
    # epoch range
    start_epoch=0,
    epochs=10,
    # logging / validation / checkpoint cadence (floats kept as-is)
    print_freq=10,
    validate_freq=7e4,
    save_freq=1e3,
    # initial value for the best validation top-1 precision
    best_val_prec1=0,
)

In [3]:
import torchvision.models as models

# --- Datasets ---------------------------------------------------------------
# Both splits are read from the same BSON file with the same offsets CSV;
# only the image-list CSV differs between training and validation.
print('getting dataset...')
train_dataset = get_cdiscount_dataset(
    offsets_csv="train_offsets.csv",
    images_csv="train_images.csv",
    bson_file_path="/mnt/data/cdiscount/train.bson",
    with_label=True,
    resize=160,
)
val_dataset = get_cdiscount_dataset(
    offsets_csv="train_offsets.csv",
    images_csv="val_images.csv",
    bson_file_path="/mnt/data/cdiscount/train.bson",
    with_label=True,
    resize=160,
)

# --- Data loaders -----------------------------------------------------------
# Batch sizes come from `config`; each loader uses 6 worker processes.
print('getting data loader...')
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config['train_batch_size'],
    shuffle=True,
    num_workers=6,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config['val_batch_size'],
    shuffle=True,  # NOTE(review): validation is shuffled as well -- confirm this is intended
    num_workers=6,
)



getting dataset...
getting data loader...


In [4]:
# Build the network in one expression:
#   1. get_vgg11_bn()          -> backbone
#   2. assemble_model(...)     -> backbone wrapped by the project's assembler
#   3. DataParallel(...).cuda() -> replicated across the visible GPUs
# The arguments -1, 12800 and 5270 are passed through to assemble_model
# unchanged; presumably a cut index, the classifier input width and the number
# of classes -- TODO confirm against model/model.py.
# Alternative backbone kept for reference:
#   models.__dict__[config['arch']](pretrained=False)
# NOTE(review): the saved output of this cell lists ConvOffset2D layers;
# confirm that get_vgg11_bn() (not get_vgg11_bn_deform) produced it.
model = torch.nn.DataParallel(
    assemble_model(get_vgg11_bn(), -1, 12800, 5270)
).cuda()
print(model)

DataParallel (
  (module): AssembledModel (
    (model): Sequential (
      (0): Sequential (
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU (inplace)
        (3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (4): ConvOffset2D(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (5): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
        (7): ReLU (inplace)
        (8): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (9): ConvOffset2D(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (10): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (12): ReLU (inplace)
        (13): ConvOffset2D(64, 128, kernel_size=(3

In [5]:
# Loss function: cross entropy, moved onto the GPU.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Hand the loaders, model, loss and run configuration to the project's
# trainer factory.
Trainer = get_trainer(
    train_dataloader,
    val_dataloader,
    model,
    criterion,
    config,
)

# Start the training run.
Trainer.run()

start training
Epoch: [0][0/49501]	Time 20.848 (20.848)	Data 4.972 (4.972)	Loss 8.7327 (8.7327)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/49501]	Time 2.046 (3.777)	Data 0.006 (0.461)	Loss 8.1464 (8.0806)	Prec@1 1.000 (1.409)	Prec@5 6.500 (4.636)
Epoch: [0][20/49501]	Time 2.048 (2.976)	Data 0.018 (0.246)	Loss 7.7671 (7.9978)	Prec@1 4.500 (2.357)	Prec@5 11.000 (7.024)
Epoch: [0][30/49501]	Time 2.121 (2.689)	Data 0.011 (0.169)	Loss 7.2416 (7.8362)	Prec@1 5.500 (3.000)	Prec@5 10.500 (8.306)
Epoch: [0][40/49501]	Time 2.111 (2.549)	Data 0.012 (0.130)	Loss 6.4630 (7.6329)	Prec@1 10.000 (3.720)	Prec@5 19.000 (9.573)
Epoch: [0][50/49501]	Time 2.106 (2.462)	Data 0.012 (0.106)	Loss 6.5111 (7.4899)	Prec@1 14.500 (4.441)	Prec@5 21.500 (10.716)
Epoch: [0][60/49501]	Time 2.056 (2.402)	Data 0.007 (0.090)	Loss 6.7042 (7.3764)	Prec@1 8.500 (5.000)	Prec@5 15.500 (11.680)
Epoch: [0][70/49501]	Time 2.038 (2.358)	Data 0.008 (0.079)	Loss 6.2002 (7.2587)	Prec@1 11.500 (5.570)	Prec@5 21.000 (12.5

Process Process-2:
Process Process-5:
Process Process-6:
Process Process-1:
Process Process-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-4:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  F

KeyboardInterrupt: 