In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from trainer.trainer import get_trainer

import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from torch_deform_conv.layers import ConvOffset2D
from torch_deform_conv.cnn import get_vgg11_bn, get_vgg11_bn_deform
from torch_deform_conv.utils import transfer_weights

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# configuration
config = {
    'train_batch_size': 128, 'val_batch_size': 128,
    'arch': 'vgg11_bn', 'pretrained': True,
    'optimizer': 'Adam', 'learning_rate': 1e-4, 'decay_lr_freq': 4e4, 'weight_decay': 1e-5,
    'resume': None,
    'start_epoch': 0, 'epochs': 10,
    'print_freq': 10, 'validate_freq': 7e4, 'save_freq': 1e3,
    'best_val_prec1': 0
}

In [3]:
import torchvision.models as models

# Build the training and validation datasets. Both read from the same BSON
# file with the same offsets index and differ only in the image-list CSV.
print('getting dataset...')
train_dataset, val_dataset = (
    get_cdiscount_dataset(offsets_csv="train_offsets.csv",
                          images_csv=images_csv,
                          bson_file_path="/mnt/data/cdiscount/train.bson",
                          with_label=True,
                          resize=160)
    for images_csv in ("train_images.csv", "val_images.csv")
)

# Wrap each dataset in a shuffling, multi-worker loader.
# NOTE(review): the validation loader is shuffled as well -- confirm that
# this is intended.
print('getting data loader...')
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=config['train_batch_size'],
    shuffle=True,
    num_workers=6,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=config['val_batch_size'],
    shuffle=True,
    num_workers=6,
)



getting dataset...
getting data loader...


In [4]:
# Baseline CNN: a VGG-11-BN backbone from torch_deform_conv, passed through
# the project's assemble_model helper, wrapped in DataParallel and moved to
# the GPU(s).
# (Alternative backbone that was tried:
#  models.__dict__[config['arch']](pretrained=False).)
# NOTE(review): 12800 and 5270 look like the flattened feature size and the
# number of target classes -- confirm against assemble_model's signature.
model = torch.nn.DataParallel(
    assemble_model(get_vgg11_bn(), -1, 12800, 5270)
).cuda()
print(model)

DataParallel (
  (module): AssembledModel (
    (model): Sequential (
      (0): Sequential (
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU (inplace)
        (3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (6): ReLU (inplace)
        (7): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
        (10): ReLU (inplace)
        (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
        (13): ReLU (inplace)
        (14): MaxPool2d (size=(2, 2), stride=(2, 2), dilation

In [5]:
# Loss function (criterion): cross-entropy, moved onto the GPU.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Build the trainer from the loaders, model, loss and run configuration ...
Trainer = get_trainer(
    train_dataloader,
    val_dataloader,
    model,
    criterion,
    config,
)

# ... and start the training loop.
Trainer.run()

start training
Epoch: [0][0/49501]	Time 35.872 (35.872)	Data 4.897 (4.897)	Loss 8.7517 (8.7517)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/49501]	Time 1.936 (4.954)	Data 0.006 (0.455)	Loss 8.4973 (8.2207)	Prec@1 2.500 (1.955)	Prec@5 5.000 (5.591)
Epoch: [0][20/49501]	Time 1.913 (3.511)	Data 0.008 (0.243)	Loss 7.3200 (8.0143)	Prec@1 4.000 (2.619)	Prec@5 8.000 (6.833)
Epoch: [0][30/49501]	Time 1.940 (2.999)	Data 0.007 (0.167)	Loss 6.5663 (7.7106)	Prec@1 4.000 (3.210)	Prec@5 10.500 (8.081)
Epoch: [0][40/49501]	Time 1.908 (2.742)	Data 0.007 (0.129)	Loss 7.1862 (7.5485)	Prec@1 6.500 (3.695)	Prec@5 14.500 (9.000)
Epoch: [0][50/49501]	Time 1.928 (2.584)	Data 0.008 (0.105)	Loss 6.5850 (7.4256)	Prec@1 5.000 (4.039)	Prec@5 16.000 (9.833)
Epoch: [0][60/49501]	Time 1.945 (2.478)	Data 0.008 (0.089)	Loss 6.7448 (7.3513)	Prec@1 3.000 (4.279)	Prec@5 12.500 (10.484)
Epoch: [0][70/49501]	Time 1.942 (2.403)	Data 0.011 (0.078)	Loss 7.0017 (7.2745)	Prec@1 4.000 (4.570)	Prec@5 10.500 (10.796)


Process Process-1:
Process Process-4:
Process Process-3:
Process Process-5:
Traceback (most recent call last):
Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process Process-6:
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self.

KeyboardInterrupt: 