In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from trainer.trainer import get_trainer

import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from torch_deform_conv.layers import ConvOffset2D
from torch_deform_conv.cnn import get_vgg11_bn, get_vgg11_bn_deform
from torch_deform_conv.utils import transfer_weights

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# Run settings: the batch sizes are read when the data loaders are built,
# and the whole dict is handed to get_trainer().
config = dict(
    # data loader batch sizes
    train_batch_size=200,
    val_batch_size=200,
    # network selection
    arch='vgg11_bn',
    pretrained=True,
    # optimiser settings
    optimizer='Adam',
    learning_rate=1e-4,
    decay_lr_freq=4e4,
    weight_decay=1e-5,
    # checkpoint to resume from (None = none given)
    resume=None,
    # epoch range
    start_epoch=0,
    epochs=10,
    # logging / validation / checkpoint frequencies
    # NOTE(review): units (iterations vs. samples) are defined by the trainer — confirm there.
    print_freq=10,
    validate_freq=7e4,
    save_freq=1e3,
    # initial value for the best validation precision@1
    best_val_prec1=0,
)

In [3]:
import torchvision.models as models

# Build the training and validation datasets.
# Both splits read the same offsets CSV and BSON file; only the images CSV differs.
print('getting dataset...')
_dataset_kwargs = dict(
    offsets_csv="train_offsets.csv",
    bson_file_path="/mnt/data/cdiscount/train.bson",
    with_label=True,
    resize=160,
)
train_dataset = get_cdiscount_dataset(images_csv="train_images.csv", **_dataset_kwargs)
val_dataset = get_cdiscount_dataset(images_csv="val_images.csv", **_dataset_kwargs)

# Wrap each dataset in a DataLoader; batch sizes come from `config`.
# NOTE(review): the validation loader is shuffled as well — confirm that is intended.
print('getting data loader...')
_loader_kwargs = dict(shuffle=True, num_workers=6)
train_dataloader = DataLoader(
    train_dataset, batch_size=config['train_batch_size'], **_loader_kwargs
)
val_dataloader = DataLoader(
    val_dataset, batch_size=config['val_batch_size'], **_loader_kwargs
)



getting dataset...
getting data loader...


In [4]:
# Baseline CNN: start from the network returned by get_vgg11_bn().
# Alternative kept for reference (torchvision backbone):
#   models.__dict__[config['arch']](pretrained=False)
backbone = get_vgg11_bn()

# NOTE(review): -1, 12800 and 5270 are passed to assemble_model unchanged;
# presumably a layer index, the feature size and the class count — confirm in model/model.py.
assembled = assemble_model(backbone, -1, 12800, 5270)

# Wrap in DataParallel and move to the GPU, then show the final architecture.
model = torch.nn.DataParallel(assembled).cuda()
print(model)

DataParallel (
  (module): AssembledModel (
    (model): Sequential (
      (0): Sequential (
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU (inplace)
        (3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
        (6): ReLU (inplace)
        (7): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (10): ReLU (inplace)
        (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (12): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (13): ReLU (inplace)
        (14): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))

In [5]:
# Loss function: cross-entropy, moved to the GPU.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Build the trainer from the loaders, model, loss and run settings.
Trainer = get_trainer(
    train_dataloader,
    val_dataloader,
    model,
    criterion,
    config,
)

# Start training.
Trainer.run()

start training
Epoch: [0][0/49501]	Time 12.371 (12.371)	Data 5.029 (5.029)	Loss 8.7455 (8.7455)	Prec@1 0.000 (0.000)	Prec@5 0.500 (0.500)
Epoch: [0][10/49501]	Time 0.470 (1.553)	Data 0.027 (0.480)	Loss 8.2420 (8.2587)	Prec@1 0.500 (0.955)	Prec@5 4.500 (3.318)
Epoch: [0][20/49501]	Time 0.464 (1.047)	Data 0.044 (0.266)	Loss 7.7987 (8.1182)	Prec@1 4.000 (1.929)	Prec@5 12.500 (5.881)
Epoch: [0][30/49501]	Time 0.512 (0.899)	Data 0.317 (0.272)	Loss 7.4404 (7.9263)	Prec@1 6.500 (2.887)	Prec@5 11.000 (7.468)
Epoch: [0][40/49501]	Time 0.684 (0.845)	Data 0.500 (0.299)	Loss 6.8873 (7.7259)	Prec@1 7.000 (3.659)	Prec@5 13.000 (8.744)
Epoch: [0][50/49501]	Time 0.484 (0.823)	Data 0.111 (0.330)	Loss 6.8153 (7.5647)	Prec@1 8.000 (4.431)	Prec@5 14.000 (9.902)
Epoch: [0][60/49501]	Time 0.783 (0.805)	Data 0.596 (0.346)	Loss 6.5430 (7.4379)	Prec@1 10.000 (5.057)	Prec@5 22.500 (11.090)
Epoch: [0][70/49501]	Time 0.674 (0.781)	Data 0.476 (0.340)	Loss 6.6788 (7.3265)	Prec@1 10.500 (5.697)	Prec@5 17.000 (12.007

Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 40, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
Process Process-4:
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process Process-1:
Process Process-6:
Process Process-3:
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 40, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
Process Process-5:
  File "/home/weiso/data_science_competition/Cdiscount/src/dataset/dataset.py", line 50, in __getitem__
    img = io.imread(BytesIO(bson_img))
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing

  File "/home/weiso/.local/lib/python3.5/site-packages/skimage/io/_plugins/pil_plugin.py", line 53, in pil_to_ndarray
    im.getdata()[0]
  File "/home/weiso/.local/lib/python3.5/site-packages/PIL/Image.py", line 1216, in getdata
    self.load()
  File "/home/weiso/.local/lib/python3.5/site-packages/PIL/ImageFile.py", line 236, in load
    n, err_code = decoder.decode(b)
KeyboardInterrupt


KeyboardInterrupt: 