In [2]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from model.utils import freeze_layers
from trainer.trainer import get_trainer

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [3]:
# configuration
config = {
    'train_batch_size': 256, 'val_batch_size': 256,
    'arch': 'resnet152', 'pretrained': True,
    'optimizer': 'Adam', 'learning_rate': 1e-3, 'decay_lr_freq': 4e4, 'weight_decay': 1e-5,
    'resume': None,
    'start_epoch': 0, 'epochs': 10,
    'print_freq': 10, 'validate_freq': 1e4, 'save_freq': 1e4,
    'best_val_prec1': 0
}

In [4]:
import torchvision.models as models

# Values that were previously repeated or inlined as magic numbers.
BSON_FILE_PATH = "/mnt/data/cdiscount/train.bson"  # both splits read from this file
IMAGE_SIZE = 224          # passed as `resize` to the dataset factory
NUM_CLASSES = 5270        # output width of the classifier head (presumably the category count)
NUM_WORKERS = 6           # DataLoader worker processes per loader
NUM_FROZEN_LAYERS = 7     # second argument handed to freeze_layers()
FALLBACK_IN_FEATURES = 2048  # used only when the backbone exposes no `fc.in_features`

# get dataset
# Train and validation differ only in the image-index CSV; both share the
# same offsets CSV and bson file.
print('getting dataset...')
train_dataset = get_cdiscount_dataset(offsets_csv="train_offsets.csv",
                                      images_csv="train_images.csv",
                                      bson_file_path=BSON_FILE_PATH,
                                      with_label=True,
                                      resize=IMAGE_SIZE)
val_dataset = get_cdiscount_dataset(offsets_csv="train_offsets.csv",
                                    images_csv="val_images.csv",
                                    bson_file_path=BSON_FILE_PATH,
                                    with_label=True,
                                    resize=IMAGE_SIZE)

# get data loader
print('getting data loader...')
train_dataloader = DataLoader(train_dataset,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
# Validation is not shuffled: the aggregate metrics do not depend on sample
# order, and a fixed order keeps per-batch validation logs comparable
# between runs.
val_dataloader = DataLoader(val_dataset,
                            batch_size=config['val_batch_size'],
                            shuffle=False,
                            num_workers=NUM_WORKERS)

# define model
print("=> using pre-trained model '{}'".format(config['arch']))
model = models.__dict__[config['arch']](pretrained=config['pretrained'])

# Size the new head from the backbone instead of hard-coding 2048: torchvision
# ResNets expose their final layer as `fc`, and its input width depends on the
# architecture (512 for resnet18/34, 2048 for resnet50/101/152). Backbones
# without an `fc.in_features` attribute keep the previous value.
classifier_in_features = getattr(getattr(model, 'fc', None),
                                 'in_features',
                                 FALLBACK_IN_FEATURES)

freeze_layers(model, NUM_FROZEN_LAYERS)

classifier_layer = [
    torch.nn.Linear(in_features=classifier_in_features, out_features=NUM_CLASSES)
]

classifier = torch.nn.Sequential(*classifier_layer)

# Attach the new head via the project helper, then wrap for multi-GPU use and
# move to the GPU.
model = assemble_model_with_classifier(model, -1, classifier)
model = torch.nn.DataParallel(model).cuda()
print(model)

getting dataset...
getting data loader...
=> using pre-trained model 'resnet50'
0 freezing Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
1 freezing BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
2 freezing ReLU (inplace)
3 freezing MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
4 freezing Sequential (
  (0): Bottleneck (
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (relu): ReLU (inplace)
    (downsample): Sequential (
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momen

In [5]:
# Loss function: cross-entropy, moved to the GPU.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Build the trainer from the two loaders, the model, the loss and the run
# configuration.
Trainer = get_trainer(
    train_dataloader,
    val_dataloader,
    model,
    criterion,
    config,
)

# Start training.
Trainer.run()

start training
Epoch: [0][0/38680]	Time 29.644 (29.644)	Data 8.393 (8.393)	Loss 8.5846 (8.5846)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/38680]	Time 2.205 (4.676)	Data 0.013 (0.778)	Loss 6.7859 (7.4837)	Prec@1 7.812 (5.256)	Prec@5 20.312 (13.352)
Epoch: [0][20/38680]	Time 2.189 (3.503)	Data 0.021 (0.415)	Loss 6.4568 (7.1852)	Prec@1 10.156 (6.492)	Prec@5 22.656 (15.699)
Epoch: [0][30/38680]	Time 2.213 (3.087)	Data 0.012 (0.286)	Loss 6.0839 (6.9030)	Prec@1 11.719 (7.850)	Prec@5 24.609 (18.196)
Epoch: [0][40/38680]	Time 2.224 (2.873)	Data 0.015 (0.220)	Loss 6.3960 (6.7499)	Prec@1 12.891 (9.137)	Prec@5 22.656 (20.255)
Epoch: [0][50/38680]	Time 2.215 (2.744)	Data 0.015 (0.181)	Loss 5.7896 (6.5743)	Prec@1 13.672 (9.995)	Prec@5 28.906 (22.051)
Epoch: [0][60/38680]	Time 2.214 (2.658)	Data 0.012 (0.154)	Loss 5.4124 (6.4484)	Prec@1 14.844 (10.540)	Prec@5 33.594 (23.239)
Epoch: [0][70/38680]	Time 2.221 (2.597)	Data 0.016 (0.134)	Loss 5.4261 (6.3200)	Prec@1 18.359 (11.356)	Prec@5 35

Epoch: [0][650/38680]	Time 2.222 (2.276)	Data 0.014 (0.029)	Loss 3.4940 (4.4526)	Prec@1 39.062 (27.775)	Prec@5 58.594 (45.845)
Epoch: [0][660/38680]	Time 2.220 (2.275)	Data 0.016 (0.029)	Loss 3.6670 (4.4413)	Prec@1 38.672 (27.897)	Prec@5 55.078 (45.980)
Epoch: [0][670/38680]	Time 2.231 (2.275)	Data 0.013 (0.029)	Loss 3.5576 (4.4294)	Prec@1 39.453 (28.033)	Prec@5 56.250 (46.126)
Epoch: [0][680/38680]	Time 2.226 (2.274)	Data 0.018 (0.029)	Loss 3.6043 (4.4185)	Prec@1 35.156 (28.141)	Prec@5 58.203 (46.257)
Epoch: [0][690/38680]	Time 2.246 (2.274)	Data 0.025 (0.029)	Loss 3.5067 (4.4081)	Prec@1 37.109 (28.254)	Prec@5 57.812 (46.388)
Epoch: [0][700/38680]	Time 2.233 (2.273)	Data 0.016 (0.029)	Loss 3.5294 (4.3961)	Prec@1 36.719 (28.381)	Prec@5 54.297 (46.518)
Epoch: [0][710/38680]	Time 2.228 (2.273)	Data 0.022 (0.028)	Loss 3.7241 (4.3848)	Prec@1 33.203 (28.493)	Prec@5 54.688 (46.646)
Epoch: [0][720/38680]	Time 2.252 (2.272)	Data 0.015 (0.028)	Loss 3.2291 (4.3729)	Prec@1 39.453 (28.617)	Prec@5 

Process Process-4:
KeyboardInterrupt
Process Process-3:
Process Process-2:
Process Process-5:
Process Process-6:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-1:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, 

KeyboardInterrupt: 

  File "/usr/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()
KeyboardInterrupt
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 40, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/home/weiso/data_science_competition/Cdiscount/src/dataset/dataset.py", line 53, in __getitem__
    img = self.transform(img)
