In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from trainer.trainer import get_trainer

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [2]:
# Path of the saved training checkpoint that the following cells load and
# then resume from.
checkpoint_file = 'vgg19_bn_checkpoint_epoch_0_iter_100.pth.tar'

In [3]:
# Load the checkpoint with an identity map_location so tensors are loaded on
# the CPU, which saves some GPU memory while the checkpoint is inspected.
# NOTE: torch.load unpickles the file -- only open checkpoints you trust.
checkpoint = torch.load(
    checkpoint_file,
    map_location=lambda storage, loc: storage,
)

# One-line summary of the training state stored in the checkpoint. The
# 'loss', 'top1' and 'top5' entries are read through their .val and .avg
# attributes.
summary_template = (
    'Epoch: [{0}] iter: [{1}]\t'
    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'
)
print(summary_template.format(
    checkpoint['epoch'],
    checkpoint['iter'],
    loss=checkpoint['loss'],
    top1=checkpoint['top1'],
    top5=checkpoint['top5'],
))

# Recover the run configuration and display it before it is modified.
config = checkpoint['config']
print(config)

# Switch to resume mode by pointing the config at this checkpoint file.
config['resume'] = checkpoint_file

Epoch: [0] iter: [100]	Loss 5.2104 (6.5985)	Prec@1 31.000 (11.881)	Prec@5 43.000 (21.545)
{'pretrained': True, 'val_batch_size': 100, 'start_epoch': 0, 'save_freq': 100, 'train_batch_size': 100, 'learning_rate': 0.0001, 'best_val_prec1': 0, 'optimizer': 'Adam', 'validate_freq': 70000.0, 'epochs': 1, 'arch': 'vgg19_bn', 'print_freq': 10, 'resume': None, 'decay_lr_freq': 30000.0, 'weight_decay': 1e-05}


In [4]:
import torchvision.models as models

# ---------------------------------------------------------------------------
# Settings for this cell. They were previously repeated inline; they are
# collected here so the train and validation pipelines cannot drift apart.
# ---------------------------------------------------------------------------
BSON_FILE_PATH = "/mnt/data/cdiscount/train.bson"  # shared by train and val
OFFSETS_CSV = "train_offsets.csv"                  # shared by train and val
IMAGE_SIZE = 160                                   # passed as `resize`
NUM_WORKERS = 6                                    # DataLoader worker processes

# Input width of the replacement classifier. 12800 == 512 * 5 * 5, which is
# presumably the flattened feature map of the VGG backbone for 160x160
# inputs -- TODO confirm, and update if `arch` or IMAGE_SIZE changes.
CLASSIFIER_IN_FEATURES = 12800
CLASSIFIER_HIDDEN_UNITS = 8192
# Output width of the classifier; presumably the number of target
# categories in the dataset -- TODO confirm.
NUM_CLASSES = 5270


def _make_dataset(images_csv):
    """Build a labelled dataset for the image list in `images_csv`."""
    return get_cdiscount_dataset(offsets_csv=OFFSETS_CSV,
                                 images_csv=images_csv,
                                 bson_file_path=BSON_FILE_PATH,
                                 with_label=True,
                                 resize=IMAGE_SIZE)


# get dataset
print('getting dataset...')
train_dataset = _make_dataset("train_images.csv")
val_dataset = _make_dataset("val_images.csv")

# get data loader
print('getting data loader...')
train_dataloader = DataLoader(train_dataset,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
# NOTE(review): shuffle=True is kept from the original. Validation normally
# does not need shuffling; confirm whether the trainer relies on it before
# changing this.
val_dataloader = DataLoader(val_dataset,
                            batch_size=config['val_batch_size'],
                            shuffle=True,
                            num_workers=NUM_WORKERS)

# define model
arch = config['arch']
model_factory = getattr(models, arch, None)
if not callable(model_factory):
    # Fail with a readable message instead of a bare KeyError/TypeError when
    # the checkpoint names something that is not a model constructor.
    raise ValueError(
        "config['arch']={!r} is not a model constructor in "
        "torchvision.models".format(arch))
print("=> using pre-trained model '{}'".format(arch))
model = model_factory(pretrained=config['pretrained'])

# Replacement classification head: Linear -> ReLU -> Dropout -> Linear.
classifier_layers = [
    torch.nn.Linear(in_features=CLASSIFIER_IN_FEATURES,
                    out_features=CLASSIFIER_HIDDEN_UNITS),
    torch.nn.ReLU(inplace=True),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(in_features=CLASSIFIER_HIDDEN_UNITS,
                    out_features=NUM_CLASSES),
]
classifier = torch.nn.Sequential(*classifier_layers)

# Combine the backbone with the new head (the -1 argument is forwarded
# unchanged to the project helper), then wrap for multi-GPU execution and
# move the model to CUDA.
model = assemble_model_with_classifier(model, -1, classifier)
model = torch.nn.DataParallel(model).cuda()
print(model)

# define loss function (criterion) and optimizer
criterion = torch.nn.CrossEntropyLoss().cuda()

# get trainer
Trainer = get_trainer(train_dataloader, val_dataloader, model, criterion, config)

# Run!
Trainer.run()

getting dataset...
getting data loader...
=> using pre-trained model 'vgg19_bn'
DataParallel (
  (module): AssembledModel (
    (model): Sequential (
      (0): Sequential (
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU (inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU (inplace)
        (6): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
        (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (9): ReLU (inplace)
        (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (12): ReLU (inplace)
        (13): MaxPool2d (size=(2, 2), stride=(2, 2), d

Process Process-5:
Process Process-1:
Process Process-3:
Process Process-6:
Process Process-4:
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-2:
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/weiso/.local/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _boot

KeyboardInterrupt: 