In [1]:
import torch
from torch.utils.data import DataLoader
from dataset.dataset import get_cdiscount_dataset
from model.model import assemble_model, assemble_model_with_classifier
from trainer.trainer import get_trainer

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# redirect print to file
# import sys
# sys.stdout = open("PyTorch-resnet34-log.txt", "w")

In [None]:
# configuration
# Single dict of run settings. The batch sizes are read when the DataLoaders
# are built, 'arch'/'pretrained' select the torchvision backbone, and the whole
# dict is passed to get_trainer(). How the trainer interprets the remaining
# keys is not visible in this notebook -- the notes below are assumptions.
config = {
    # --- data loading ---
    'train_batch_size': 124,
    'val_batch_size': 124,

    # --- backbone ---
    'arch': 'resnet50',
    'pretrained': True,

    # --- optimisation (consumed by the trainer; semantics to be confirmed there) ---
    'optimizer': 'Adam',
    'learning_rate': 1e-4,
    'decay_lr_freq': 4e4,  # NOTE(review): float literal; presumably a step count -- confirm
    'weight_decay': 1e-5,

    # --- checkpoint / schedule ---
    'resume': None,
    'start_epoch': 0,
    'epochs': 10,

    # --- reporting cadence (float literals kept exactly as in the original) ---
    'print_freq': 10,
    'validate_freq': 7e4,
    'save_freq': 1e3,

    # --- running best metric; starts at zero ---
    'best_val_prec1': 0,
}

In [None]:
import torchvision.models as models

# Constants for this cell (previously inline magic numbers).
NUM_CLASSES = 5270           # output width of the replacement classification layer
NUM_WORKERS = 6              # DataLoader worker count, shared by train and val loaders
FALLBACK_IN_FEATURES = 2048  # classifier input width used when the backbone has no `fc`

# get dataset
# Both splits read the same BSON file and offsets CSV; only the images CSV differs.
print('getting dataset...')
train_dataset = get_cdiscount_dataset(offsets_csv="train_offsets.csv",
                                      images_csv="train_images.csv",
                                      bson_file_path="/mnt/data/cdiscount/train.bson",
                                      with_label=True,
                                      resize=224)
val_dataset = get_cdiscount_dataset(offsets_csv="train_offsets.csv",
                                    images_csv="val_images.csv",
                                    bson_file_path="/mnt/data/cdiscount/train.bson",
                                    with_label=True,
                                    resize=224)

# get data loader
print('getting data loader...')
train_dataloader = DataLoader(train_dataset,
                              batch_size=config['train_batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
# NOTE(review): shuffling a validation loader is normally unnecessary; it is
# kept as-is here because the trainer's validation loop is not visible in this
# notebook and may rely on the current behaviour.
val_dataloader = DataLoader(val_dataset,
                            batch_size=config['val_batch_size'],
                            shuffle=True,
                            num_workers=NUM_WORKERS)

# define model
# Report accurately whether ImageNet weights are being loaded.
if config['pretrained']:
    print("=> using pre-trained model '{}'".format(config['arch']))
else:
    print("=> creating model '{}'".format(config['arch']))
backbone = getattr(models, config['arch'])(pretrained=config['pretrained'])

# Size the new classifier from the backbone's own final linear layer rather than
# hard-coding 2048: torchvision ResNets expose it as `fc`, and its in_features
# is 512 for resnet18/34 but 2048 for resnet50/101/152. Architectures without
# an `fc` attribute fall back to the previous hard-coded width.
fc_layer = getattr(backbone, 'fc', None)
in_features = fc_layer.in_features if fc_layer is not None else FALLBACK_IN_FEATURES

classifier_layer = [
    torch.nn.Linear(in_features=in_features, out_features=NUM_CLASSES)
]
classifier = torch.nn.Sequential(*classifier_layer)

# assemble_model_with_classifier is a project helper; presumably the -1 drops
# the backbone's last child and attaches `classifier` in its place -- confirm
# in model/model.py.
model = assemble_model_with_classifier(backbone, -1, classifier)
model = torch.nn.DataParallel(model).cuda()
print(model)

# define loss function (criterion) and optimizer
# Only the criterion is built here; the optimizer settings travel inside `config`.
criterion = torch.nn.CrossEntropyLoss().cuda()

# get trainer
Trainer = get_trainer(train_dataloader, val_dataloader, model, criterion, config)

# Run!
Trainer.run()

getting dataset...
getting data loader...
=> using pre-trained model 'resnet50'
DataParallel (
  (module): AssembledModel (
    (model): Sequential (
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (2): ReLU (inplace)
      (3): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
      (4): Sequential (
        (0): Bottleneck (
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
          (relu): ReLU (inplace)
          (downsample): Sequent

Epoch: [0][0/79839]	Time 26.422 (26.422)	Data 5.764 (5.764)	Loss 8.5794 (8.5794)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
Epoch: [0][10/79839]	Time 2.569 (4.814)	Data 0.011 (0.533)	Loss 7.8500 (8.2494)	Prec@1 5.645 (2.493)	Prec@5 8.871 (5.059)
Epoch: [0][20/79839]	Time 2.579 (3.745)	Data 0.010 (0.284)	Loss 7.1453 (7.8771)	Prec@1 8.065 (4.800)	Prec@5 11.290 (8.372)
Epoch: [0][30/79839]	Time 2.585 (3.370)	Data 0.008 (0.195)	Loss 6.9028 (7.5931)	Prec@1 4.839 (5.957)	Prec@5 17.742 (10.458)
Epoch: [0][40/79839]	Time 2.586 (3.179)	Data 0.008 (0.150)	Loss 6.8441 (7.3576)	Prec@1 13.710 (7.199)	Prec@5 21.774 (12.903)
Epoch: [0][50/79839]	Time 2.630 (3.066)	Data 0.015 (0.123)	Loss 6.2689 (7.1704)	Prec@1 11.290 (8.049)	Prec@5 25.000 (14.801)
Epoch: [0][60/79839]	Time 2.590 (2.990)	Data 0.010 (0.104)	Loss 5.9732 (7.0019)	Prec@1 12.903 (9.122)	Prec@5 29.839 (16.856)
Epoch: [0][70/79839]	Time 2.557 (2.935)	Data 0.010 (0.091)	Loss 5.8693 (6.8872)	Prec@1 18.548 (10.177)	Prec@5 29.839 (18.458)
Epoch: 

Epoch: [0][650/79839]	Time 2.629 (2.647)	Data 0.010 (0.019)	Loss 3.5960 (4.9838)	Prec@1 40.323 (26.023)	Prec@5 56.452 (40.891)
Epoch: [0][660/79839]	Time 2.574 (2.646)	Data 0.011 (0.019)	Loss 3.8096 (4.9691)	Prec@1 35.484 (26.124)	Prec@5 52.419 (41.031)
Epoch: [0][670/79839]	Time 2.614 (2.646)	Data 0.016 (0.019)	Loss 4.5350 (4.9559)	Prec@1 25.806 (26.238)	Prec@5 35.484 (41.158)
Epoch: [0][680/79839]	Time 2.631 (2.645)	Data 0.011 (0.019)	Loss 3.8440 (4.9428)	Prec@1 35.484 (26.361)	Prec@5 48.387 (41.292)
Epoch: [0][690/79839]	Time 2.618 (2.645)	Data 0.013 (0.019)	Loss 3.5308 (4.9294)	Prec@1 38.710 (26.487)	Prec@5 58.065 (41.451)
Epoch: [0][700/79839]	Time 2.602 (2.644)	Data 0.010 (0.019)	Loss 4.2384 (4.9182)	Prec@1 34.677 (26.590)	Prec@5 45.968 (41.582)
Epoch: [0][710/79839]	Time 2.615 (2.644)	Data 0.012 (0.019)	Loss 3.8509 (4.9062)	Prec@1 33.871 (26.690)	Prec@5 50.806 (41.731)
Epoch: [0][720/79839]	Time 2.618 (2.644)	Data 0.014 (0.019)	Loss 4.2388 (4.8951)	Prec@1 31.452 (26.785)	Prec@5 