In [1]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from datasets import WebDataset
from models import WebObjExtractionNet
from train import train_model, evaluate_model
from utils import custom_collate_fn, pkl_load, print_and_log

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# Seed NumPy's and PyTorch's global RNGs with one shared value so runs are repeatable.
seed = 1
np.random.seed(seed)
# Optional cuDNN switches, left disabled here (see the PyTorch reproducibility notes):
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False
torch.manual_seed(seed)

<torch._C.Generator at 0x7fbc98239110>

In [3]:
# Parameters
N_CLASSES = 4 # 0: BG, 1: Price, 2: Image, 3: Title
CLASS_NAMES = ['BG', 'Price', 'Image', 'Title']
IMG_HEIGHT = 1280 # Image assumed to have same height and width
EVAL_INTERVAL = 5 # Number of Epochs after which model is evaluated
TRAIN_DATA_DIR = '../data/web_data/train'
TEST_DATA_DIR = '../data/web_data/test'
OUTPUT_DIR = 'results'

# Every class index must have exactly one display name.
assert len(CLASS_NAMES) == N_CLASSES, 'CLASS_NAMES must contain one name per class'

# Hyperparameters
LEARNING_RATE = 1e-3
BACKBONE = 'resnet18'
BATCH_SIZE = 10
ROI_POOL_OUTPUT_SIZE = (7,7)
TRAINABLE_CONVNET = True
N_EPOCHS = 20

# Create the output directory if it is missing; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The log file name encodes the backbone, batch size, RoI pool size and learning rate.
# NOTE: if same hyperparameter configuration is run again, previous log file will be overwritten
LOG_FILE = '%s/logs %s batch-%d roi-%d lr-%.0e.txt' % (OUTPUT_DIR, BACKBONE, BATCH_SIZE, ROI_POOL_OUTPUT_SIZE[0], LEARNING_RATE)
print('logs will be saved in "%s"' % (LOG_FILE))
# The first call passes 'w' as a third argument (presumably the file mode, which
# would match the overwrite note above -- confirm in utils.print_and_log).
print_and_log('Batch Size: %d' % (BATCH_SIZE), LOG_FILE, 'w')
print_and_log('RoI Pool Output Size: (%d, %d)' % ROI_POOL_OUTPUT_SIZE, LOG_FILE)
print_and_log('Learning Rate: %.0e\n' % (LEARNING_RATE), LOG_FILE)

logs will be saved in "results/logs resnet18 batch-10 roi-7 lr-1e-03.txt"
Batch Size: 10
RoI Pool Output Size: (7, 7)
Learning Rate: 1e-03



In [4]:
# Both loaders share every setting except shuffling: the training split is
# shuffled, the test split is read in a fixed order.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=4,
                      collate_fn=custom_collate_fn, drop_last=False)

train_dataset = WebDataset(TRAIN_DATA_DIR)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)

test_dataset = WebDataset(TEST_DATA_DIR)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [5]:
# Report how many samples each split contains.
print(f'Train Images: {len(train_dataset)}')
print(f'Test  Images: {len(test_dataset)}')

Train Images: 4891
Test  Images: 1129


In [6]:
# Build the network on the selected device. The backbone is taken from the
# BACKBONE hyperparameter (rather than a hard-coded string) so the trained model
# always matches the configuration encoded in the LOG_FILE name.
model = WebObjExtractionNet(ROI_POOL_OUTPUT_SIZE, IMG_HEIGHT, N_CLASSES, BACKBONE, TRAINABLE_CONVNET, CLASS_NAMES).to(device)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# reduction='sum' makes the loss the sum over samples instead of the mean.
criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

# Train for N_EPOCHS; the test loader, EVAL_INTERVAL and LOG_FILE are handed to
# train_model as well (presumably for periodic evaluation and logging -- see train.py).
model = train_model(model, train_loader, optimizer, criterion, N_EPOCHS, device, test_loader, EVAL_INTERVAL, LOG_FILE)

Initializing WebObjExtractionNet model...
Using first few layers of Resnet18 as ConvNet Visual Feature Extractor
ConvNet Feature Map size: torch.Size([1, 64, 320, 320])
Trainable parameters: 170052
WebObjExtractionNet(
  (convnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
  