In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from datasets import WebDataset
from models import WebObjExtractionNet
from train import train_model, evaluate_model
from utils import custom_collate_fn, count_parameters, pkl_load

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# Parameters
N_CLASSES = 4 # 0: BG, 1: Price, 2: Image, 3: Title
IMG_HEIGHT = 1280 # Image assumed to have same height and width
EVAL_INTERVAL = 5 # Number of Epochs after which model is evaluated
SEED = 42 # Seed for PyTorch's random number generators (see torch.manual_seed below)

# Hyperparameters
BATCH_SIZE = 10 # Batch size of the training DataLoader
ROI_POOL_OUTPUT_SIZE = (5,5) # Passed to WebObjExtractionNet
TRAINABLE_CONVNET = False # Passed to WebObjExtractionNet; presumably False keeps the ConvNet frozen -- confirm in models.py
LEARNING_RATE = 1e-3 # Learning rate of the Adam optimizer
N_EPOCHS = 20 # Number of epochs passed to train_model

# Seed PyTorch once, before any model or data-loader iterator is created, so that
# a Restart & Run All starts from the same random state every time. This does not
# by itself guarantee bitwise-identical results on GPU.
# NOTE(review): if the dataset/model code draws from `random` or NumPy, those
# generators need seeding too -- not visible from this notebook.
torch.manual_seed(SEED)

In [3]:
# Datasets for the two splits stored on disk.
train_dataset = WebDataset('../data/web_data/train')
test_dataset = WebDataset('../data/web_data/test')

# Training loader: shuffled, BATCH_SIZE samples per batch, last partial batch kept.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    collate_fn=custom_collate_fn,
    drop_last=False,
)

# Test loader: fixed order, one sample per batch.
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=custom_collate_fn,
    drop_last=False,
)

In [4]:
# Report how many samples each split contains.
n_train_images = len(train_dataset)
n_test_images = len(test_dataset)
print('Train Images:', n_train_images)
print('Test  Images:', n_test_images)

Train Images: 4891
Test  Images: 1129


In [5]:
# Build the network from the configured hyperparameters and place it on the selected device.
model = WebObjExtractionNet(ROI_POOL_OUTPUT_SIZE, IMG_HEIGHT, N_CLASSES, TRAINABLE_CONVNET)
model = model.to(device)
n_parameters = count_parameters(model)
print('Trainable parameters in model:', n_parameters)

# Cross-entropy loss summed (not averaged) over the elements it is given,
# and an Adam optimizer over the model's parameters.
criterion = nn.CrossEntropyLoss(reduction='sum').to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Train for N_EPOCHS; test_loader and EVAL_INTERVAL are handed to train_model,
# presumably for periodic evaluation -- see train.py for the exact behaviour.
model = train_model(
    model,
    train_loader,
    optimizer,
    criterion,
    N_EPOCHS,
    device,
    test_loader,
    EVAL_INTERVAL,
)

Initializing WebObjExtractionNet model...
Using first few layers of Resnet18 as ConvNet Visual Feature Extractor
ConvNet weights Freezed
ConvNet Feature Map size: torch.Size([1, 64, 320, 320])
WebObjExtractionNet(
  (convnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (