In [1]:
# Widen the notebook's main container to 95% of the browser window.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from datasets import WebDataset
from models import WebObjExtractionNet
from train import train_model, evaluate_model
from utils import custom_collate_fn, pkl_load, print_and_log

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Seed NumPy's global RNG and PyTorch's default generator with the same value.
seed = 1
np.random.seed(seed)
# Optional cuDNN switches, left disabled:
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False
torch.manual_seed(seed)

<torch._C.Generator at 0x7f362eb0d130>

In [4]:
# ---- Task / data parameters ----
N_CLASSES = 4  # class ids -> 0: BG, 1: Price, 2: Image, 3: Title
CLASS_NAMES = ['BG', 'Price', 'Image', 'Title']
IMG_HEIGHT = 1280  # images are assumed to be square, so this is also the width
EVAL_INTERVAL = 5  # the model is evaluated after every EVAL_INTERVAL epochs
TRAIN_DATA_DIR = '../data/web_data/train'
TEST_DATA_DIR = '../data/web_data/test'
OUTPUT_DIR = 'results'

# ---- Hyperparameters ----
N_EPOCHS = 20
LEARNING_RATE = 1e-3
BACKBONE = 'alexnet'  # alternative: 'resnet'
BATCH_SIZE = 50
ROI_POOL_OUTPUT_SIZE = (3, 3)
TRAINABLE_CONVNET = True
WEIGHTED_LOSS = False

# Per-class loss weights: hand-set values (weight inversely proportional to the
# number of examples for the class) when WEIGHTED_LOSS is on, uniform otherwise.
WEIGHTS = torch.Tensor([1, 100, 100, 100]) if WEIGHTED_LOSS else torch.ones(N_CLASSES)
if WEIGHTED_LOSS:
    print('Weighted loss with class weights:', WEIGHTS)

# Make sure the output directory exists. `exist_ok=True` replaces the separate
# existence check, so there is no window between "check" and "create" in which
# another process could create the directory and make makedirs fail.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The log file name encodes the hyperparameter configuration of this run.
# NOTE: if same hyperparameter configuration is run again, previous log file will be overwritten
LOG_FILE = '%s/logs %s batch-%d roi-%d lr-%.0e wt_loss-%d.txt' % (
    OUTPUT_DIR, BACKBONE, BATCH_SIZE, ROI_POOL_OUTPUT_SIZE[0], LEARNING_RATE, WEIGHTED_LOSS)
print('logs will be saved in "%s"' % (LOG_FILE))

# Write the run header. The 'w' passed on the first call is presumably a file
# mode that truncates an existing log (matching the NOTE above) -- confirm in
# utils.print_and_log.
print_and_log('Batch Size: %d' % (BATCH_SIZE), LOG_FILE, 'w')
print_and_log('RoI Pool Output Size: (%d, %d)' % ROI_POOL_OUTPUT_SIZE, LOG_FILE)
print_and_log('Learning Rate: %.0e\n' % (LEARNING_RATE), LOG_FILE)

logs will be saved in "results/logs alexnet batch-50 roi-3 lr-1e-03 wt_loss-0.txt"
Batch Size: 50
RoI Pool Output Size: (3, 3)
Learning Rate: 1e-03



In [5]:
# Settings shared by the train and test loaders; only `shuffle` differs.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=4,
                      collate_fn=custom_collate_fn, drop_last=False)

# Training split: sample order is reshuffled by the loader.
train_dataset = WebDataset(TRAIN_DATA_DIR)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)

# Test split: kept in dataset order.
test_dataset = WebDataset(TEST_DATA_DIR)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [6]:
# Report the number of samples in each split.
n_train_images, n_test_images = len(train_dataset), len(test_dataset)
print('Train Images:', n_train_images)
print('Test  Images:', n_test_images)

Train Images: 4891
Test  Images: 1129


In [None]:
# Build the network and move it to the selected device.
model = WebObjExtractionNet(ROI_POOL_OUTPUT_SIZE, IMG_HEIGHT, N_CLASSES, BACKBONE, TRAINABLE_CONVNET, CLASS_NAMES)
model = model.to(device)

# Adam over all model parameters; cross-entropy with per-class weights,
# summed (not averaged) over the elements it is applied to.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(weight=WEIGHTS, reduction='sum')
criterion = criterion.to(device)

# Run training; train_model also receives the test loader, the evaluation
# interval and the log file path.
model = train_model(model, train_loader, optimizer, criterion, N_EPOCHS, device,
                    test_loader, EVAL_INTERVAL, LOG_FILE)

Initializing WebObjExtractionNet model...
Using first few layers of "alexnet" as ConvNet Visual Feature Extractor
ConvNet Feature Map size: torch.Size([1, 384, 79, 79])
Trainable parameters: 1008452
WebObjExtractionNet(
  (convnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (roi_pool): RoIPool(output_size=(3, 3), spatial_scale=0.06171875)
  (fc): Linear(in_features=3456, out_features=4, bias=True)
)
--------------------------------------------------
Training Model for 20 epochs...
[TRAIN]	 Epoch:  1	 Loss: 0.3520	 Accuracy: 94.00% (118.36s)
[EVAL]	 Loss: 0.1856	 Ac