In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from datasets import WebDataset
from models import WebIENet
from train import train_model
from utils import pkl_load

# Select the compute device once for the whole notebook: use the GPU when
# CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# Training split. The dataset directory should be in COCO format.
train_dataset = WebDataset('data/web_data/train')

# Serve one sample per step in shuffled order, loaded by a single worker
# process; the last batch is kept even if it is incomplete.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=1,
    drop_last=False,
)

In [3]:
# Parameters
N_CLASSES = 4 # 0: BG, 1: Price, 2: Image, 3: Title
IMG_HEIGHT = 1440 # Image assumed to have same height and width

# Hyperparameters
ROI_POOL_OUTPUT_SIZE = (5,5)
# NOTE(review): presumably controls whether the convolutional feature
# extractor is fine-tuned -- confirm against models.WebIENet.
LEARNABLE_CONVNET = False
LEARNING_RATE = 1e-3
N_EPOCHS = 20
SEED = 42 # Fixed RNG seed so the training run below is repeatable

# Seed PyTorch's global RNG right before the model is built, so that
# re-running this cell on its own (not only Restart & Run All) starts from the
# same random state. Any random weight initialisation and the shuffled order
# of train_loader draw from this RNG.
# NOTE(review): bit-exact results on GPU may additionally require the
# deterministic-algorithm settings from the PyTorch reproducibility notes.
torch.manual_seed(SEED)

# Build the network and move it to the selected device.
model = WebIENet(ROI_POOL_OUTPUT_SIZE, IMG_HEIGHT, N_CLASSES, LEARNABLE_CONVNET).to(device)

# Adam over all model parameters; cross-entropy as the classification loss.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss().to(device)

# train_model returns the model, which replaces the freshly built one above.
model = train_model(model, train_loader, optimizer, criterion, N_EPOCHS, device)

Initializing WebIENet model...
Using first few layers of Resnet18 as Image Feature Extractor
Input batch of images: torch.Size([1, 3, 1440, 1440])
ConvNet feature: torch.Size([1, 64, 360, 360])
RoI Pooling Spatial Scale: 0.25
Classifier 1st FC layer input features: 1600
--------------------------------------------------
Training Model for 20 epochs...
Epoch:  1	 Loss: 0.2693	 Accuracy: 85.9187
Epoch:  2	 Loss: 0.1165	 Accuracy: 87.3740
Epoch:  3	 Loss: 0.0970	 Accuracy: 87.6667
Epoch:  4	 Loss: 0.0768	 Accuracy: 87.9593
Epoch:  5	 Loss: 0.0721	 Accuracy: 88.1219
Epoch:  6	 Loss: 0.0630	 Accuracy: 88.2764
Epoch:  7	 Loss: 0.0587	 Accuracy: 88.4228
Epoch:  8	 Loss: 0.0549	 Accuracy: 88.4797
Epoch:  9	 Loss: 0.0507	 Accuracy: 88.5285
Epoch: 10	 Loss: 0.0489	 Accuracy: 88.5285
Epoch: 11	 Loss: 0.0444	 Accuracy: 88.6992
Epoch: 12	 Loss: 0.0520	 Accuracy: 88.4878
Epoch: 13	 Loss: 0.0438	 Accuracy: 88.6829
Epoch: 14	 Loss: 0.0479	 Accuracy: 88.4797
Epoch: 15	 Loss: 0.0436	 Accuracy: 88.6098
E