In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tqdm.notebook import tqdm

In [2]:
from dataset import MultiModalDataset
from model import FoodItemTagModel

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import models, transforms

In [4]:
# Root directory of the food_item_tag assignment; the training/validation CSV
# paths and the image directory used further down are built from it.
# Set the FOOD_ITEM_TAG_BASE_PATH environment variable to run the notebook on
# another machine without editing this cell; the original location stays the default.
base_path = os.environ.get(
    "FOOD_ITEM_TAG_BASE_PATH",
    "/Users/santhosh.mohan/Downloads/DSCVAssessment/assignments/food_item_tag",
)

In [5]:
# Channel-wise mean / std handed to Normalize in both pipelines
# (presumably the ImageNet statistics -- confirm against the backbone in model.py).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training: random 224-px crop plus random horizontal flip, then tensor
# conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]

# Validation: resize to 256 followed by a central 224-px crop, then the same
# tensor conversion and normalisation as training.
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]

# Image pipelines keyed by split name.
image_transforms = dict(
    train=transforms.Compose(_train_steps),
    val=transforms.Compose(_val_steps),
)


In [6]:
def train_epoch(model, dataloader, loss_func, optim, device = 'cpu'):
    """Run one optimisation pass over ``dataloader`` and report the mean loss.

    Args:
        model: Module invoked as ``model(image, text, price)``; it is switched
            to training mode before the first batch.
        dataloader: Iterable of dict batches providing the keys ``image``,
            ``price``, ``label``, ``input_ids``, ``attention_mask`` and
            ``token_type_ids``; every value must support ``.to(device)``.
        loss_func: Callable ``loss_func(output, label)`` returning a scalar
            tensor. The running total weights it by the batch size, which
            assumes the loss is a per-batch mean.
        optim: Optimizer stepped once per batch. Inside this function the name
            shadows the notebook's ``torch.optim`` module alias.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        float: Sample-weighted mean training loss for the epoch (also printed).

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for batch in tqdm(dataloader):
        image = batch['image'].to(device)
        price = batch['price'].to(device)
        label = batch['label'].to(device)
        # Tokenizer outputs are regrouped into the dict the model takes as `text`.
        text = {
            key: batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'token_type_ids')
        }
        optim.zero_grad()
        # Force autograd on even if the caller is inside a no-grad context.
        with torch.set_grad_enabled(True):
            output = model(image, text, price)
            loss = loss_func(output, label)
            loss.backward()
            optim.step()
        batch_size = label.size(0)
        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    if sample_count == 0:
        # Previously surfaced as an unexplained ZeroDivisionError.
        raise ValueError("train_epoch received no samples from dataloader")
    epoch_loss = loss_sum / sample_count
    print(f"Training Loss - {epoch_loss}")
    return epoch_loss

def validate_epoch(model, dataloader, loss_func, device = 'cpu'):
    """Evaluate ``model`` over ``dataloader`` without updating its weights.

    Args:
        model: Module invoked as ``model(image, text, price)``; it is switched
            to evaluation mode before the first batch.
        dataloader: Iterable of dict batches providing the keys ``image``,
            ``price``, ``label``, ``input_ids``, ``attention_mask`` and
            ``token_type_ids``; every value must support ``.to(device)``.
        loss_func: Callable ``loss_func(output, label)`` returning a scalar
            tensor. The running total weights it by the batch size, which
            assumes the loss is a per-batch mean.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        float: Sample-weighted mean validation loss (also printed).

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    sample_count = 0
    for batch in tqdm(dataloader):
        image = batch['image'].to(device)
        price = batch['price'].to(device)
        label = batch['label'].to(device)
        # Tokenizer outputs are regrouped into the dict the model takes as `text`.
        text = {
            key: batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'token_type_ids')
        }
        # No gradients are needed for evaluation.
        with torch.no_grad():
            output = model(image, text, price)
            loss = loss_func(output, label)
        batch_size = label.size(0)
        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += loss.item() * batch_size
        sample_count += batch_size
    if sample_count == 0:
        # Previously surfaced as an unexplained ZeroDivisionError.
        raise ValueError("validate_epoch received no samples from dataloader")
    epoch_loss = loss_sum / sample_count
    # The original message said "Training Loss", which made the two log lines
    # printed per epoch indistinguishable.
    print(f"Validation Loss - {epoch_loss}")
    return epoch_loss

In [7]:
def train_image_model(device='cpu', epochs = 10, batch_size = 4):
    """Build a ``FoodItemTagModel``, train it, and return the trained module.

    Each epoch runs ``train_epoch`` on the training split and then
    ``validate_epoch`` on the validation split, followed by one scheduler step.

    Args:
        device: Device the model is moved to and on which batches are processed.
        epochs: Number of train/validate passes to run.
        batch_size: Batch size used by both data loaders (default matches the
            previously hard-coded value of 4).

    Returns:
        The trained ``FoodItemTagModel`` instance, still on ``device``.
    """
    # Constructor arguments are opaque from here -- presumably two 512-wide
    # feature sizes and 48 output tags; confirm against model.py.
    model = FoodItemTagModel(512,512,48)
    model = model.to(device)
    loss_func = nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # NOTE: the scheduler is stepped once per epoch and the first milestone is
    # 40, so the learning rate stays at 0.001 unless `epochs` exceeds 40.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[40,70, 90], gamma=0.15)

    training_dataset = MultiModalDataset(
        f"{base_path}/data/training_data.csv",
        f"{base_path}/imgs",
        image_transforms["train"],
    )
    validation_dataset = MultiModalDataset(
        f"{base_path}/data/validation_data.csv",
        f"{base_path}/imgs",
        image_transforms["val"],
    )

    training_dataloader = torch.utils.data.DataLoader(
        training_dataset, batch_size=batch_size, shuffle=True, num_workers=0
    )
    # Validation never updates weights, so shuffling only adds nondeterminism
    # and consumes random state; iterate the split in a fixed order instead.
    validation_dataloader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=batch_size, shuffle=False, num_workers=0
    )

    for epoch in range(epochs):
        print(f"Epoch - {epoch+1}")
        train_epoch(model, training_dataloader, loss_func, optimizer, device)
        validate_epoch(model, validation_dataloader, loss_func, device)
        scheduler.step()
    return model

In [None]:
# Train for two epochs on the default device, then persist only the learned
# parameters (the state dict) rather than the whole module object.
model = train_image_model(epochs=2)
trained_state = model.state_dict()
torch.save(trained_state, "model.pth")