In [10]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tqdm.notebook import tqdm
import numpy as np
import json
import os
import os.path as osp

In [2]:
from dataset import MultiModalDataset
from model import FoodItemTagModel

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import models, transforms

In [4]:
# Root directory of the food_item_tag assignment; the training code reads
# `data\class_weights.json`, the training/validation CSVs and `imgs` beneath it.
# Set the FOOD_ITEM_TAG_BASE_PATH environment variable to point at a different
# checkout instead of editing this cell; the fallback is the original location.
base_path = os.environ.get(
    "FOOD_ITEM_TAG_BASE_PATH",
    "C:\\Users\\Mercedez\\Downloads\\santhosh\\food_item_tag",
)

In [5]:
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps that close both pipelines."""
    # Per-channel mean / std; these are the values torchvision documents for
    # its ImageNet-pretrained models.
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# Image preprocessing keyed by split name.
#   'train': random 224-pixel crop plus random horizontal flip (augmentation).
#   'val':   resize to 256 then centre-crop to 224 (no randomness).
image_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        *_tensor_and_normalize(),
    ]),
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        *_tensor_and_normalize(),
    ]),
)


In [6]:
def train_epoch(model, dataloader, loss_func, optim, device = 'cpu'):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: module called as ``model(image, text, price)``; it is switched
            to train mode before the loop.
        dataloader: iterable of dict batches holding the keys ``'image'``,
            ``'price'``, ``'label'``, ``'input_ids'``, ``'attention_mask'``
            and ``'token_type_ids'``; every value must support ``.to(device)``.
        loss_func: callable ``loss_func(output, label)`` returning a scalar
            tensor that supports ``backward()`` and ``item()``.
        optim: optimizer exposing ``zero_grad()`` and ``step()``. Inside this
            function the parameter name hides any module bound to ``optim``.
        device: device every batch tensor is moved to.

    Returns:
        float: the per-batch losses averaged with each batch weighted by its
        number of samples (``label.size(0)``). The same value is printed.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    sample_count = 0
    text_keys = ('input_ids', 'attention_mask', 'token_type_ids')
    for batch in tqdm(dataloader):
        image = batch['image'].to(device)
        price = batch['price'].to(device)
        label = batch['label'].to(device)
        # The model takes the tokenizer fields bundled in a single dict.
        text = {key: batch[key].to(device) for key in text_keys}
        # Clear gradients left over from the previous step.
        optim.zero_grad()
        # Force gradient tracking on even if the caller disabled it globally.
        with torch.set_grad_enabled(True):
            output = model(image, text, price)
            loss = loss_func(output, label)
            loss.backward()
            optim.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        batch_size = label.size(0)
        running_loss += loss.item() * batch_size
        sample_count += batch_size
    epoch_loss = running_loss / sample_count
    print(f"Training Loss - {epoch_loss}")
    # Hand the value back so callers can log or plot it, as validate_epoch does.
    return epoch_loss

def validate_epoch(model, dataloader, loss_func, device = 'cpu'):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    The model is put into eval mode and called as ``model(image, text, price)``
    where ``text`` bundles the ``'input_ids'``, ``'attention_mask'`` and
    ``'token_type_ids'`` entries of the batch. No optimizer step is taken.

    Returns:
        The per-batch losses averaged with each batch weighted by its number
        of samples (``label.size(0)``); the same value is printed.
    """
    model.eval()
    weighted_loss_sum = 0
    n_samples = 0
    for batch in tqdm(dataloader):
        # Move every tensor the model and the loss need onto the target device.
        text = {
            'input_ids': batch['input_ids'].to(device),
            'attention_mask': batch['attention_mask'].to(device),
            'token_type_ids': batch['token_type_ids'].to(device),
        }
        image, price, label = (
            batch[field].to(device) for field in ('image', 'price', 'label')
        )
        # Gradient tracking is switched off for the forward pass and the loss.
        with torch.set_grad_enabled(False):
            batch_loss = loss_func(model(image, text, price), label)
        current = label.size(0)
        weighted_loss_sum += batch_loss.item() * current
        n_samples += current
    epoch_loss = weighted_loss_sum / n_samples
    print(f"Validation Loss - {epoch_loss}")
    return epoch_loss

In [7]:
def train_image_model(device='cpu', epochs = 10):
    """Train a ``FoodItemTagModel`` on the data under ``base_path``.

    Reads ``data/class_weights.json``, ``data/training_data.csv`` and
    ``data/validation_data.csv`` plus the ``imgs`` directory below the
    module-level ``base_path``. After every epoch the validation loss is
    compared with the best one seen so far; on improvement the weights are
    written to ``best_model.pth`` in the current working directory.

    Args:
        device: device the model, the loss weights and every batch are
            placed on (for example ``'cpu'`` or ``'cuda:0'``).
        epochs: number of train + validate passes to run.

    Returns:
        The model as it stands after the final epoch. This is not necessarily
        the best checkpoint; that one lives in ``best_model.pth``.
    """
    # NOTE(review): the meaning of (512, 512, 48) is defined in model.py;
    # 48 is presumably the number of output tags - confirm.
    model = FoodItemTagModel(512,512,48)
    model = model.to(device)

    # Build paths with os.path.join so the separator matches the host OS.
    data_dir = osp.join(base_path, "data")
    image_dir = osp.join(base_path, "imgs")

    ### loading computed class weights
    with open(osp.join(data_dir, "class_weights.json")) as fh:
        # The weights are stored as a single JSON line.
        weights = json.loads(fh.readline())
    # pos_weight has to live on the same device as the logits and labels,
    # otherwise the loss fails with a device mismatch off the CPU.
    weights = torch.tensor(weights, dtype=torch.float, device=device)
    loss_func = nn.BCEWithLogitsLoss(pos_weight=weights)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # Multiply the learning rate by 0.95 after every epoch.
    scheduler = lr_scheduler.MultiplicativeLR(optimizer,lr_lambda=lambda epoch: 0.95)
    training_dataset = MultiModalDataset(
        osp.join(data_dir, "training_data.csv"), image_dir, image_transforms["train"])
    validation_dataset = MultiModalDataset(
        osp.join(data_dir, "validation_data.csv"), image_dir, image_transforms["val"])

    training_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=16,
                                                 shuffle=True, num_workers=0)
    # The validation loss is a sample-weighted mean, so batch order is
    # irrelevant; a fixed order keeps evaluation deterministic.
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=16,
                                                 shuffle=False, num_workers=0)

    best_score = float("inf")
    for epoch in range(epochs):
        print(f"Epoch - {epoch+1}")
        train_epoch(model, training_dataloader, loss_func, optimizer, device)
        score = validate_epoch(model, validation_dataloader, loss_func, device)
        scheduler.step()
        if score < best_score:
            # Remember the new best so later, worse epochs cannot overwrite
            # the checkpoint.
            best_score = score
            # Write to a per-epoch file first, then swap it into place;
            # os.replace overwrites an existing best_model.pth in one step.
            checkpoint_path = f"model_{epoch}.pth"
            torch.save(model.state_dict(), checkpoint_path)
            os.replace(checkpoint_path, "best_model.pth")

    return model

In [8]:
# Train on the first GPU when CUDA is available; otherwise fall back to the
# CPU so the cell still runs (slowly) on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = train_image_model(epochs = 20, device=device)
# Persist the final-epoch weights. train_image_model separately writes
# best_model.pth whenever the validation loss passes its save check.
torch.save(model.state_dict(), "model.pth")

Some weights of the model checkpoint at cahya/bert-base-indonesian-522M were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_le

Epoch - 1


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.6805232460808723


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.6656399912801273
Epoch - 2


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.6439196176343281


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.5999725038794717
Epoch - 3


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.3376007866753388


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21464361400273538
Epoch - 4


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.22024565506090146


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21299848288269888
Epoch - 5


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21886303296280704


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21262183057609046
Epoch - 6


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21793869094353763


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21237223253693782
Epoch - 7


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.2173676663019993


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21213314263384314
Epoch - 8


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21688479432493474


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.2120503704017302
Epoch - 9


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21660413775935336


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21191274770496665
Epoch - 10


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.2164670895754074


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21187542227584147
Epoch - 11


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21599382504789438


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21177278124940876
Epoch - 12


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21558656318646102


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21172347844150344
Epoch - 13


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21572465636448884


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21165336086147343
Epoch - 14


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21522653567149097


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21159868222785627
Epoch - 15


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21514243894837728


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21156774815730456
Epoch - 16


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.2152877071189783


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21160607710315643
Epoch - 17


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.2150682495421845


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21148670738538905
Epoch - 18


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21465861995529148


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21145244845886166
Epoch - 19


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21480006675574437


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.2113949682366501
Epoch - 20


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss - 0.21447927217451387


  0%|          | 0/124 [00:00<?, ?it/s]

Training Loss - 0.21143645663458907
