In [1]:
import torch
from torch.utils.data import DataLoader, random_split
from torch import nn, optim
from torchvision import transforms
import pandas as pd

In [10]:
from train_multitask import MMNetwork

MMNetwork(
  (vfc): Linear(in_features=768, out_features=256, bias=True)
  (bigru): LSTM(768, 256, bias=False, batch_first=True)
  (mfc1): Linear(in_features=512, out_features=256, bias=True)
  (cf1): Linear(in_features=256, out_features=1, bias=True)
  (cf2): Linear(in_features=256, out_features=1, bias=True)
  (cf3): Linear(in_features=256, out_features=1, bias=True)
  (cf4): Linear(in_features=256, out_features=1, bias=True)
  (cf5): Linear(in_features=256, out_features=1, bias=True)
  (act): ReLU()
  (vdp): Dropout(p=0.2, inplace=False)
  (tdp): Dropout(p=0.2, inplace=False)
)
75 7
Epoch 1/5
----------
torch.Size([64, 256])
torch.Size([64, 768])
torch.Size([1, 256])
[1,     1] loss: 3.5162, Acc: 54.69
torch.Size([64, 256])
torch.Size([64, 768])
torch.Size([1, 256])
torch.Size([64, 256])
torch.Size([64, 768])
torch.Size([1, 256])
torch.Size([64, 256])
torch.Size([64, 768])
torch.Size([1, 256])
torch.Size([64, 256])
torch.Size([64, 768])
torch.Size([1, 256])
torch.Size([64, 256])
tor

AttributeError: 'Args' object has no attribute 'net'

In [None]:
from transformers import BertModel, BertTokenizer

from text_normalizer import preprocess
from helper_functions import get_text_processor
from your_dataset_file import CustomDatasetFixed  # Update if filename is different

In [6]:

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters (read by the dataset, loader, optimizer and training cells)
max_seq_length = 128   # token length every text is padded/truncated to in tokenize()
val_split = 0.1        # fraction of the dataset held out for validation
num_epochs = 20
batch_size = 64
learning_rate = 1e-4

# Transforms and tokenizer
# Images are resized to 224x224 and converted to tensors; no normalisation is applied.
img_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize(text):
    """Encode ``text`` with the module-level BERT ``tokenizer``.

    The text is padded or truncated to ``max_seq_length`` tokens and encoded
    as PyTorch tensors; dimension 0 of each tensor is squeezed before return.

    Args:
        text: The raw text to encode.

    Returns:
        tuple: ``(input_ids, attention_mask)`` tensors for ``text``.
    """
    encoded = tokenizer.encode_plus(
        text,
        max_length=max_seq_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    token_ids = encoded['input_ids'].squeeze(0)
    attention = encoded['attention_mask'].squeeze(0)
    return token_ids, attention

# Load dataset
df = pd.read_csv("data/train.csv")
dataset = CustomDatasetFixed(df, phase="train", img_transform=img_transform,
                              preprocess=preprocess, tokenize=tokenize, max_length=max_seq_length)

# Train-val split
# The split is driven by an explicitly seeded generator so the same samples land in
# the validation set every time this cell is re-run; without it random_split draws
# from the global RNG and the membership changes between runs.
split_seed = 42
val_size = int(len(dataset) * val_split)
train_size = len(dataset) - val_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Model (ResNet out dim = 2048, BERT hidden size = 768)
model = MMNetwork(vdim=2048, tdim=768, n_cls=5).to(device)
# NOTE(review): nn.BCELoss expects probabilities in [0, 1]. The MMNetwork repr shows
# plain Linear heads and no Sigmoid module, so confirm that MMNetwork.forward applies
# a sigmoid itself; if it returns raw logits, nn.BCEWithLogitsLoss is the right loss.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

FileNotFoundError: [Errno 2] No such file or directory: 'data/train.csv'

In [None]:

# Training loop
# Text features come from a frozen BERT encoder. `model.bigru` is an nn.LSTM (see the
# MMNetwork repr in the output of the import cell) and has no `embedding` attribute,
# so looking one up on it raises AttributeError on the first batch.
text_encoder = BertModel.from_pretrained('bert-base-uncased').to(device)
text_encoder.eval()  # inference only: turns off dropout inside the encoder

# NOTE(review): val_loader is built in the setup cell but is not consumed here;
# no validation pass is run.
num_batches = len(train_loader)

for epoch in range(num_epochs):
    # Halve the LR every 5 epochs. The value is derived from the base `learning_rate`
    # rather than multiplied in place, so re-running this cell restarts the schedule
    # instead of compounding the decay on an already-reduced LR.
    epoch_lr = learning_rate * 0.5 ** (epoch // 5)
    for g in optimizer.param_groups:
        g['lr'] = epoch_lr

    model.train()
    total_loss = 0.0
    for imgs, input_ids, masks, label1, label2, label3, label4, label5 in train_loader:
        # NOTE(review): MMNetwork is built with vdim=2048; confirm the dataset yields
        # 2048-d image features here rather than raw image tensors.
        imgs = imgs.to(device)
        input_ids = input_ids.to(device)
        masks = masks.to(device)
        labels = torch.stack([label1, label2, label3, label4, label5], dim=1).float().to(device)

        # The encoder is frozen, so no graph is needed for it. The attention mask
        # keeps padding tokens from influencing the text representation.
        with torch.no_grad():
            encoded = text_encoder(input_ids=input_ids, attention_mask=masks)
            # One 768-d vector per sample (hidden state at the first token).
            # The forward log in the import cell's output shows 2-D tensors
            # ((64, 768) and an LSTM state of (1, 256)), so a pooled feature is
            # assumed -- TODO confirm against MMNetwork.forward; pass
            # `encoded.last_hidden_state` unsliced if it expects per-token features.
            tx = encoded.last_hidden_state[:, 0]

        optimizer.zero_grad()
        out1, out2, out3, out4, out5 = model(imgs, tx)
        # Stack the five task heads along dim 1 and drop the trailing dim so the
        # prediction tensor lines up with `labels`.
        outputs = torch.stack([out1, out2, out3, out4, out5], dim=1).squeeze(-1)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report the mean per-batch loss so the number does not scale with dataset size.
    mean_loss = total_loss / max(num_batches, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}")

# Save model
torch.save(model.state_dict(), "trained_model.pth")
