In [3]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, default_data_collator, \
    DataCollatorForLanguageModeling
from datasets import load_dataset
from arguments import DataTrainingArguments, ModelArguments, TrainingArguments
from collections import defaultdict
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt
from utils import color_idx, room_idx

# Initialize the tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=True)

# Initialize the data arguments
data_args = DataTrainingArguments(
    datasets='floorplan',
    data_dir=None,
    train_split='train',
    max_seq_length=512,
    max_output_seq_length=512,
    max_seq_length_eval=512,
    max_output_seq_length_eval=512,
    exp='full',
    boundary_in_where='Encoder'
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
model = T5ForConditionalGeneration.from_pretrained('t5-base')

In [5]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [6]:
from datasets import load_dataset

floorplan_dataset = load_dataset('floorplan', data_args, split='finetune',
                                 max_input_length=data_args.max_seq_length,
                                 max_output_length=data_args.max_output_seq_length,
                                 tokenizer=tokenizer, seed=1)



In [7]:
optimizer = Adam(model.parameters(), lr=5e-4, betas=(0.9, 0.999), eps=1e-08)

In [8]:
loss_fn = torch.nn.CrossEntropyLoss()

In [9]:
train_dataloader = DataLoader(floorplan_dataset, batch_size=1, shuffle=False)

In [11]:
for epoch in range(1):
    model.train()
    total_loss = 0.0
    for batch in train_dataloader:
        optimizer.zero_grad()

        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        # num_rooms = batch['num_rooms']
        # reg_labels = batch['reg_labels']
        # decoder_boundary_ids = batch['decoder_boundary_ids']
        # decoder_boundary_mask = batch['decoder_boundary_mask']
        labels = batch['labels']

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            # num_rooms=num_rooms,
            # reg_labels=reg_labels,
            # decoder_boundary_ids=decoder_boundary_ids,
            # decoder_boundary_mask=decoder_boundary_mask,
            labels=labels
        )

        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()

        # Gradient clipping
        max_grad_norm = 1.0
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss}")

AttributeError: 'list' object has no attribute 'size'