# Finetune Pix2Struct model on Synthetic Bootstrap dataset

## Setup Envirnoment

In [3]:
pip install transformers==4.36.2

Defaulting to user installation because normal site-packages is not writeable
Collecting tokenizers<0.19,>=0.14 (from transformers==4.36.2)
  Using cached tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Installing collected packages: tokenizers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.0
    Uninstalling tokenizers-0.21.0:
      Successfully uninstalled tokenizers-0.21.0
Successfully installed tokenizers-0.15.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/software-current/2023.06/x86_64/generic/software/Python/3.11.3-GCCcore-12.3.0/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# !pip install -q wandb

In [3]:
# pip install torchvision nltk wandb tqdm Pillow

## Import necessary libraries

In [1]:
# from google.colab import drive
import os
import zipfile
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import re
from transformers import Pix2StructForConditionalGeneration, AutoProcessor
import torch
from torch.nn import functional as F
from transformers.optimization import Adafactor, get_cosine_schedule_with_warmup
from pathlib import Path
from nltk import edit_distance
import numpy as np
import wandb
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction
from torch.utils.data import random_split
import random

## Define variables and parameters

In [22]:
# G_DRIVE_FOLDER = '/content/drive/MyDrive/Datasets/'

# DATASET_NAME = 'synthBootstrap_mini'
# ZIP_NAME = DATASET_NAME + '.zip'
# DESTINATION_FOLDER= 'data/'
# DATASET_FOLDER = DESTINATION_FOLDER + DATASET_NAME

# HTML_FILES_FOLDER = DESTINATION_FOLDER + "html/"
# home/seyeon/data/synthBootstrap_mini/html

FOLDER_CHECKPOINTS = ''
DATASET_NAME = 'WebSight/'
# ZIP_NAME = DATASET_NAME + '.zip'
DESTINATION_FOLDER= 'data/'
DATASET_FOLDER = DESTINATION_FOLDER + DATASET_NAME
HTML_FILES_FOLDER = DATASET_FOLDER + "html/"


EXPERIMENT_NAME = "WebSight"

MAX_SENTENCE_LEN = 4096

CHUNK_LENGTH =  1024
CONTEXT_OVERLAP_LENGTH = 256

MAX_PATCHES = 512 #1024

DEBUG = False
VERBOSE = True

BATCH_SIZE = 4
NUM_WARMUP_STEPS = 1000
MAX_EPOCHS = 200
LR = 1e-4
CHECK_VAL_EVERY_N_EPOCH = 5
GRADIENT_CLIP_VAL = 1.0
ACCUMULATE_GRAD_BATCHES = 8 / BATCH_SIZE

TRAIN_SET_PERCENTAGE = 0.88
VALID_SET_PERCENTAGE = 0.02 # Use 20 for validation
# TEST_SET_PERCENTAGE is 1 - TRAIN_SET_PERCENTAGE - VALID_SET_PERCENTAGE # Use 100 for test

RANDOM_SEED = 123

LOAD_FROM_CHECKPOINT = False
LAST_CHECKPOINT_NAME = ""

In [23]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [24]:
MAX_N_CHUNKS_PER_SENTENCE = 1 + (MAX_SENTENCE_LEN - CHUNK_LENGTH) // (CHUNK_LENGTH - CONTEXT_OVERLAP_LENGTH)
print("MAX_N_CHUNKS_PER_SENTENCE", MAX_N_CHUNKS_PER_SENTENCE)

MAX_N_CHUNKS_PER_SENTENCE 5



## Load Synthetic Bootstrap Dataset

## Load Model and Processor

In [25]:
# pip install tokenizers==0.21.0

In [26]:

repo_id = "google/pix2struct-base"
processor = AutoProcessor.from_pretrained(repo_id)
model = Pix2StructForConditionalGeneration.from_pretrained(repo_id, is_encoder_decoder=True)

## Create Dataset class

### Preprocessing functions

In [27]:
def round_floats_in_text(text, precision=0):
    # match float numbers with 2 or more decimal places in the text
    pattern = r"\b\d+\.\d{2,}\b"

    def replace(match):
        float_number = float(match.group())
        return f"{float_number:.{precision}f}"

    text = re.sub(pattern, replace, text)
    return text

In [28]:
def remove_html_comments(text):
    # match html comments
    pattern = r"<!--.*?-->"

    text = re.sub(pattern, '', text, flags=re.DOTALL)
    return text

In [29]:
def preprocess_html_file(html_text):
    text_cleaned = html_text.replace('\n', ' ')
    text_cleaned_without_multiple_spaces = re.sub(r'\s+', ' ', text_cleaned)
    text_without_comments = remove_html_comments(text_cleaned_without_multiple_spaces)
    text_without_long_floats = round_floats_in_text(text_without_comments)
    return text_without_long_floats

### Find max sentence length and new unknown tokens

In [30]:
# import os

# def safe_read(filepath):
#     try:
#         with open(filepath, "r", encoding="utf-8") as f:
#             return f.read()
#     except UnicodeDecodeError:
#         try:
#             with open(filepath, "r", encoding="latin-1") as f:
#                 return f.read()
#         except Exception as e:
#             print(f"Skipping {filepath} due to decode error: {e}")
#             return None

# HTML_FILES_FOLDER = "data/new_data/html/"
# all_paths = [f for f in os.listdir(HTML_FILES_FOLDER) if f.endswith(".html")]

# max_length = 0
# tokens_to_add = set()

# for html_file in all_paths:
#     text = safe_read(os.path.join(HTML_FILES_FOLDER, html_file))
#     if text is None:
#         continue

#     processed_text = preprocess_html_file(text)
#     tokens = processor.tokenizer(processed_text).tokens()

#     tokens_to_add.update(tokens)
#     max_length = max(max_length, len(tokens))

# print(f"Max sentence length = {max_length}")

# newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
# print(f"Number of new tokens = {newly_added_num}")

# if newly_added_num > 0:
#     model.decoder.resize_token_embeddings(len(processor.tokenizer))


In [31]:
# import shutil
# import os

# checkpoint_path = "data/WebSight/html/.ipynb_checkpoints"
# if os.path.exists(checkpoint_path):
#     shutil.rmtree(checkpoint_path)
#     print("✅ Deleted:", checkpoint_path)
# else:
#     print("⚠️ No .ipynb_checkpoints folder found.")


In [32]:
# Find max length
all_paths = os.listdir(HTML_FILES_FOLDER)

max_length = 0

# Read text files and add new tokens to dictionary
tokens_to_add = set()
for html_file_path in all_paths:
    with open(HTML_FILES_FOLDER + html_file_path, "r") as reader:
        splitted_text = processor.tokenizer(preprocess_html_file(reader.read())).tokens()
        tokens_to_add = tokens_to_add.union(set(splitted_text))

        # Check if the current sentence has the largest number of tokens
        if len(splitted_text) > max_length:
            max_length = len(splitted_text)

print(f"Max sentence length = {max_length}")

newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
print(f"Number of new tokens = {newly_added_num}")

# Resize the model's token embeddings if there are new tokens
if newly_added_num > 0:
    model.decoder.resize_token_embeddings(len(processor.tokenizer))

Max sentence length = 1493
Number of new tokens = 4447


### Split files into training - validation - test sets

In [33]:
random.seed(RANDOM_SEED)
random.shuffle(sorted(all_paths))

train_len = int(TRAIN_SET_PERCENTAGE * len(all_paths))
valid_len = int(VALID_SET_PERCENTAGE * len(all_paths))

train_paths = all_paths[:train_len]
valid_paths = all_paths[train_len:train_len+valid_len]
test_paths = all_paths[train_len+valid_len:]

print(f"TRAIN_SET size = {len(train_paths)}")
print(f"VALID_SET size = {len(valid_paths)}")
print(f"TEST_SET size = {len(test_paths)}")

TRAIN_SET size = 880
VALID_SET size = 20
TEST_SET size = 100


In [34]:
class SythBootstrapTrainingDataset(Dataset):
    # This is a modification of the dataset used for validation and testing
    # In this one the sentences are already split into chunks, already having
    # the context from the previous chunk, empty chunks are discarded
    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100

        self.data = []
        self.images_encoding = []

        for text_file in tqdm(text_files_paths):
            image_file = text_file.replace('.html', '.png')

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir + "html/", text_file)
            image_file_path = os.path.join(root_dir + "images/", image_file)

            # Each data entry has the following structure
            # labels, image_encoding_idx, part

            # image_encoding_idx points to an entry of images_encoding, which contains attention_mask and flattened_patches for the image
            # Since a single image is used for multiple slices of the same text, this approach is used to save memory

            # Load image
            image = Image.open(image_file_path).convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            encoding = {k:v.squeeze() for k,v in encoding.items()}

            self.images_encoding.append(encoding)
            image_encoding_idx = len(self.images_encoding) - 1

            # Load text
            with open(text_file_path, 'r') as f:
                text = f.read()
                text_cleaned = preprocess_html_file(text)

            if DEBUG:
              print("text:")
              print(text)
              print("\n\n\ntext_cleaned:")
              print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            input_ids_slices = []

            start_index = 0
            end_index = CHUNK_LENGTH
            while end_index <= MAX_SENTENCE_LEN:
                input_ids_slices.append(input_ids[:, start_index:end_index])
                start_index = end_index - CONTEXT_OVERLAP_LENGTH
                end_index = start_index + CHUNK_LENGTH

            for part, input_ids_slice in enumerate(input_ids_slices):
                labels = input_ids_slice.squeeze().clone()

                labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

                # Skip slices with only padding tokens, ignore context from the previous chunk
                if part != 0 and all(x == self.ignore_id for x in labels[CONTEXT_OVERLAP_LENGTH:]):
                    continue

                # labels, image_encoding_idx, part
                # Save them as int32 to save ram memory
                self.data.append((labels.to(torch.int32), image_encoding_idx, part))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        labels, image_encoding_idx, part = self.data[idx]
        encoding = self.images_encoding[image_encoding_idx]
        encoding["labels"] = labels.to(torch.int64)

        return encoding, part

In [35]:
class SythBootstrapDataset(Dataset):
    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100

        self.encodings = []

        for text_file in tqdm(text_files_paths):
            image_file = text_file.replace('.html', '.png')

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir + "html/", text_file)
            image_file_path = os.path.join(root_dir + "images/", image_file)

            # Load image
            image = Image.open(image_file_path).convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            encoding = {k:v.squeeze() for k,v in encoding.items()}

            # Load text
            with open(text_file_path, 'r') as f:
                text = f.read()
                text_cleaned = preprocess_html_file(text)

            if DEBUG:
              print("text:")
              print(text)
              print("\n\n\ntext_cleaned:")
              print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            labels = input_ids.squeeze().clone()
            labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

            encoding["labels"] = labels.to(torch.int32)

            # For each sample save directly the encoding of both text and image
            self.encodings.append(encoding)

    def __len__(self):
        return len(self.encodings)

    def __getitem__(self, idx):
        return self.encodings[idx]

In [36]:
# Transformations for the image
transform = transforms.Compose([
    transforms.ToTensor(),  # convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # normalize for pretrained models
])

# Instantiate the CustomDataset
train_dataset = SythBootstrapTrainingDataset(DATASET_FOLDER, transform, train_paths)
val_dataset = SythBootstrapDataset(DATASET_FOLDER, transform, valid_paths)

# Use DataLoader for batching and shuffling
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=10, shuffle=False) # Use 10 as batch for testing

100%|██████████| 880/880 [02:12<00:00,  6.63it/s]
100%|██████████| 20/20 [00:03<00:00,  6.04it/s]


In [37]:
print(f"train_dataloader size = {len(train_dataloader)}")
print(f"val_dataloader size = {len(val_dataloader)}")

train_dataloader size = 240
val_dataloader size = 2


## Training

In [38]:
START_TOKEN_ID = PAD_TOKEN_ID = processor.tokenizer.pad_token_id

### Utility functions

In [39]:
def move_to_device(data):
    if isinstance(data, (list,tuple)):
        return [move_to_device(x) for x in data]
    elif isinstance(data, dict):
        return {k: move_to_device(v) for k, v in data.items()}
    elif isinstance(data, torch.Tensor):
        return data.to(DEVICE)
    else:
        return data

In [40]:
def create_extended_attention_mask_for_decoder_with_context(input_shape, attention_mask, part):
    device = attention_mask.device
    batch_size, seq_length = input_shape
    seq_ids = torch.arange(seq_length, device=device)

    causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]

    # Expand part to have the same shape as the relevant slice of causal_mask
    part_expanded = part.unsqueeze(-1).unsqueeze(-1).expand(-1, seq_length, CONTEXT_OVERLAP_LENGTH)

    # Create a mask with ones where part is not zero
    context_mask = (part_expanded != 0).float()

    # Apply the context_mask to the corresponding part of causal_mask
    causal_mask[:, :, :CONTEXT_OVERLAP_LENGTH] = causal_mask[:, :, :CONTEXT_OVERLAP_LENGTH] * (1 - context_mask) + context_mask

    # in case past_key_values are used we need to add a prefix ones mask to the causal mask
    causal_mask = causal_mask.to(attention_mask.dtype)

    if causal_mask.shape[1] < attention_mask.shape[1]:
        print("!!should not enter here in my case!!")
        prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1]
        causal_mask = torch.cat(
            [
                torch.ones((batch_size, seq_length, prefix_seq_len), device=device, dtype=causal_mask.dtype),
                causal_mask,
            ],
            axis=-1,
        )

    extended_attention_mask = causal_mask[:, :, :] * attention_mask[:, None, :]
    return extended_attention_mask


In [41]:
def get_attention_mask(decoder_input_ids, part):
    decoder_attention_mask = (decoder_input_ids.ne(PAD_TOKEN_ID).float())

    # always attend on first token
    decoder_attention_mask[:, 0] = 1

    # Expand part to have the same shape as the relevant slice of decoder_attention_mask
    part_expanded = part.unsqueeze(-1).expand(-1, CONTEXT_OVERLAP_LENGTH)

    # Create a mask with ones where part is not zero
    context_mask = (part_expanded != 0).float()

    # Apply the context_mask to the corresponding part of decoder_attention_mask
    decoder_attention_mask[:, 0:CONTEXT_OVERLAP_LENGTH] = decoder_attention_mask[:, 0:CONTEXT_OVERLAP_LENGTH] * (1 - context_mask) + context_mask

    return decoder_attention_mask

In [42]:
def shift_right_modified(input_ids, decoder_starting_token_idx):

    # shift inputs to the right
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)
    shifted_input_ids[..., 1:] = input_ids[..., :-1].clone()
    shifted_input_ids[..., 0] = decoder_starting_token_idx

    # replace possible -100 values in labels by `pad_token_id`
    shifted_input_ids.masked_fill_(shifted_input_ids == -100, PAD_TOKEN_ID)

    return shifted_input_ids

In [43]:
def get_decoder_input_ids(labels_chunk, start_id):
    return shift_right_modified(labels_chunk, start_id)

In [44]:
def get_decoder_input_ids_and_attention_mask(labels, part):
    decoder_input_ids = get_decoder_input_ids(labels, START_TOKEN_ID)
    decoder_attention_mask = get_attention_mask(decoder_input_ids, part)
    extended_decoder_attention_mask = create_extended_attention_mask_for_decoder_with_context(decoder_input_ids.shape, decoder_attention_mask, part)

    return decoder_input_ids, extended_decoder_attention_mask

### Main training function

In [45]:
def train_model(config, processor, model, train_dataloader, val_dataloader):
    # Extract configuration values
    lr = config.get("lr")
    max_epochs = config.get("max_epochs")
    num_warmup_steps = config.get("num_warmup_steps")

    model.to(DEVICE)

    optimizer = Adafactor(model.parameters(), scale_parameter=False, relative_step=False, lr=lr, weight_decay=1e-05)

    # Use total steps (i.e., max_epochs * length_of_train_data)
    total_steps = max_epochs * len(train_dataloader)
    scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=total_steps)

    global_step = 0  # to keep track of total steps
    epoch_start = 0

    if LOAD_FROM_CHECKPOINT:
        print("Loading model from checkpoint:", LAST_CHECKPOINT_NAME)
        checkpoint = torch.load(LAST_CHECKPOINT_NAME)
        model.resize_token_embeddings(50244) ### retrain
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
        epoch_start = checkpoint["epoch"] + 1
        global_step = checkpoint["global_step"] + 1
        wandb_run_id = checkpoint["wandb_run_id"]

        # Resume the WandB run
        wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config,     resume="must", id="iaegqr6z")
    else:
        wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config)

    epoch_last = epoch_start + max_epochs - 1
    for epoch in range(epoch_start, epoch_start + max_epochs):
        global_step, moving_avg_loss = training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last)

        if epoch == 0 + epoch_start or epoch == epoch_last or (epoch + 1) % config.get("check_val_every_n_epoch") == 0:
            avg_bleu_score = testing_loop(val_dataloader, model, processor, config, f"Epoch {epoch}/{epoch_last} - valid loop")

            # Save the model after each validation step
            save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb.run.id, avg_bleu_score, EXPERIMENT_NAME, FOLDER_CHECKPOINTS)

            if config.get("verbose", False):
                print(f"Moving Avg Loss: {moving_avg_loss:.3f}")
                print(f" Avg Bleu Score: {avg_bleu_score:.2f}")

            wandb.log({"moving_avg_loss": moving_avg_loss, "bleu": avg_bleu_score, **{f'lr_{i}': param_group['lr'] for i, param_group in enumerate(optimizer.param_groups)}})

    wandb.finish()

In [56]:
def training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last):


    model.train()
    train_loop = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch}/{epoch_last} - train loop")

    # Extract configuration values
    accumulate_grad_batches = config.get('accumulate_grad_batches', 1)
    gradient_clip_val = config.get("gradient_clip_val")

    moving_avg_loss = 0
    alpha = 0.1 # Smoothing factor

    for step, batch in train_loop:
        encoding, part = map(move_to_device, batch)
        labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

        decoder_input_ids, decoder_attention_mask = get_decoder_input_ids_and_attention_mask(labels, part)
        
        outputs = model(labels=labels, flattened_patches=flattened_patches, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids)#, decoder_attention_mask=decoder_attention_mask)
        loss = outputs.loss
        loss.backward()

        if global_step % accumulate_grad_batches == 0 or step == len(train_dataloader) - 1:
            if gradient_clip_val:
                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip_val)
            optimizer.step()
            optimizer.zero_grad()

        # Update the progress bar
        train_loop.set_postfix({'loss': loss.item()}, refresh=True)

        scheduler.step()
        global_step += 1

        # Update the moving average loss
        moving_avg_loss = loss.item() if moving_avg_loss == 0 else alpha * loss.item() + (1 - alpha) * moving_avg_loss

        # Log Loss after each step
        wandb.log({"loss": loss.item()})

    return global_step, moving_avg_loss

In [57]:
# def testing_loop(testing_dataloader, model, processor, config, description):
#     model.eval()
#     bleu_scores = []

#     with torch.no_grad():
#         test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
#         for i, batch in test_loop:
#             encoding = move_to_device(batch)
#             labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

#             # Initialize total_outputs with zeros
#             total_outputs = None
#             context_from_last = None

#             # Initialize a mask to track which sentences are finished
#             finished_sentences_mask = torch.zeros(flattened_patches.size(0), dtype=torch.bool, device=flattened_patches.device)

#             for iteration in range(MAX_N_CHUNKS_PER_SENTENCE):

#                 generate_args = {
#                     "flattened_patches": flattened_patches[~finished_sentences_mask],
#                     "attention_mask": attention_mask[~finished_sentences_mask],
#                     "max_new_tokens": CHUNK_LENGTH - (CONTEXT_OVERLAP_LENGTH if iteration else 0),
#                 }

#                 if iteration and context_from_last is not None:
#                     generate_args["decoder_input_ids"] = context_from_last[~finished_sentences_mask]

#                 outputs = model.generate(**generate_args)

#                 # Remove context overlap only from the second iteration onwards
#                 new_chunks = outputs if iteration == 0 else outputs[:, CONTEXT_OVERLAP_LENGTH:]

#                 if iteration == 0:
#                     total_outputs = new_chunks
#                 else:
#                     # Update total_outputs by concatenating new chunks
#                     new_chunks_with_padding_chunks = torch.full((flattened_patches.shape[0], new_chunks.shape[1]), PAD_TOKEN_ID, dtype=new_chunks.dtype, device=new_chunks.device)
#                     new_chunks_with_padding_chunks[~finished_sentences_mask] = new_chunks
#                     total_outputs = torch.cat((total_outputs, new_chunks_with_padding_chunks), dim=1)

#                 # Update the finished_sentences_mask
#                 finished_sentences_mask[~finished_sentences_mask] |= (outputs == processor.tokenizer.eos_token_id).any(dim=1)

#                 # If all sentences are finished, exit the loop
#                 if finished_sentences_mask.all():
#                     break

#                 if outputs.shape[1] < CHUNK_LENGTH:
#                     print("ERROR: !! should have already exited because all sentences reached the end!!")

#                 # -1 because it will put in front a START_TOKEN automatically
#                 context_from_last = total_outputs[:, -(CONTEXT_OVERLAP_LENGTH-1):]

#             predictions = processor.tokenizer.batch_decode(total_outputs, skip_special_tokens=True)

#             labels[labels == -100] = 0
#             answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

#             bleu_scores += [corpus_bleu([[answer]], [pred], smoothing_function=SmoothingFunction().method4) for pred, answer in zip(predictions, answers)]

#             avg_bleu_score = np.mean(bleu_scores)
#             test_loop.set_postfix(bleu_score=avg_bleu_score)

#             if config.get("verbose", False):
#                 for pred, answer, bleu_score in zip(predictions, answers, bleu_scores):
#                     tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")


#     return avg_bleu_score


In [58]:
def testing_loop(testing_dataloader, model, processor, config, description):
    model.eval()
    bleu_scores = []

    with torch.no_grad():
        test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
        for i, batch in test_loop:
            encoding = move_to_device(batch)
            labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

            # Initialize total_outputs with zeros
            total_outputs = None
            context_from_last = None

            # Initialize a mask to track which sentences are finished
            finished_sentences_mask = torch.zeros(flattened_patches.size(0), dtype=torch.bool, device=flattened_patches.device)

            for iteration in range(MAX_N_CHUNKS_PER_SENTENCE):

                generate_args = {
                    "flattened_patches": flattened_patches[~finished_sentences_mask],
                    "attention_mask": attention_mask[~finished_sentences_mask],
                    "max_new_tokens": CHUNK_LENGTH - (CONTEXT_OVERLAP_LENGTH if iteration else 0),
                }

                if iteration and context_from_last is not None:
                    generate_args["decoder_input_ids"] = context_from_last[~finished_sentences_mask]

                outputs = model.generate(**generate_args)

                # Remove context overlap only from the second iteration onwards
                new_chunks = outputs if iteration == 0 else outputs[:, CONTEXT_OVERLAP_LENGTH:]

                if iteration == 0:
                    total_outputs = new_chunks
                else:
                    # Update total_outputs by concatenating new chunks
                    new_chunks_with_padding_chunks = torch.full((flattened_patches.shape[0], new_chunks.shape[1]), PAD_TOKEN_ID, dtype=new_chunks.dtype, device=new_chunks.device)
                    new_chunks_with_padding_chunks[~finished_sentences_mask] = new_chunks
                    total_outputs = torch.cat((total_outputs, new_chunks_with_padding_chunks), dim=1)

                # Update the finished_sentences_mask
                finished_sentences_mask[~finished_sentences_mask] |= (outputs == processor.tokenizer.eos_token_id).any(dim=1)

                # If all sentences are finished, exit the loop
                if finished_sentences_mask.all():
                    break

                if outputs.shape[1] < CHUNK_LENGTH:
                    print("ERROR: !! should have already exited because all sentences reached the end!!")

                # -1 because it will put in front a START_TOKEN automatically
                context_from_last = total_outputs[:, -(CONTEXT_OVERLAP_LENGTH-1):]

            predictions = processor.tokenizer.batch_decode(total_outputs, skip_special_tokens=True)

            labels[labels == -100] = 0
            answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

            bleu_scores += [corpus_bleu([[answer]], [pred], smoothing_function=SmoothingFunction().method4) for pred, answer in zip(predictions, answers)]

            avg_bleu_score = np.mean(bleu_scores)
            test_loop.set_postfix(bleu_score=avg_bleu_score)

            if config.get("verbose", False):
                for pred, answer, bleu_score in zip(predictions, answers, bleu_scores):
                    tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")


    return avg_bleu_score


In [59]:
def save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb_run_id, avg_bleu_score, experiment_name, folder_path):
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "epoch": epoch,
        "global_step": global_step,
        'wandb_run_id': wandb_run_id
    }
    model_name = f"{experiment_name}_epoch[{epoch}]_bleu[{avg_bleu_score:.2f}].pth"
    torch.save(checkpoint, folder_path + model_name)


In [60]:
config = {
          "batch_size": BATCH_SIZE,
          "num_warmup_steps": NUM_WARMUP_STEPS,
          "max_epochs": MAX_EPOCHS,
          "lr": LR,
          "check_val_every_n_epoch": CHECK_VAL_EVERY_N_EPOCH,
          "gradient_clip_val": GRADIENT_CLIP_VAL,
          "accumulate_grad_batches": ACCUMULATE_GRAD_BATCHES,
          "verbose": VERBOSE,
}

In [61]:
# import os
# import glob

# # Define directories
# html_dir = "data/new_data/html"
# image_dir = "data/new_data/images"

# # Function to delete all files in a directory
# def delete_files_in_dir(directory):
#     files = glob.glob(os.path.join(directory, '*'))
#     for f in files:
#         if os.path.isfile(f):
#             os.remove(f)

# # Delete files
# delete_files_in_dir(html_dir)
# delete_files_in_dir(image_dir)

# print("All files deleted in html and images folders.")


In [62]:
def validate_config(config):
    # Check required keys
    required_keys = [
        "batch_size",
        "num_warmup_steps",
        "max_epochs",
        "lr",
        "check_val_every_n_epoch",
        "gradient_clip_val",
        "accumulate_grad_batches",
        "verbose"
    ]
    for key in required_keys:
        if key not in config:
            raise ValueError(f"Key '{key}' must be present in the configuration.")

    # Check that values are in expected ranges
    if config["batch_size"] <= 0:
        raise ValueError("batch_size must be positive.")
    if config["num_warmup_steps"] < 0:
        raise ValueError("num_warmup_steps must be non-negative.")
    if config["max_epochs"] <= 0:
        raise ValueError("max_epochs must be positive.")
    if config["lr"] <= 0:
        raise ValueError("Learning rate must be positive.")
    if config["check_val_every_n_epoch"] <= 0:
        raise ValueError("check_val_every_n_epoch must be positive.")
    if config["gradient_clip_val"] < 0:
        raise ValueError("gradient_clip_val must be non-negative.")
    if config["accumulate_grad_batches"] <= 0:
        raise ValueError("accumulate_grad_batches must be positive.")
    if not isinstance(config["verbose"], bool):
        raise ValueError("verbose must be a boolean value.")


In [63]:
validate_config(config)
print(config)

{'batch_size': 4, 'num_warmup_steps': 1000, 'max_epochs': 200, 'lr': 0.0001, 'check_val_every_n_epoch': 5, 'gradient_clip_val': 1.0, 'accumulate_grad_batches': 2.0, 'verbose': True}


In [64]:
# checkpoint = torch.load('checkpointsPix2Struct_SynthBootstrap_1000_Complete_epoch[19]_bleu[0.87].pth')
# print("Saved WandB run ID:", checkpoint['wandb_run_id'])


In [65]:
train_model(config, processor, model, train_dataloader, val_dataloader)

Epoch 0/199 - train loop: 100%|██████████| 240/240 [00:51<00:00,  4.69it/s, loss=3.05]
Epoch 0/199 - valid loop:  50%|█████     | 1/2 [04:16<04:16, 256.25s/it, bleu_score=0.00827]


Prediction: <<<img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg> <img_src=nav_bg>

Epoch 0/199 - valid loop: 100%|██████████| 2/2 [08:50<00:00, 265.09s/it, bleu_score=0.0097] 



Prediction: <<<img_src=1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

Epoch 1/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=1.32]
Epoch 2/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=1.79] 
Epoch 3/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=1.01] 
Epoch 4/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.538]
Epoch 4/199 - valid loop:  50%|█████     | 1/2 [00:59<00:59, 59.02s/it, bleu_score=0.479]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <p class="text-lg">This is a small house for sale.</p> </header> <main class="container mx-auto p-4"> <section class="flex justify-between items-center"> <h2 class="text-2xl font-bold">Listings</h2> </section> <section class="flex justify-between items-center"> <h2 class="text-2xl font-bold">Property For Sale</h2> <p class="text-lg">This is a small house for sale.</p> </section> <footer class="bg-white p-4"> <p>© 2022 Real Estate Agency.</p> </footer> </body> </html>
    Answer: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <div class="container mx-auto px-4"> <header class="py-4"> <h1 class="text-3xl font-bold">Real Estate Agency</h1> </header> 

Epoch 4/199 - valid loop: 100%|██████████| 2/2 [03:23<00:00, 101.67s/it, bleu_score=0.412]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-4xl font-bold">Non-profit Organization</h1> </header> <main class="container mx-auto p-4"> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="container mx-auto p-4"> <h2 class="text-2xl font-bold">Testimonials</h2> </section> <section class="co

Epoch 5/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.528]
Epoch 6/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.431]
Epoch 7/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.623]
Epoch 8/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.937]
Epoch 9/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.299]
Epoch 9/199 - valid loop:  50%|█████     | 1/2 [01:36<01:36, 96.20s/it, bleu_score=0.58]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4 flex justify-between items-center"> <img src="https://source.unsplash.com/random/100x50/?logo" alt="Logo" class="h-10"> </header> <nav class="bg-white p-4 flex justify-between items-center"> <ul class="flex space-x-4"> <li><a href="#" class="text-gray-600 hover:text-gray-800">Home</a></li> <li><a href="#" class="text-gray-600 hover:text-gray-800">About</a></li> <li><a href="#" class="text-gray-600 hover:text-gray-800">Contact</a></li> </ul> </nav> <main class="container mx-auto p-4"> <section class="flex justify-between items-center"> <h1 class="text-2xl font-bold mb-4">Listings</h1> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </section> <section class="flex justify-between items-center"> <h2 class="text-2xl font-bold mb-4">House for Sale</h2> <p class="mb-4">This is a re

Epoch 9/199 - valid loop: 100%|██████████| 2/2 [03:12<00:00, 96.21s/it, bleu_score=0.61]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4 flex justify-between items-center"> <img src="https://source.unsplash.com/random/100x50/?logo" alt="Logo" class="h-10"> </header> <nav class="bg-white p-4 flex justify-between items-center"> <ul class="flex space-x-4"> <li><a href="#" class="hover:underline">Home</a></li> <li><a href="#" class="hover:underline">About</a></li> <li><a href="#" class="hover:underline">Contact</a></li> </ul> </nav> <main class="container mx-auto p-4"> <section class="flex flex-col items-center"> <h1 class="text-4xl font-bold mb-4">Non-Profit Organization</h1> </section> <section class="flex-col items-center"> <h2 class="text-2xl font-bold mb-4">Our mission is to provide a platform for individuals and organizations to create a difference in their communities. We believe in the power of collective action and strive to create a world where

Epoch 10/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.431]
Epoch 11/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.493]
Epoch 12/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.489]
Epoch 13/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.19] 
Epoch 14/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.278]
Epoch 14/199 - valid loop:  50%|█████     | 1/2 [01:04<01:04, 64.39s/it, bleu_score=0.611]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4 shadow"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </header> <main class="container mx-auto p-4"> <section class="mb-8"> <h2 class="text-xl font-bold">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold">House for Sale</h2> <p class="text-gray-700">This is a beautiful house for sale.</p> </div> <div class="bg-white p-4 shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </div> </div> </section> <section class="mb-8"> <div class="bg-white p-4 shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" 

Epoch 14/199 - valid loop: 100%|██████████| 2/2 [03:03<00:00, 91.87s/it, bleu_score=0.594]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4 flex justify-center"> <img src="https://source.unsplash.com/random/100x50/?logo" alt="Logo" class="h-10"> <h1 class="text-4xl">Nonprofit Organization</h1> <nav> <ul class="flex space-x-4"> <li><a href="#" class="hover:underline">Donate</a></li> <li><a href="#" class="hover:underline">Volunteer</a></li> <li><a href="#" class="hover:underline">Donate</a></li> </ul> </nav> <main class="p-4"> <section id="mission" class="mb-8"> <h2 class="text-2xl">Our mission is to provide a platform for individuals and organizations to create a platform to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section id="testimonials" class="mb-8"> <h2 class="text-2xl">Testimonials</h2> </section> </main> </body> <

Epoch 15/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.288]
Epoch 16/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.372]
Epoch 17/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.126]
Epoch 18/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.299]
Epoch 19/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.278]
Epoch 19/199 - valid loop:  50%|█████     | 1/2 [01:40<01:40, 100.74s/it, bleu_score=0.61]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4 flex justify-between items-center"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <img src="https://source.unsplash.com/random/100x50/?logo" alt="Logo" class="h-10"> </header> <main class="container mx-auto p-4"> <section class="mb-8"> <h2 class="text-2xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full h-64 object-cover"> <h3 class="text-xl font-bold mb-2">House Title Sale</h3> <p class="text-gray-700">This is a beautiful house for sale.</p> </div> </div> </section> <section class="mb-8"> <h2 class="text-2xl font-bold mb-2">Map</h2> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?map" alt="Map" class="w-full h-64 o

Epoch 19/199 - valid loop: 100%|██████████| 2/2 [03:45<00:00, 112.52s/it, bleu_score=0.57]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-screen" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white font-bold">Non-Profit Organization</h1> <button class="bg-white text-gray-800 px-4 py-2 rounded">Donate Now</button> </header> <main class="container mx-auto px-4 py-8"> <section class="flex flex-col md:flex-row py-8"> <div class="w-full md:w-1/2 p-4"> <h2 class="text-2xl font-bold mb-4">Our mission is to provide a platform for individuals and organizations to create a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</h2> </div> </section> <section class="w-full md:w-1/2 p-4"> <h2 class="text-2xl font-bold mb-4">Testimonials</h2> </div> </section> </main> <footer class="bg-gray-80

Epoch 20/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.356]
Epoch 21/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.312]
Epoch 22/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.215]
Epoch 23/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.258]
Epoch 24/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.305]
Epoch 24/199 - valid loop:  50%|█████     | 1/2 [01:40<01:40, 100.73s/it, bleu_score=0.601]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <div class="container mx-auto px-4 py-8"> <h1 class="text-3xl font-bold mb-4">Real Estate Agency</h1> <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4"> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </div> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <h2 class="text-2xl font-bold mb-4">Listings</h2> <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4"> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray-700">This is a beautiful house located in the heart of the city.</p> </div> </div> </div> </div> </div> <div class="container mx-

Epoch 24/199 - valid loop: 100%|██████████| 2/2 [03:34<00:00, 107.44s/it, bleu_score=0.59] 



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-screen" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <div class="container mx-auto px-6 md:flex md:items-center md:justify-between py-4"> <div class="text-center"> <h1 class="text-5xl font-bold mb-4">Non-Profit Organization</h1> <p class="text-xl mb-8">We are a non-profit organization dedicated to making a difference in the world.</p> </div> <div class="flex flex-wrap -mx-4"> <div class="w-full md:w-1/2 px-4"> <h2 class="text-2xl font-bold mb-4">Our mission is to provide a platform for individuals and organizations to create a world where everyone has the opportunity to thrive.</h2> </div> <div class="w-full md:w-1/2 px-4"> <h2 class="text-2xl font-bold mb-4">Testimonials</h2> </div> </div> </body> </html>
    Answer: <html> <link href="https://cdn.jsdelivr.net/npm

Epoch 25/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.217]
Epoch 26/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.151]
Epoch 27/199 - train loop: 100%|██████████| 240/240 [00:53<00:00,  4.46it/s, loss=0.225] 
Epoch 28/199 - train loop: 100%|██████████| 240/240 [00:52<00:00,  4.53it/s, loss=0.261] 
Epoch 29/199 - train loop: 100%|██████████| 240/240 [01:21<00:00,  2.93it/s, loss=0.0418]
Epoch 29/199 - valid loop:  50%|█████     | 1/2 [00:55<00:55, 55.48s/it, bleu_score=0.636]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <nav class="mt-4"> <ul class="flex space-x-4"> <li><a href="#" class="hover:underline">Home</a></li> <li><a href="#" class="hover:underline">About</a></li> <li><a href="#" class="hover:underline">Contact</a></li> </ul> </nav> </header> <main class="p-4"> <section class="mb-4"> <img src="https://source.unsplash.com/random/1200x400/?realestate" alt="Real Estate" class="w-full"> </section> <section class="mb-4"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-2 gap-4"> <div class="bg-white p-4"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-lg font-bold mb-2">House for Sale</h3> <p>This is a beautiful house for sale.</p> </div> </div> </section> <section class="mb-4"> <h2 class

Epoch 29/199 - valid loop: 100%|██████████| 2/2 [01:47<00:00, 53.92s/it, bleu_score=0.665]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <div class="container mx-auto px-6 md:px-12 relative z-10 flex items-center h-full"> <div class="w-full"> <h1 class="text-4xl font-bold mb-4">Non-Profit Organization</h1> </div> <button class="bg-white text-gray-800 font-bold rounded-full mt-4 py-2 px-8 shadow-lg">Donate Now</button> </div> </div> </header> <main class="container mx-auto px-6 md:px-12 py-12"> <p class="text-xl mb-4">Our mission is to provide a platform for individuals and organizations to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </main> <footer class="bg-white"> <div class="container mx-auto px-6 md:px-12 py-12"> <h2 class

Epoch 30/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.71it/s, loss=0.158] 
Epoch 31/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0532]
Epoch 32/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.162] 
Epoch 33/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.139] 
Epoch 34/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.217] 
Epoch 34/199 - valid loop:  50%|█████     | 1/2 [00:48<00:48, 48.30s/it, bleu_score=0.684]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <div class="container mx-auto px-4 py-8"> <h1 class="text-3xl font-bold mb-4">Real Estate Agency</h1> <div class="grid grid-cols-3 gap-4"> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold mb-2">House for Sale</h2> <p class="text-gray-700">This is a beautiful house for sale.</p> </div> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </div> </div> </div> <div class="container mx-auto px-4 py-8"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white rounded-lg shadow-md overflow-hidden"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-

Epoch 34/199 - valid loop: 100%|██████████| 2/2 [01:51<00:00, 55.99s/it, bleu_score=0.687]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="w-full"> <img src="https://source.unsplash.com/random/1600x400/?nonprofit" alt="Nonprofit Organization" class="w-full"> <div class="flex justify-center items-center h-64 bg-red-500 text-white"> <h1 class="text-4xl">Nonprofit Organization</h1> <p class="text-xl">We are a nonprofit organization dedicated to making a difference in their communities.</p> <button class="bg-white text-red-500 px-4 py-2 rounded">Donate Now</button> </div> </header> <section class="py-8 px-4"> <h2 class="text-2xl mb-4">Our mission is to provide a platform for individuals and organizations to create support to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section class="py-8 px-4"> <h2 class="text-2xl mb-4">Testimonials</h2> <

Epoch 35/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.226] 
Epoch 36/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.177] 
Epoch 37/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.132] 
Epoch 38/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.129] 
Epoch 39/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.101] 
Epoch 39/199 - valid loop:  50%|█████     | 1/2 [00:59<00:59, 59.16s/it, bleu_score=0.659]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <div class="mt-4"> <img src="https://source.unsplash.com/random/1200x400/?realestate" alt="Real Estate Banner" class="w-full"> </div> </header> <main class="p-4"> <section class="mb-4"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray-700">This is a beautiful house for you.</p> </div> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </div> </section> </main> <footer class="bg-white p-4"> <p>© 2022 Real Estate Agency</p> </footer> </body> </h

Epoch 39/199 - valid loop: 100%|██████████| 2/2 [01:59<00:00, 59.95s/it, bleu_score=0.66] 



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <div class="container mx-auto px-6 py-12"> <h1 class="text-4xl font-bold text-center text-white">Nonprofit Organization</h1> </div> </header> <nav class="bg-white px-6 py-4"> <ul class="flex justify-around"> <li><a href="#" class="text-blue-500 hover:text-blue-700">Home</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">About</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Get Involved</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Contact</a></li> </ul> </nav> <main class="max-w-screen-lg mx-auto px-6 py-12"> <p class="text-lg text-center">Our mission is to provide a platform for individuals and organizations to create opportunities to make a difference i

Epoch 40/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.241] 
Epoch 41/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0868]
Epoch 42/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.114] 
Epoch 43/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.123] 
Epoch 44/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.103] 
Epoch 44/199 - valid loop:  50%|█████     | 1/2 [01:02<01:02, 62.06s/it, bleu_score=0.688]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <nav> <ul class="flex space-x-4"> <li><a href="#" class="text-gray-600 hover:text-gray-800">Home</a></li> <li><a href="#" class="text-gray-600 hover:text-gray-800">About</a></li> <li><a href="#" class="text-gray-600 hover:text-gray-800">Contact</a></li> </ul> </nav> </header> <main class="p-4"> <section class="mb-4"> <img src="https://source.unsplash.com/random/1200x400/?realestate" alt="Real Estate Banner" class="w-full h-64 object-cover"> </section> <section class="mb-4"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full h-64 object-cover"> <h3 class="text-xl font-bold mt-2">House for Sale</h3> <

Epoch 44/199 - valid loop: 100%|██████████| 2/2 [02:01<00:00, 60.73s/it, bleu_score=0.683]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-64 flex items-center justify-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white">Non-Profit Organization</h1> <p class="text-xl text-white">We are a Non-Profit Organization</p> <button class="bg-white text-gray-800 font-bold py-2 px-4 rounded">Donate Now</button> </header> <section class="py-8"> <h2 class="text-2xl text-center">Our mission is to provide a platform for individuals and organizations to create a better world for all difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section class="py-8"> <h2 class="text-2xl text-center">Testimonials</h2> </section> </body> </html>
    Answer: <html> <link href="https://

Epoch 45/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.14]  
Epoch 46/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0862]
Epoch 47/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0752]
Epoch 48/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0935]
Epoch 49/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.112] 
Epoch 49/199 - valid loop:  50%|█████     | 1/2 [01:00<01:00, 60.81s/it, bleu_score=0.679]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> </header> <main class="flex flex-wrap -mx-4"> <div class="w-full md:w-1/2 px-4 mb-8"> <img src="https://source.unsplash.com/random/800x600/?realestate" alt="Real Estate Image" class="w-full"> </div> <section class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <h2 class="text-xl font-bold mb-2">Lovings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/800x600/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray-700 mb-2">This is a beautiful house for you.</p> </div> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/800x600/?apartment" alt="Property Image" class="w-full

Epoch 49/199 - valid loop: 100%|██████████| 2/2 [01:54<00:00, 57.49s/it, bleu_score=0.685]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100 font-sans leading-normal tracking-normal"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?people')"> <div class="container mx-auto px-6 md:px-12 relative z-10 flex items-center h-full"> <div class="w-full"> <h1 class="text-4xl font-bold text-white leading-tight">Non-Profit Organization</h1> </div> </div> </header> <main class="container mx-auto px-6 md:px-12 py-12"> <section class="my-12"> <h2 class="text-2xl font-bold mb-6">Our mission is to provide a platform for individuals and organizations to create a better world in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section class="my-12"> <h2 class="text-2xl font-bold mb-6">Testimonials</h2> </section> </main> </b

Epoch 50/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0845]
Epoch 51/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0528]
Epoch 52/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.112] 
Epoch 53/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.109] 
Epoch 54/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0423]
Epoch 54/199 - valid loop:  50%|█████     | 1/2 [00:56<00:56, 56.59s/it, bleu_score=0.681]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <nav class="mt-4"> <a href="#" class="text-pink-500 hover:text-pink-700">Home</a> <a href="#" class="text-pink-500 hover:text-pink-700">Listings</a> <a href="#" class="text-pink-500 hover:text-pink-700">About</a> <a href="#" class="text-pink-500 hover:text-pink-700">Contact</a> </nav> </header> <main class="p-4"> <section class="mb-4"> <img src="https://source.unsplash.com/random/800x400/?realestate" alt="Real Estate Banner" class="w-full"> </section> <section class="mb-4"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray

Epoch 54/199 - valid loop: 100%|██████████| 2/2 [01:58<00:00, 59.12s/it, bleu_score=0.691]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-64 flex items-center justify-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white font-bold">Nonprofit Organization</h1> <p class="text-xl text-white mt-4">We are a nonprofit organization dedicated to making a difference in their communities.</p> <button class="mt-8 bg-white text-gray-800 font-bold py-2 px-4 rounded">Donate Now</button> </header> <section class="py-8 px-4"> <h2 class="text-3xl text-center">Our mission is to provide a platform for individuals and organizations to create a positive impact to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section class="py-8 px-4"> <h2 class="text-3xl text-cent

Epoch 55/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0462]
Epoch 56/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.17]  
Epoch 57/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0595]
Epoch 58/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.146] 
Epoch 59/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.176] 
Epoch 59/199 - valid loop:  50%|█████     | 1/2 [00:58<00:58, 58.88s/it, bleu_score=0.667]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> </header> <main class="p-4"> <div class="w-1/2 p-4"> <img src="https://source.unsplash.com/random/800x400/?realestate" alt="Real Estate Banner" class="w-full"> </div> <section class="mt-8"> <h2 class="text-xl font-bold mb-4">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray-700 mb-2">This is a beautiful house for you.</p> </div> </div> </div> </section> <section class="mt-8"> <h2 class="text-xl font-bold mb-4">FAQ</h2> <div class="grid grid-cols-3 gap-4"> <img src="https://source.unsplash.com/random/300x200/?apartment" alt="Property Image" class="w-full"> <

Epoch 59/199 - valid loop: 100%|██████████| 2/2 [01:39<00:00, 49.58s/it, bleu_score=0.7]  



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-64 flex items-center justify-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white font-bold">Non-Profit Organization</h1> <p class="text-xl text-white mt-4">We are a non-profit organization dedicated to making a difference in the world.</p> <button class="mt-4 bg-white text-indigo-500 px-4 py-2 rounded">Donate Now</button> </header> <section class="py-8"> <h2 class="text-2xl text-center">Our mission is to provide a platform for individuals and organizations to create a better world for all difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive.</p> </section> <section class="py-8"> <h2 class="text-2xl text-center">Testimonials</h2> </secti

Epoch 60/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0373]
Epoch 61/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0171]
Epoch 62/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0568]
Epoch 63/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0385]
Epoch 64/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0315]
Epoch 64/199 - valid loop:  50%|█████     | 1/2 [01:02<01:02, 62.32s/it, bleu_score=0.662]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> </header> <main class="container mx-auto p-4"> <section class="my-8"> <h2 class="text-xl font-bold mb-4">Listings</h2> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-bold mb-2">House for Sale</h3> <p class="text-gray-700">This is a beautiful house for sale.</p> </div> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?apartment" alt="Property Image" class="w-full"> <h3 class="text-xl font-bold mb-2">Apartment for Sale</h3> <p class="text-gray-700">This is a beautiful space for sale.</p> </div> </div> </section> </main> <footer class="bg-white p-4"> <p>© 2022 Real Estate Age

Epoch 64/199 - valid loop: 100%|██████████| 2/2 [02:38<00:00, 79.28s/it, bleu_score=0.655]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-64 flex items-center justify-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white">Non-profit Organization</h1> <p class="text-xl text-white">We are a non-profit organization dedicated to making a difference in their communities.</p> <button class="bg-white text-gray-800 font-bold py-2 px-4 rounded">Donate Now</button> </header> <section class="p-8"> <h2 class="text-2xl text-center">Our mission is to provide a platform for individuals and organizations to create a better world where everyone has the opportunity to thrive.</p> </section> <section class="p-8"> <h2 class="text-2xl text-center">Testimonials</h2> </section> </body> </html>
    Answer: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" re

Epoch 65/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0738]
Epoch 66/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0269]
Epoch 67/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0347]
Epoch 68/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0428]
Epoch 69/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.146] 
Epoch 69/199 - valid loop:  50%|█████     | 1/2 [01:01<01:01, 61.61s/it, bleu_score=0.686]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> </header> <main class="container mx-auto p-4"> <div class="grid grid-cols-3 gap-4"> <div> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold mt-4">Listings</h2> <div> <img src="https://source.unsplash.com/random/300x200/?apartment" alt="Property Image" class="w-full"> <h3 class="text-xl font-bold mt-2">Spacious Apartment for Sale</h3> <p class="mt-2">This is a beautiful apartment for sale.</p> </div> <div> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> </div> </div> </main> <footer class="bg-white p-4"> <h2 class="text-xl font-bold mt-4">About</h2> <p class="mt-2">This is a beautiful house for sale.</p> </footer> </body> </html>
    Answer: <html> <link

Epoch 69/199 - valid loop: 100%|██████████| 2/2 [02:37<00:00, 78.94s/it, bleu_score=0.684]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-cover bg-center h-64 flex items-center justify-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?people')"> <h1 class="text-4xl text-white">Non-Profit Organization</h1> <p class="text-xl text-white mt-4"> Join Us </p> <button class="mt-4 bg-white text-gray-800 font-bold rounded-full my-6 py-4 px-8 shadow"> Donate Now </button> </header> <section class="p-8"> <h2 class="text-2xl text-center">Our mission is to provide a platform for individuals and organizations to come together to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive. </p> </section> <section class="p-8"> <h2 class="text-2xl text-center">Testimonials</h2> </section> </body> </html>
    Answer: <html> <link href="https://cdn.

Epoch 70/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.129] 
Epoch 71/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0978]
Epoch 72/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0654]
Epoch 73/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0758]
Epoch 74/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0658]
Epoch 74/199 - valid loop:  50%|█████     | 1/2 [00:59<00:59, 59.82s/it, bleu_score=0.656]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> <nav class="mt-4"> <ul class="flex space-x-4"> <li><a href="#" class="text-blue-500 hover:text-blue-700">Home</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Listings</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">About</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Contact</a></li> </ul> </nav> </header> <main class="p-4"> <section class="mb-4"> <img src="https://source.unsplash.com/random/800x400/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?apartment" alt="Property Image" class="w-full"> <h3 class="te

Epoch 74/199 - valid loop: 100%|██████████| 2/2 [01:38<00:00, 49.29s/it, bleu_score=0.683]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100 font-sans leading-normal tracking-normal"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <div class="container mx-auto px-6 py-12 flex items-center justify-between"> <h1 class="text-4xl text-white font-bold">Non-Profit Organization</h1> <p class="text-xl text-white mt-4"> Our mission is to provide a platform for individuals and organizations to make a difference in their communities. We believe in the power of collective action and strive to create a world where everyone has the opportunity to thrive. </p> </div> </header> <section class="py-12"> <div class="container mx-auto px-6"> <h2 class="text-3xl text-center">Testimonials</h2> </div> </section> <footer class="bg-white py-6"> <div class="container mx-auto px-6"> <h2 class="text-3xl text-center">Contact

Epoch 75/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.00787]
Epoch 76/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0769]
Epoch 77/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.045] 
Epoch 78/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0334]
Epoch 79/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.095] 
Epoch 79/199 - valid loop:  50%|█████     | 1/2 [01:01<01:01, 61.60s/it, bleu_score=0.65]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-3xl font-bold">Real Estate Agency</h1> <nav class="mt-4"> <ul class="flex space-x-4"> <li><a href="#" class="text-indigo-500 hover:text-indigo-700">Home</a></li> <li><a href="#" class="text-indigo-500 hover:text-indigo-700">Listings</a></li> <li><a href="#" class="text-indigo-500 hover:text-indigo-700">About</a></li> <li><a href="#" class="text-indigo-500 hover:text-indigo-700">Contact</a></li> </ul> </nav> </header> <main class="p-4"> <section class="mb-4"> <img src="https://source.unsplash.com/random/800x400/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold mb-2">Listings</h2> <div class="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4"> <div class="bg-white p-4"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h3 class="text-xl font-b

Epoch 79/199 - valid loop: 100%|██████████| 2/2 [01:47<00:00, 53.72s/it, bleu_score=0.686]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <div class="flex items-center justify-center h-full"> <h1 class="text-4xl text-white font-bold">Non-Profit Organization</h1> </div> </header> <nav class="bg-white p-4"> <ul class="flex space-x-4 justify-center"> <li><a href="#" class="text-blue-500 hover:text-blue-700">Home</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">About</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Get Involved</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Contact</a></li> </ul> </nav> <main class="p-4"> <p class="text-center text-lg">Our mission is to provide a platform for individuals and organizations to make a difference in their communities. We believe in the power of col

Epoch 80/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0253]
Epoch 81/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0469]
Epoch 82/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0755]
Epoch 83/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.73it/s, loss=0.0138]
Epoch 84/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0252]
Epoch 84/199 - valid loop:  50%|█████     | 1/2 [01:00<01:00, 60.94s/it, bleu_score=0.624]


Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="bg-white p-4"> <h1 class="text-2xl font-bold">Real Estate Agency</h1> </header> <main class="container mx-auto p-4"> <div class="grid grid-cols-3 gap-4"> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?house" alt="House" class="w-full"> <h2 class="text-xl font-bold mt-2">House for Sale</h2> <p class="text-gray-700 mt-2">This is a beautiful house for you.</p> </div> <div class="bg-white p-4 rounded shadow"> <img src="https://source.unsplash.com/random/300x200/?apartment" alt="Property Image" class="w-full"> </div> </div> </main> <footer class="bg-white p-4"> <div class="container mx-auto"> <p>© 2022 Real Estate Agency. All rights reserved.</p> </div> </footer> </body> </html>
    Answer: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="styl

Epoch 84/199 - valid loop: 100%|██████████| 2/2 [02:37<00:00, 78.59s/it, bleu_score=0.644]



Prediction: <html> <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2/dist/tailwind.min.css" rel="stylesheet"> <body class="bg-gray-100"> <header class="w-full h-64 bg-cover bg-center" style="background-image: url('https://source.unsplash.com/random/1600x900/?nonprofit')"> <h1 class="text-4xl text-white">Non-Profit Organization</h1> <p class="text-xl text-white">We are a non-profit organization dedicated to making a difference in their communities.</p> <button class="bg-white text-blue-500 px-4 py-2 rounded">Donate Now</button> </header> <nav class="bg-white p-4"> <ul class="flex justify-center space-x-8"> <li><a href="#" class="text-blue-500 hover:text-blue-700">Home</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">About</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Get Involved</a></li> <li><a href="#" class="text-blue-500 hover:text-blue-700">Contact</a></li> </ul> </nav> <main class="p-4"> <section class="mb-8"> <h2 class="text-2xl">Our

Epoch 85/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0219]
Epoch 86/199 - train loop: 100%|██████████| 240/240 [00:50<00:00,  4.72it/s, loss=0.0257]
Epoch 87/199 - train loop:  44%|████▍     | 106/240 [00:22<00:28,  4.71it/s, loss=0.0356]


KeyboardInterrupt: 