# Finetune Pix2Struct model on Synthetic Bootstrap dataset

## Setup Envirnoment

In [3]:
pip install transformers==4.36.2

Defaulting to user installation because normal site-packages is not writeable
Collecting tokenizers<0.19,>=0.14 (from transformers==4.36.2)
  Using cached tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Installing collected packages: tokenizers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.0
    Uninstalling tokenizers-0.21.0:
      Successfully uninstalled tokenizers-0.21.0
Successfully installed tokenizers-0.15.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/software-current/2023.06/x86_64/generic/software/Python/3.11.3-GCCcore-12.3.0/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# !pip install -q wandb

In [3]:
# pip install torchvision nltk wandb tqdm Pillow

## Import necessary libraries

In [1]:
# from google.colab import drive
import os
import zipfile
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import re
from transformers import Pix2StructForConditionalGeneration, AutoProcessor
import torch
from torch.nn import functional as F
from transformers.optimization import Adafactor, get_cosine_schedule_with_warmup
from pathlib import Path
from nltk import edit_distance
import numpy as np
import wandb
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction
from torch.utils.data import random_split
import random

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


## Define variables and parameters

In [2]:
# G_DRIVE_FOLDER = '/content/drive/MyDrive/Datasets/'

# DATASET_NAME = 'synthBootstrap_mini'
# ZIP_NAME = DATASET_NAME + '.zip'
# DESTINATION_FOLDER= 'data/'
# DATASET_FOLDER = DESTINATION_FOLDER + DATASET_NAME

# HTML_FILES_FOLDER = DESTINATION_FOLDER + "html/"
# home/seyeon/data/synthBootstrap_mini/html

FOLDER_CHECKPOINTS = ''
DATASET_NAME = 'new_data/'
ZIP_NAME = DATASET_NAME + '.zip'
DESTINATION_FOLDER= 'data/'
DATASET_FOLDER = DESTINATION_FOLDER + DATASET_NAME
HTML_FILES_FOLDER = DATASET_FOLDER + "html/"


EXPERIMENT_NAME = "new_data"

MAX_SENTENCE_LEN = 4096

CHUNK_LENGTH =  1024
CONTEXT_OVERLAP_LENGTH = 256

MAX_PATCHES = 512 #1024

DEBUG = False
VERBOSE = True

BATCH_SIZE = 8
NUM_WARMUP_STEPS = 1000
MAX_EPOCHS = 200
LR = 1e-4
CHECK_VAL_EVERY_N_EPOCH = 5
GRADIENT_CLIP_VAL = 1.0
ACCUMULATE_GRAD_BATCHES = 8 / BATCH_SIZE

TRAIN_SET_PERCENTAGE = 0.88
VALID_SET_PERCENTAGE = 0.02 # Use 20 for validation
# TEST_SET_PERCENTAGE is 1 - TRAIN_SET_PERCENTAGE - VALID_SET_PERCENTAGE # Use 100 for test

RANDOM_SEED = 123

LOAD_FROM_CHECKPOINT = False
LAST_CHECKPOINT_NAME = ""

In [3]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
MAX_N_CHUNKS_PER_SENTENCE = 1 + (MAX_SENTENCE_LEN - CHUNK_LENGTH) // (CHUNK_LENGTH - CONTEXT_OVERLAP_LENGTH)
print("MAX_N_CHUNKS_PER_SENTENCE", MAX_N_CHUNKS_PER_SENTENCE)

MAX_N_CHUNKS_PER_SENTENCE 5



## Load Synthetic Bootstrap Dataset

## Load Model and Processor

In [5]:
# pip install tokenizers==0.21.0

In [6]:

repo_id = "google/pix2struct-base"
processor = AutoProcessor.from_pretrained(repo_id)
model = Pix2StructForConditionalGeneration.from_pretrained(repo_id, is_encoder_decoder=True)



## Create Dataset class

### Preprocessing functions

In [7]:
def round_floats_in_text(text, precision=0):
    # match float numbers with 2 or more decimal places in the text
    pattern = r"\b\d+\.\d{2,}\b"

    def replace(match):
        float_number = float(match.group())
        return f"{float_number:.{precision}f}"

    text = re.sub(pattern, replace, text)
    return text

In [8]:
def remove_html_comments(text):
    # match html comments
    pattern = r"<!--.*?-->"

    text = re.sub(pattern, '', text, flags=re.DOTALL)
    return text

In [9]:
def preprocess_html_file(html_text):
    text_cleaned = html_text.replace('\n', ' ')
    text_cleaned_without_multiple_spaces = re.sub(r'\s+', ' ', text_cleaned)
    text_without_comments = remove_html_comments(text_cleaned_without_multiple_spaces)
    text_without_long_floats = round_floats_in_text(text_without_comments)
    return text_without_long_floats

### Find max sentence length and new unknown tokens

In [10]:
import os

def safe_read(filepath):
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        try:
            with open(filepath, "r", encoding="latin-1") as f:
                return f.read()
        except Exception as e:
            print(f"Skipping {filepath} due to decode error: {e}")
            return None

HTML_FILES_FOLDER = "data/new_data/html/"
all_paths = [f for f in os.listdir(HTML_FILES_FOLDER) if f.endswith(".html")]

max_length = 0
tokens_to_add = set()

for html_file in all_paths:
    text = safe_read(os.path.join(HTML_FILES_FOLDER, html_file))
    if text is None:
        continue

    processed_text = preprocess_html_file(text)
    tokens = processor.tokenizer(processed_text).tokens()

    tokens_to_add.update(tokens)
    max_length = max(max_length, len(tokens))

print(f"Max sentence length = {max_length}")

newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
print(f"Number of new tokens = {newly_added_num}")

if newly_added_num > 0:
    model.decoder.resize_token_embeddings(len(processor.tokenizer))


Max sentence length = 1852
Number of new tokens = 9144


In [11]:
# import shutil
# from pathlib import Path

# macosx_dir = Path("data/new_data/images/__MACOSX")

# if macosx_dir.exists() and macosx_dir.is_dir():
#     shutil.rmtree(macosx_dir)
#     print(f"🗑️ Deleted: {macosx_dir}")
# else:
#     print("✅ No __MACOSX folder found.")


In [12]:
# # Find max length
# all_paths = os.listdir(HTML_FILES_FOLDER)

# max_length = 0

# # Read text files and add new tokens to dictionary
# tokens_to_add = set()
# for html_file_path in all_paths:
#     with open(HTML_FILES_FOLDER + html_file_path, "r") as reader:
#         splitted_text = processor.tokenizer(preprocess_html_file(reader.read())).tokens()
#         tokens_to_add = tokens_to_add.union(set(splitted_text))

#         # Check if the current sentence has the largest number of tokens
#         if len(splitted_text) > max_length:
#             max_length = len(splitted_text)

# print(f"Max sentence length = {max_length}")

# newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
# print(f"Number of new tokens = {newly_added_num}")

# # Resize the model's token embeddings if there are new tokens
# if newly_added_num > 0:
#     model.decoder.resize_token_embeddings(len(processor.tokenizer))

### Split files into training - validation - test sets

In [13]:
random.seed(RANDOM_SEED)
random.shuffle(sorted(all_paths))

train_len = int(TRAIN_SET_PERCENTAGE * len(all_paths))
valid_len = int(VALID_SET_PERCENTAGE * len(all_paths))

train_paths = all_paths[:train_len]
valid_paths = all_paths[train_len:train_len+valid_len]
test_paths = all_paths[train_len+valid_len:]

print(f"TRAIN_SET size = {len(train_paths)}")
print(f"VALID_SET size = {len(valid_paths)}")
print(f"TEST_SET size = {len(test_paths)}")

TRAIN_SET size = 880
VALID_SET size = 20
TEST_SET size = 100


In [14]:
class SythBootstrapTrainingDataset(Dataset):
    # This is a modification of the dataset used for validation and testing
    # In this one the sentences are already split into chunks, already having
    # the context from the previous chunk, empty chunks are discarded
    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100

        self.data = []
        self.images_encoding = []

        for text_file in tqdm(text_files_paths):
            image_file = text_file.replace('.html', '.png')

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir + "html/", text_file)
            image_file_path = os.path.join(root_dir + "images/", image_file)

            # Each data entry has the following structure
            # labels, image_encoding_idx, part

            # image_encoding_idx points to an entry of images_encoding, which contains attention_mask and flattened_patches for the image
            # Since a single image is used for multiple slices of the same text, this approach is used to save memory

            # Load image
            image = Image.open(image_file_path).convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            encoding = {k:v.squeeze() for k,v in encoding.items()}

            self.images_encoding.append(encoding)
            image_encoding_idx = len(self.images_encoding) - 1

            # Load text
            with open(text_file_path, 'r') as f:
                text = f.read()
                text_cleaned = preprocess_html_file(text)

            if DEBUG:
              print("text:")
              print(text)
              print("\n\n\ntext_cleaned:")
              print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            input_ids_slices = []

            start_index = 0
            end_index = CHUNK_LENGTH
            while end_index <= MAX_SENTENCE_LEN:
                input_ids_slices.append(input_ids[:, start_index:end_index])
                start_index = end_index - CONTEXT_OVERLAP_LENGTH
                end_index = start_index + CHUNK_LENGTH

            for part, input_ids_slice in enumerate(input_ids_slices):
                labels = input_ids_slice.squeeze().clone()

                labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

                # Skip slices with only padding tokens, ignore context from the previous chunk
                if part != 0 and all(x == self.ignore_id for x in labels[CONTEXT_OVERLAP_LENGTH:]):
                    continue

                # labels, image_encoding_idx, part
                # Save them as int32 to save ram memory
                self.data.append((labels.to(torch.int32), image_encoding_idx, part))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        labels, image_encoding_idx, part = self.data[idx]
        encoding = self.images_encoding[image_encoding_idx]
        encoding["labels"] = labels.to(torch.int64)

        return encoding, part

In [15]:
class SythBootstrapDataset(Dataset):
    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100

        self.encodings = []

        for text_file in tqdm(text_files_paths):
            image_file = text_file.replace('.html', '.png')

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir + "html/", text_file)
            image_file_path = os.path.join(root_dir + "images/", image_file)

            # Load image
            image = Image.open(image_file_path).convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            encoding = {k:v.squeeze() for k,v in encoding.items()}

            # Load text
            with open(text_file_path, 'r') as f:
                text = f.read()
                text_cleaned = preprocess_html_file(text)

            if DEBUG:
              print("text:")
              print(text)
              print("\n\n\ntext_cleaned:")
              print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            labels = input_ids.squeeze().clone()
            labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

            encoding["labels"] = labels.to(torch.int32)

            # For each sample save directly the encoding of both text and image
            self.encodings.append(encoding)

    def __len__(self):
        return len(self.encodings)

    def __getitem__(self, idx):
        return self.encodings[idx]

In [16]:
# Transformations for the image
transform = transforms.Compose([
    transforms.ToTensor(),  # convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # normalize for pretrained models
])

# Instantiate the CustomDataset
train_dataset = SythBootstrapTrainingDataset(DATASET_FOLDER, transform, train_paths)
val_dataset = SythBootstrapDataset(DATASET_FOLDER, transform, valid_paths)

# Use DataLoader for batching and shuffling
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=10, shuffle=False) # Use 10 as batch for testing

100%|██████████| 880/880 [00:59<00:00, 14.85it/s]
100%|██████████| 20/20 [00:01<00:00, 16.84it/s]


In [17]:
print(f"train_dataloader size = {len(train_dataloader)}")
print(f"val_dataloader size = {len(val_dataloader)}")

train_dataloader size = 142
val_dataloader size = 2


## Training

In [18]:
START_TOKEN_ID = PAD_TOKEN_ID = processor.tokenizer.pad_token_id

### Utility functions

In [19]:
def move_to_device(data):
    if isinstance(data, (list,tuple)):
        return [move_to_device(x) for x in data]
    elif isinstance(data, dict):
        return {k: move_to_device(v) for k, v in data.items()}
    elif isinstance(data, torch.Tensor):
        return data.to(DEVICE)
    else:
        return data

In [20]:
def create_extended_attention_mask_for_decoder_with_context(input_shape, attention_mask, part):
    device = attention_mask.device
    batch_size, seq_length = input_shape
    seq_ids = torch.arange(seq_length, device=device)

    causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]

    # Expand part to have the same shape as the relevant slice of causal_mask
    part_expanded = part.unsqueeze(-1).unsqueeze(-1).expand(-1, seq_length, CONTEXT_OVERLAP_LENGTH)

    # Create a mask with ones where part is not zero
    context_mask = (part_expanded != 0).float()

    # Apply the context_mask to the corresponding part of causal_mask
    causal_mask[:, :, :CONTEXT_OVERLAP_LENGTH] = causal_mask[:, :, :CONTEXT_OVERLAP_LENGTH] * (1 - context_mask) + context_mask

    # in case past_key_values are used we need to add a prefix ones mask to the causal mask
    causal_mask = causal_mask.to(attention_mask.dtype)

    if causal_mask.shape[1] < attention_mask.shape[1]:
        print("!!should not enter here in my case!!")
        prefix_seq_len = attention_mask.shape[1] - causal_mask.shape[1]
        causal_mask = torch.cat(
            [
                torch.ones((batch_size, seq_length, prefix_seq_len), device=device, dtype=causal_mask.dtype),
                causal_mask,
            ],
            axis=-1,
        )

    extended_attention_mask = causal_mask[:, :, :] * attention_mask[:, None, :]
    return extended_attention_mask


In [21]:
def get_attention_mask(decoder_input_ids, part):
    decoder_attention_mask = (decoder_input_ids.ne(PAD_TOKEN_ID).float())

    # always attend on first token
    decoder_attention_mask[:, 0] = 1

    # Expand part to have the same shape as the relevant slice of decoder_attention_mask
    part_expanded = part.unsqueeze(-1).expand(-1, CONTEXT_OVERLAP_LENGTH)

    # Create a mask with ones where part is not zero
    context_mask = (part_expanded != 0).float()

    # Apply the context_mask to the corresponding part of decoder_attention_mask
    decoder_attention_mask[:, 0:CONTEXT_OVERLAP_LENGTH] = decoder_attention_mask[:, 0:CONTEXT_OVERLAP_LENGTH] * (1 - context_mask) + context_mask

    return decoder_attention_mask

In [22]:
def shift_right_modified(input_ids, decoder_starting_token_idx):

    # shift inputs to the right
    shifted_input_ids = input_ids.new_zeros(input_ids.shape)
    shifted_input_ids[..., 1:] = input_ids[..., :-1].clone()
    shifted_input_ids[..., 0] = decoder_starting_token_idx

    # replace possible -100 values in labels by `pad_token_id`
    shifted_input_ids.masked_fill_(shifted_input_ids == -100, PAD_TOKEN_ID)

    return shifted_input_ids

In [23]:
def get_decoder_input_ids(labels_chunk, start_id):
    return shift_right_modified(labels_chunk, start_id)

In [24]:
def get_decoder_input_ids_and_attention_mask(labels, part):
    decoder_input_ids = get_decoder_input_ids(labels, START_TOKEN_ID)
    decoder_attention_mask = get_attention_mask(decoder_input_ids, part)
    extended_decoder_attention_mask = create_extended_attention_mask_for_decoder_with_context(decoder_input_ids.shape, decoder_attention_mask, part)

    return decoder_input_ids, extended_decoder_attention_mask

### Main training function

In [25]:
def train_model(config, processor, model, train_dataloader, val_dataloader):
    # Extract configuration values
    lr = config.get("lr")
    max_epochs = config.get("max_epochs")
    num_warmup_steps = config.get("num_warmup_steps")

    model.to(DEVICE)

    optimizer = Adafactor(model.parameters(), scale_parameter=False, relative_step=False, lr=lr, weight_decay=1e-05)

    # Use total steps (i.e., max_epochs * length_of_train_data)
    total_steps = max_epochs * len(train_dataloader)
    scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=total_steps)

    global_step = 0  # to keep track of total steps
    epoch_start = 0

    if LOAD_FROM_CHECKPOINT:
        print("Loading model from checkpoint:", LAST_CHECKPOINT_NAME)
        checkpoint = torch.load(LAST_CHECKPOINT_NAME)
        model.resize_token_embeddings(50244) ### retrain
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
        epoch_start = checkpoint["epoch"] + 1
        global_step = checkpoint["global_step"] + 1
        wandb_run_id = checkpoint["wandb_run_id"]

        # Resume the WandB run
        wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config,     resume="must", id="iaegqr6z")
    else:
        wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config)

    epoch_last = epoch_start + max_epochs - 1
    for epoch in range(epoch_start, epoch_start + max_epochs):
        global_step, moving_avg_loss = training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last)

        if epoch == 0 + epoch_start or epoch == epoch_last or (epoch + 1) % config.get("check_val_every_n_epoch") == 0:
            avg_bleu_score = testing_loop(val_dataloader, model, processor, config, f"Epoch {epoch}/{epoch_last} - valid loop")

            # Save the model after each validation step
            save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb.run.id, avg_bleu_score, EXPERIMENT_NAME, FOLDER_CHECKPOINTS)

            if config.get("verbose", False):
                print(f"Moving Avg Loss: {moving_avg_loss:.3f}")
                print(f" Avg Bleu Score: {avg_bleu_score:.2f}")

            wandb.log({"moving_avg_loss": moving_avg_loss, "bleu": avg_bleu_score, **{f'lr_{i}': param_group['lr'] for i, param_group in enumerate(optimizer.param_groups)}})

    wandb.finish()

In [26]:
def training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last):


    model.train()
    train_loop = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch}/{epoch_last} - train loop")

    # Extract configuration values
    accumulate_grad_batches = config.get('accumulate_grad_batches', 1)
    gradient_clip_val = config.get("gradient_clip_val")

    moving_avg_loss = 0
    alpha = 0.1 # Smoothing factor

    for step, batch in train_loop:
        encoding, part = map(move_to_device, batch)
        labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

        decoder_input_ids, decoder_attention_mask = get_decoder_input_ids_and_attention_mask(labels, part)
        
        outputs = model(labels=labels, flattened_patches=flattened_patches, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask)
        loss = outputs.loss
        loss.backward()

        if global_step % accumulate_grad_batches == 0 or step == len(train_dataloader) - 1:
            if gradient_clip_val:
                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip_val)
            optimizer.step()
            optimizer.zero_grad()

        # Update the progress bar
        train_loop.set_postfix({'loss': loss.item()}, refresh=True)

        scheduler.step()
        global_step += 1

        # Update the moving average loss
        moving_avg_loss = loss.item() if moving_avg_loss == 0 else alpha * loss.item() + (1 - alpha) * moving_avg_loss

        # Log Loss after each step
        wandb.log({"loss": loss.item()})

    return global_step, moving_avg_loss

In [27]:
# def testing_loop(testing_dataloader, model, processor, config, description):
#     model.eval()
#     bleu_scores = []

#     with torch.no_grad():
#         test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
#         for i, batch in test_loop:
#             encoding = move_to_device(batch)
#             labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

#             # Initialize total_outputs with zeros
#             total_outputs = None
#             context_from_last = None

#             # Initialize a mask to track which sentences are finished
#             finished_sentences_mask = torch.zeros(flattened_patches.size(0), dtype=torch.bool, device=flattened_patches.device)

#             for iteration in range(MAX_N_CHUNKS_PER_SENTENCE):

#                 generate_args = {
#                     "flattened_patches": flattened_patches[~finished_sentences_mask],
#                     "attention_mask": attention_mask[~finished_sentences_mask],
#                     "max_new_tokens": CHUNK_LENGTH - (CONTEXT_OVERLAP_LENGTH if iteration else 0),
#                 }

#                 if iteration and context_from_last is not None:
#                     generate_args["decoder_input_ids"] = context_from_last[~finished_sentences_mask]

#                 outputs = model.generate(**generate_args)

#                 # Remove context overlap only from the second iteration onwards
#                 new_chunks = outputs if iteration == 0 else outputs[:, CONTEXT_OVERLAP_LENGTH:]

#                 if iteration == 0:
#                     total_outputs = new_chunks
#                 else:
#                     # Update total_outputs by concatenating new chunks
#                     new_chunks_with_padding_chunks = torch.full((flattened_patches.shape[0], new_chunks.shape[1]), PAD_TOKEN_ID, dtype=new_chunks.dtype, device=new_chunks.device)
#                     new_chunks_with_padding_chunks[~finished_sentences_mask] = new_chunks
#                     total_outputs = torch.cat((total_outputs, new_chunks_with_padding_chunks), dim=1)

#                 # Update the finished_sentences_mask
#                 finished_sentences_mask[~finished_sentences_mask] |= (outputs == processor.tokenizer.eos_token_id).any(dim=1)

#                 # If all sentences are finished, exit the loop
#                 if finished_sentences_mask.all():
#                     break

#                 if outputs.shape[1] < CHUNK_LENGTH:
#                     print("ERROR: !! should have already exited because all sentences reached the end!!")

#                 # -1 because it will put in front a START_TOKEN automatically
#                 context_from_last = total_outputs[:, -(CONTEXT_OVERLAP_LENGTH-1):]

#             predictions = processor.tokenizer.batch_decode(total_outputs, skip_special_tokens=True)

#             labels[labels == -100] = 0
#             answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

#             bleu_scores += [corpus_bleu([[answer]], [pred], smoothing_function=SmoothingFunction().method4) for pred, answer in zip(predictions, answers)]

#             avg_bleu_score = np.mean(bleu_scores)
#             test_loop.set_postfix(bleu_score=avg_bleu_score)

#             if config.get("verbose", False):
#                 for pred, answer, bleu_score in zip(predictions, answers, bleu_scores):
#                     tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")


#     return avg_bleu_score


In [28]:
def testing_loop(testing_dataloader, model, processor, config, description):
    model.eval()
    bleu_scores = []

    with torch.no_grad():
        test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
        for i, batch in test_loop:
            encoding = move_to_device(batch)
            labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

            # Initialize total_outputs with zeros
            total_outputs = None
            context_from_last = None

            # Initialize a mask to track which sentences are finished
            finished_sentences_mask = torch.zeros(flattened_patches.size(0), dtype=torch.bool, device=flattened_patches.device)

            for iteration in range(MAX_N_CHUNKS_PER_SENTENCE):

                generate_args = {
                    "flattened_patches": flattened_patches[~finished_sentences_mask],
                    "attention_mask": attention_mask[~finished_sentences_mask],
                    "max_new_tokens": CHUNK_LENGTH - (CONTEXT_OVERLAP_LENGTH if iteration else 0),
                }

                if iteration and context_from_last is not None:
                    generate_args["decoder_input_ids"] = context_from_last[~finished_sentences_mask]

                outputs = model.generate(**generate_args)

                # Remove context overlap only from the second iteration onwards
                new_chunks = outputs if iteration == 0 else outputs[:, CONTEXT_OVERLAP_LENGTH:]

                if iteration == 0:
                    total_outputs = new_chunks
                else:
                    # Update total_outputs by concatenating new chunks
                    new_chunks_with_padding_chunks = torch.full((flattened_patches.shape[0], new_chunks.shape[1]), PAD_TOKEN_ID, dtype=new_chunks.dtype, device=new_chunks.device)
                    new_chunks_with_padding_chunks[~finished_sentences_mask] = new_chunks
                    total_outputs = torch.cat((total_outputs, new_chunks_with_padding_chunks), dim=1)

                # Update the finished_sentences_mask
                finished_sentences_mask[~finished_sentences_mask] |= (outputs == processor.tokenizer.eos_token_id).any(dim=1)

                # If all sentences are finished, exit the loop
                if finished_sentences_mask.all():
                    break

                if outputs.shape[1] < CHUNK_LENGTH:
                    print("ERROR: !! should have already exited because all sentences reached the end!!")

                # -1 because it will put in front a START_TOKEN automatically
                context_from_last = total_outputs[:, -(CONTEXT_OVERLAP_LENGTH-1):]

            predictions = processor.tokenizer.batch_decode(total_outputs, skip_special_tokens=True)

            labels[labels == -100] = 0
            answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

            bleu_scores += [corpus_bleu([[answer]], [pred], smoothing_function=SmoothingFunction().method4) for pred, answer in zip(predictions, answers)]

            avg_bleu_score = np.mean(bleu_scores)
            test_loop.set_postfix(bleu_score=avg_bleu_score)

            if config.get("verbose", False):
                for pred, answer, bleu_score in zip(predictions, answers, bleu_scores):
                    tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")


    return avg_bleu_score


In [29]:
def save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb_run_id, avg_bleu_score, experiment_name, folder_path):
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "epoch": epoch,
        "global_step": global_step,
        'wandb_run_id': wandb_run_id
    }
    model_name = f"{experiment_name}_epoch[{epoch}]_bleu[{avg_bleu_score:.2f}].pth"
    torch.save(checkpoint, folder_path + model_name)


In [30]:
config = {
          "batch_size": BATCH_SIZE,
          "num_warmup_steps": NUM_WARMUP_STEPS,
          "max_epochs": MAX_EPOCHS,
          "lr": LR,
          "check_val_every_n_epoch": CHECK_VAL_EVERY_N_EPOCH,
          "gradient_clip_val": GRADIENT_CLIP_VAL,
          "accumulate_grad_batches": ACCUMULATE_GRAD_BATCHES,
          "verbose": VERBOSE,
}

In [31]:
# import os
# import glob

# # Define directories
# html_dir = "data/new_data/html"
# image_dir = "data/new_data/images"

# # Function to delete all files in a directory
# def delete_files_in_dir(directory):
#     files = glob.glob(os.path.join(directory, '*'))
#     for f in files:
#         if os.path.isfile(f):
#             os.remove(f)

# # Delete files
# delete_files_in_dir(html_dir)
# delete_files_in_dir(image_dir)

# print("All files deleted in html and images folders.")


In [32]:
def validate_config(config):
    # Check required keys
    required_keys = [
        "batch_size",
        "num_warmup_steps",
        "max_epochs",
        "lr",
        "check_val_every_n_epoch",
        "gradient_clip_val",
        "accumulate_grad_batches",
        "verbose"
    ]
    for key in required_keys:
        if key not in config:
            raise ValueError(f"Key '{key}' must be present in the configuration.")

    # Check that values are in expected ranges
    if config["batch_size"] <= 0:
        raise ValueError("batch_size must be positive.")
    if config["num_warmup_steps"] < 0:
        raise ValueError("num_warmup_steps must be non-negative.")
    if config["max_epochs"] <= 0:
        raise ValueError("max_epochs must be positive.")
    if config["lr"] <= 0:
        raise ValueError("Learning rate must be positive.")
    if config["check_val_every_n_epoch"] <= 0:
        raise ValueError("check_val_every_n_epoch must be positive.")
    if config["gradient_clip_val"] < 0:
        raise ValueError("gradient_clip_val must be non-negative.")
    if config["accumulate_grad_batches"] <= 0:
        raise ValueError("accumulate_grad_batches must be positive.")
    if not isinstance(config["verbose"], bool):
        raise ValueError("verbose must be a boolean value.")


In [33]:
validate_config(config)
print(config)

{'batch_size': 8, 'num_warmup_steps': 1000, 'max_epochs': 200, 'lr': 0.0001, 'check_val_every_n_epoch': 5, 'gradient_clip_val': 1.0, 'accumulate_grad_batches': 1.0, 'verbose': True}


In [34]:
# checkpoint = torch.load('checkpointsPix2Struct_SynthBootstrap_1000_Complete_epoch[19]_bleu[0.87].pth')
# print("Saved WandB run ID:", checkpoint['wandb_run_id'])


In [35]:
train_model(config, processor, model, train_dataloader, val_dataloader)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mspark711247[0m ([33mseyeon-michigan-state-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 0/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.56it/s, loss=4.02]
Epoch 0/199 - valid loop:  50%|█████     | 1/2 [04:14<04:14, 254.26s/it, bleu_score=0.00629]


Prediction: <Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Projects Research Proje

Epoch 0/199 - valid loop: 100%|██████████| 2/2 [07:47<00:00, 234.00s/it, bleu_score=0.00691]



Prediction: <> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 

Epoch 1/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.55it/s, loss=1.76]
Epoch 2/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=1.15]
Epoch 3/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=1.09]
Epoch 4/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.865]
Epoch 4/199 - valid loop:  50%|█████     | 1/2 [04:18<04:18, 258.59s/it, bleu_score=0.0879]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Al Research Portfolio</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } h1 { text-align: center; margin-bottom: 20px; } h2 { text-align: center; margin-bottom: 20px; } h3 { text-align: center; margin-bottom: 20px; }.research-projects { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.research-projects h3 { margin-bottom: 20px; }.resear

Epoch 4/199 - valid loop: 100%|██████████| 2/2 [08:50<00:00, 265.35s/it, bleu_score=0.0811]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } h1 { text-align: center; margin-bottom: 20px; } h2 { text-align: center; margin-bottom: 20px; } h3 { text-align: center; margin-bottom: 20px; } h3 { text-align: center; margin-bottom: 20px; }.donate-button { background-color: #333; padding: 20px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; border-radius: 5px; bord

Epoch 5/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.743]
Epoch 6/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=1.31] 
Epoch 7/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.722]
Epoch 8/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.444]
Epoch 9/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.364]
Epoch 9/199 - valid loop:  50%|█████     | 1/2 [02:52<02:52, 172.16s/it, bleu_score=0.274]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #333; text-align: center; }.container { width: 80%; margin: auto; }.search-bar { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }.search-bar input[type="text"] { width: 100%; padding: 10px 20px; border-radius: 5px; border: 1px solid #333; border-radius: 5px; cursor: pointer; }.search-bar input[type="submit"]:hover { background-color: #333; }.search-bar input[type="submit"]:hover { background-color: #333; }.search-bar input[type="submit"]:hover { background-color: #333; } </style> </head> <body> <div class="container"> <h1>Community Forum</h1> <div class="search-bar"> <input type="text" placeholder="Search..."> </div> <div class="search-bar"> <in

Epoch 9/199 - valid loop: 100%|██████████| 2/2 [05:49<00:00, 174.73s/it, bleu_score=0.267]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Help Our Cause</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #333; text-align: center; }.donate-button { display: block; padding: 10px 20px; background-color: #f5f5f5; color: #333; border-radius: 5px; cursor: pointer; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <h2>Our Cause</h2> <p>We are a non-profit organization dedicated to providing education and resources to underprivileged children in the world. Our mission is to provide resources to children in the world.</p> </body> </html>
    Answer: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0">

Epoch 10/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.628]
Epoch 11/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.535]
Epoch 12/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.476]
Epoch 13/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.638]
Epoch 14/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.648]
Epoch 14/199 - valid loop:  50%|█████     | 1/2 [04:31<04:31, 271.93s/it, bleu_score=0.0703]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } h1, h2 { color: #333; }.container { width: 80%; margin: auto; }.container h3 { margin-bottom: 20px; }.forum h4 { margin-bottom: 10px; }.forum p { margin-bottom: 10px; }.forum p { margin-bottom: 10px; }.forum p a { color: #333; text-decoration: none; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:hover { color: #333; }.forum a:ho

Epoch 14/199 - valid loop: 100%|██████████| 2/2 [09:03<00:00, 272.00s/it, bleu_score=0.0796]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } h1, h2 { color: #333; }.container { width: 80%; margin: auto; }.donate-button { display: block; margin: auto; padding: 10px 20px; background-color: #333; color: #fff; border-radius: 5px; text-decoration: none; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color: #333; }.donate-button:hover { background-color:

Epoch 15/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.846]
Epoch 16/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.422]
Epoch 17/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.353]
Epoch 18/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.586]
Epoch 19/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=1]    
Epoch 19/199 - valid loop:  50%|█████     | 1/2 [02:33<02:33, 153.04s/it, bleu_score=0.327]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Tech Company - Your Source for Technology Processing</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #333; }.services { display: flex; flex-wrap: wrap; justify-content: space-between; align-items: center; margin-bottom: 20px; }.services img { width: 100%; height: auto; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.services h3 { margin-bottom: 10px; }.services p { margin-bottom: 10px; }.testimonials { margin-bottom: 20px; }.testimonial { margin-bottom: 10px; }.testimonial h3 { margin-bottom: 10px; }.testimonial p { margin-bottom: 10px; } </style> </head> <body> <h1>Tech Company - Your Source for Technology Processing</h1> <h2>Services</h2> <div class="services"> <div class="services"> <img src="services1.jpg" alt="Services 1"> <h3>Web D

Epoch 19/199 - valid loop: 100%|██████████| 2/2 [05:45<00:00, 172.89s/it, bleu_score=0.291]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Providing Education for Children</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #333; }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #333; color: #fff; border: none; border-radius: 5px; cursor: pointer; transition: background-color 0.3s; }.donate-button:hover { background-color: #333; }.volunteer-section { background-color: #fff; padding: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.volunteer-section h3 { margin-bottom: 10px; }.volunteer-section p { margin-bottom: 10px; }.volunteer-section p:last-child { margin-bottom: 0; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <h2>Our Mission</h2> <p>To provide education and resources to underprivileged children in dev

Epoch 20/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.455]
Epoch 21/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.473]
Epoch 22/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.268]
Epoch 23/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.291]
Epoch 24/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.404]
Epoch 24/199 - valid loop:  50%|█████     | 1/2 [01:34<01:34, 94.47s/it, bleu_score=0.507]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } header { background-color: #007BFF; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.navigation { display: flex; justify-content: space-between; align-items: center; background-color: #fff; padding: 20px; margin-bottom: 20px; }.navigation a { color: #007BFF; text-decoration: none; margin-right: 20px; }.navigation a:hover { color: #007BFF; }.navigation a:hover { color: #007BFF; }.navigation a:hover { color: #007BFF; }.navigation a:hover { color: #007BFF; }.search-bar { margin-top: 20px; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.search-bar input[type="text"] { padding: 10px; border-radius: 5px; border: none; border-radius: 5px; bor

Epoch 24/199 - valid loop: 100%|██████████| 2/2 [03:32<00:00, 106.11s/it, bleu_score=0.448]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; } h1, h2 { color: #333; }.mission-statement { margin-bottom: 20px; }.mission-statement p { margin-bottom: 10px; }.donate-button { padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005BFF; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <h2>Our Mission:</h2> <p>To provide education and resources to underprivileged communities around the world.</p> <h2>Our Services:</h2> <ul> <li>Education 101</li> <li>Education 2021</li> </ul> <h2>Our Stories:</h2> <ul> <li>Education 3</li> <li>Education 4</li> <li>Education 5</li> </ul> </body> </html>
    Answer: <!DOCTYPE ht

Epoch 25/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.27] 
Epoch 26/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.401]
Epoch 27/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.44] 
Epoch 28/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.477]
Epoch 29/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.429]
Epoch 29/199 - valid loop:  50%|█████     | 1/2 [01:07<01:07, 67.62s/it, bleu_score=0.515]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } header { background-color: #333; color: #fff; padding: 20px; text-align: center; } #categories { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; } #categories a { color: #333; text-decoration: none; margin-right: 20px; } #categories a:hover { color: #007bff; } #recent-posts { display: flex; flex-wrap: wrap; justify-content: space-between; margin-bottom: 20px; }.post { width: 30%; margin-bottom: 20px; background-color: #fff; padding: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.post h2 { margin-bottom: 10px; }.post p { margin-bottom: 10px; }.post a { color: #007bff; text-decoration: none; margin-bottom: 10px; } </style> </head> <body> <header

Epoch 29/199 - valid loop: 100%|██████████| 2/2 [03:25<00:00, 102.81s/it, bleu_score=0.458]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Providing Education for All</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; }.header { text-align: center; margin-bottom: 30px; }.logo { font-size: 24px; font-weight: bold; }.mission { display: flex; justify-content: space-between; align-items: center; margin-bottom: 30px; }.mission img { width: 100%; height: auto; border-radius: 5px; margin-right: 10px; }.donate-button { padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005a9c; } </style> </head> <body> <header class="header"> <h1>Welcome to XYZ Charity</h1> </header> <div class="mission"> <img src="logo.png" alt="XYZ Charity logo"> <p>Our mission is to provide edu

Epoch 30/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.262]
Epoch 31/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.229]
Epoch 32/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.213]
Epoch 33/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.41] 
Epoch 34/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.0961]
Epoch 34/199 - valid loop:  50%|█████     | 1/2 [01:34<01:34, 94.56s/it, bleu_score=0.524]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.categories { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }.category { flex: 1; padding: 10px; background-color: #fff; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.category h2 { margin-bottom: 10px; }.category p { margin-bottom: 10px; }.recent-posts { margin-bottom: 20px; }.recent-post { margin-bottom: 10px; }.recent-post h3 { margin-bottom: 10px; }.recent-post p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Community Forum</h1> </header> <div class="container"> <div class="categories"> <div class="catego

Epoch 34/199 - valid loop: 100%|██████████| 2/2 [03:27<00:00, 103.59s/it, bleu_score=0.457]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Empowering Children in Need</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #333; } p { margin-bottom: 20px; }.mission { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }.mission img { width: 100px; height: 100px; object-fit: cover; }.donate-button { padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005a9c; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <p>Our mission is to provide support and resources to underprivileged children in developing countries.</p> <div class="mission"> <img src="mission1.jpg" alt="Mission 1"> <p>Our miss

Epoch 35/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.362]
Epoch 36/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.106]
Epoch 37/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.245]
Epoch 38/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.188]
Epoch 39/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.58it/s, loss=0.377]
Epoch 39/199 - valid loop:  50%|█████     | 1/2 [01:01<01:01, 61.64s/it, bleu_score=0.559]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Job Recruitment Portal</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.search-bar { margin-top: 20px; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.search-bar input[type="text"] { width: 100%; padding: 10px; border-radius: 5px; border: none; }.job-listing { margin-top: 20px; }.job-listing h3 { margin-bottom: 10px; }.job-listing p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Job Recruitment Portal</h1> </header> <div class="search-bar"> <input type="text" placeholder="Search for jobs..."> <input type="submit" value="Search"> </div> <div class="search-bar"> <input type="text" placeholder="Search for projects..."> <input type="submit" value="Search"> </

Epoch 39/199 - valid loop: 100%|██████████| 2/2 [02:00<00:00, 60.04s/it, bleu_score=0.517]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity 101 - A Non-Profit Organization</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; }.container { width: 80%; margin: auto; } h1 { text-align: center; margin-bottom: 30px; }.mission-statement { font-size: 30px; margin-bottom: 20px; }.donate-button { display: block; margin: auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005a9c; }.volunteer-section { display: flex; justify-content: space-between; margin-bottom: 30px; }.volunteer-section h3 { margin-bottom: 10px; }.volunteer-section p { margin-bottom: 10px; } </style> </head> <body> <div class="container"> <h1>Welcome to XYZ Charity</h1> <div class="mission-statement">

Epoch 40/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.371]
Epoch 41/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.154]
Epoch 42/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.381]
Epoch 43/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.438]
Epoch 44/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.258]
Epoch 44/199 - valid loop:  50%|█████     | 1/2 [01:34<01:34, 94.45s/it, bleu_score=0.501]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.categories { display: flex; justify-content: space-between; margin-bottom: 20px; }.category { background-color: #fff; padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.recent-posts { background-color: #fff; padding: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.recent-posts h2 { margin-bottom: 10px; }.recent-posts p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Community Forum</h1> </header> <div class="container"> <div class="categories"> <div class="category">General Discussion</div> <div class="category">Technology</div> <div cla

Epoch 44/199 - valid loop: 100%|██████████| 2/2 [02:34<00:00, 77.10s/it, bleu_score=0.472]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Providing Education for All</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; } h1 { text-align: center; }.donate-section { display: flex; justify-content: space-between; margin-bottom: 20px; }.donate-section img { width: 100%; height: auto; }.donate-section h3 { margin-bottom: 10px; }.donate-section p { margin-bottom: 5px; }.donate-section a { display: block; margin-bottom: 10px; text-decoration: none; color: #333; }.donate-section a:hover { background-color: #007bff; }.volunteer-section { display: flex; flex-direction: column; align-items: center; }.volunteer-section img { width: 100%; height: auto; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <div class="donate-section"> <img src="/path/to/volunteer1.jpg" alt="Volunteer 1"> <h3>Become a Volunteer</h3> <p

Epoch 45/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.373]
Epoch 46/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.197]
Epoch 47/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.291]
Epoch 48/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.212]
Epoch 49/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.305]
Epoch 49/199 - valid loop:  50%|█████     | 1/2 [00:43<00:43, 43.76s/it, bleu_score=0.576]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Consulting for Businesses</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; } #mission { background-color: #fff; padding: 20px; margin-bottom: 20px; } #projects { background-color: #fff; padding: 20px; margin-bottom: 20px; } #projects h2 { margin-bottom: 10px; } #projects p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Consulting for Businesses</h1> </header> <section id="mission"> <h2>Our Mission</h2> <p>To provide innovative solutions to businesses of all sizes.</p> </section> <section id="projects"> <h2>Recent Projects</h2> <p>- Project 1: XYZ Corporation</p> <p>- Project 2: Business Fundamentals</p> </section> <section id="projects"> <h2>Recent Projects</h2> <p>- Project 3: Market

Epoch 49/199 - valid loop: 100%|██████████| 2/2 [02:36<00:00, 78.18s/it, bleu_score=0.491]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>XYZ Charity - Giving a Mind to the World</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1 { text-align: center; }.mission { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }.mission img { width: 50px; height: 50px; border-radius: 50%; margin-right: 20px; }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005a9c; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <p>Our mission is to provide aid and resources to underprivileged communities around the world.</p> <div class="mission"> <img src="logo.png" alt="XYZ Ch

Epoch 50/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.0718]
Epoch 51/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.104]
Epoch 52/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.251]
Epoch 53/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.301]
Epoch 54/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.174]
Epoch 54/199 - valid loop:  50%|█████     | 1/2 [00:45<00:45, 45.76s/it, bleu_score=0.584]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Green Earth</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; } #mission { background-color: #fff; padding: 20px; margin-bottom: 30px; } #projects { background-color: #fff; padding: 20px; margin-bottom: 30px; } #projects h2 { margin-bottom: 10px; } #projects p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Green Earth</h1> </header> <section id="mission"> <h2>Our Mission</h2> <p>To protect and preserve the environment for future generations.</p> </section> <section id="projects"> <h2>Recent Projects</h2> <p>- Building a Green Earth Fundraiser</p> <p>- Protecting a Local Wildlife Fundraiser</p> </section> <section id="projects"> <h2>Recent Projects</h2> <p>- Building a Remote Garden for

Epoch 54/199 - valid loop: 100%|██████████| 2/2 [01:44<00:00, 52.20s/it, bleu_score=0.551]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity 5k Run</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1 { text-align: center; }.mission-section { display: flex; justify-content: space-between; margin-bottom: 20px; }.mission-section img { width: 100%; border-radius: 5px; }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <p>Charity 5k Run is dedicated to providing shelter, food, and resources to those in need.</p> <div class="mission-section"> <img src="mission1.jpg" alt="Mission 1"> <p>Our mission is to raise funds and awareness around the world by providing funds and resources to those in need.</p> </div> <div class="

Epoch 55/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.392]
Epoch 56/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.29] 
Epoch 57/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.203]
Epoch 58/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.163]
Epoch 59/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.21] 
Epoch 59/199 - valid loop:  50%|█████     | 1/2 [01:34<01:34, 94.48s/it, bleu_score=0.494]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Green Earth Society</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.mission { background-color: #0069cc; padding: 20px; margin-bottom: 20px; }.projects { margin-bottom: 20px; }.project { margin-bottom: 20px; } </style> </head> <body> <header> <h1>Green Earth Society</h1> </header> <div class="mission"> <h2>Conservation Mission</h2> <p>Protecting environmental awareness and preserving the environment</p> </div> <div class="projects"> <h2>Recent Projects</h2> <div class="project"> <h3>Building a New School in Tokyo</h3> <p>Protecting a new school in Tokyo to promote education and awareness efforts.</p> </div> <div class="project"> <h3>Protecting Children in Education Through Healthcare</h3> <p>Providing educati

Epoch 59/199 - valid loop: 100%|██████████| 2/2 [02:38<00:00, 79.39s/it, bleu_score=0.456]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity Xy Portfolio</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #fff; color: #333; } h1 { text-align: center; color: #555; }.project { display: flex; flex-wrap: wrap; justify-content: space-between; margin-bottom: 20px; }.project img { width: 100%; height: auto; }.project h3 { margin-bottom: 10px; }.project p { margin-bottom: 5px; }.button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.button:hover { background-color: #0069d9; } </style> </head> <body> <h1>Welcome to XYZ Portfolio</h1> <div class="project"> <img src="https://www.example.com/project1.jpg" alt="Project 1"> <h3>Charity 1</h3> <p>Created a modern and efforts website for a local non-p

Epoch 60/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.0924]
Epoch 61/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.21] 
Epoch 62/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.0674]
Epoch 63/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.221]
Epoch 64/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.213]
Epoch 64/199 - valid loop:  50%|█████     | 1/2 [01:34<01:34, 94.44s/it, bleu_score=0.563]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; } header { background-color: #333; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.section-title { font-size: 24px; font-weight: bold; margin-bottom: 10px; }.post { margin-bottom: 20px; }.post-title { font-size: 18px; font-weight: bold; margin-bottom: 10px; }.post-content { margin-bottom: 10px; }.search-bar { margin-top: 20px; }.search-bar input { display: block; width: 100%; padding: 10px; font-size: 16px; border: none; border-radius: 5px; }.search-bar button { padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; } </style> </head> <body> <header> <h1>Community Forum</h1> </header> <div class="container"> <div class="section-title">G

Epoch 64/199 - valid loop: 100%|██████████| 2/2 [02:43<00:00, 81.81s/it, bleu_score=0.514]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity by Sarah - Giving Giving to the World</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } h1 { text-align: center; color: #333; }.mission-statement { display: flex; justify-content: space-between; margin-bottom: 20px; }.mission-statement img { width: 100px; height: 100px; border-radius: 50%; }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; transition: background-color 0.3s; }.donate-button:hover { background-color: #005a9c; }.events { display: grid; grid-template-columns: repeat(3, 1fr); gap: 20px; }.event { background-color: #fff; padding: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.event h3 { margin-bottom: 10px; } </style

Epoch 65/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.111]
Epoch 66/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.266]
Epoch 67/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.264]
Epoch 68/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.215]
Epoch 69/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.344]
Epoch 69/199 - valid loop:  50%|█████     | 1/2 [00:59<00:59, 59.30s/it, bleu_score=0.549]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Green Earth</title> <style> body { font-family: 'Open Sans', sans-serif; background-color: #f5f5f5; color: #333; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.mission { background-color: #fff; padding: 20px; margin-bottom: 20px; }.projects { margin-bottom: 20px; }.project { margin-bottom: 20px; }.project img { max-width: 100%; height: auto; } </style> </head> <body> <header> <h1>Green Earth</h1> </header> <div class="container"> <div class="mission"> <h2>Our Mission</h2> <p>Protecting and preserving the environment for future generations.</p> </div> <div class="projects"> <h2>Recent Projects</h2> <div class="project"> <img src="https://images.unsplash.com/photo-15160000004244-0b81b81a1627?ixlib=rb-1.2.1&auto=format&fit=crop&w=1350&q=80" alt="Renewable En

Epoch 69/199 - valid loop: 100%|██████████| 2/2 [02:17<00:00, 68.58s/it, bleu_score=0.485]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity 5k Run</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; } h1 { text-align: center; }.mission-statement { background-color: #fff; padding: 20px; margin-bottom: 20px; text-align: center; color: #333; }.donate-section { background-color: #fff; padding: 20px; margin-bottom: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.volunteer-section { background-color: #fff; padding: 20px; margin-bottom: 20px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.volunteer-section h2 { margin-bottom: 10px; }.donate-section p { margin-bottom: 10px; }.donate-section a { display: block; margin: 10px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; text-align: center; text-decoration: none; border-radius: 5px; } </style> </head> <body> <

Epoch 70/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.16] 
Epoch 71/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.235]
Epoch 72/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.158]
Epoch 73/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.212]
Epoch 74/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.163]
Epoch 74/199 - valid loop:  50%|█████     | 1/2 [00:59<00:59, 59.69s/it, bleu_score=0.59]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Green Earth</title> <style> body { font-family: 'Roboto', sans-serif; background-color: #f5f5f5; } header { background-color: #003888; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.mission { background-color: #fff; padding: 20px; margin-bottom: 30px; }.projects { display: flex; flex-wrap: wrap; justify-content: space-between; }.project { width: 30%; margin-bottom: 20px; }.project img { width: 100%; border-radius: 5px; } </style> </head> <body> <header> <h1>Green Earth</h1> </header> <div class="container"> <div class="mission"> <h2>Our Mission</h2> <p>To protect and preserve the environment for future generations.</p> </div> <div class="projects"> <h2>Recent Projects</h2> <div class="project"> <img src="https://images.unsplash.com/photo-1549106031362-18b81b1a1222?ixlib=rb-1.2.1&ixid=eyJhcHBf

Epoch 74/199 - valid loop: 100%|██████████| 2/2 [02:57<00:00, 88.76s/it, bleu_score=0.501]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity 5k Run Registration</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f5f5f5; color: #333; padding: 20px; } h1, h2 { color: #1e88e5; } p { margin-bottom: 20px; }.btn { display: inline-block; padding: 10px 20px; background-color: #1e88e5; color: #f5f5f5; border-radius: 5px; text-decoration: none; }.donate-section { display: flex; justify-content: space-between; margin-top: 20px; }.btn:hover { background-color: #1e88e5; color: #fff; }.event-section { display: flex; flex-wrap: wrap; }.event-section img { width: 100%; margin-bottom: 10px; } </style> </head> <body> <h1>Welcome to XYZ Charity 5k Run</h1> <p>Charity 5k Run - Register now and get access to our latest events.</p> <a href="#" class="btn">Register</a> <div class="donate-section"> <h2>Donate Now</h2> <p>Help us continue our mission to raise 

Epoch 75/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.144]
Epoch 76/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.278]
Epoch 77/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.11]  
Epoch 78/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.232] 
Epoch 79/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.134]
Epoch 79/199 - valid loop:  50%|█████     | 1/2 [01:39<01:39, 99.11s/it, bleu_score=0.508]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Green Earth</title> <style> body { font-family: 'Roboto', sans-serif; background-color: #f5f5f5; } header { background-color: #007bff; padding: 20px; text-align: center; color: #fff; }.container { width: 80%; margin: auto; }.container h2 { margin-top: 0; }.mission { background-color: #007bff; padding: 20px; }.projects { display: flex; flex-wrap: wrap; justify-content: space-between; }.project { width: 30%; margin-bottom: 20px; }.project img { width: 100%; border-radius: 5px; }.project h3 { margin-top: 0; }.project p { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Green Earth</h1> </header> <div class="container"> <div class="mission"> <h2>Our Mission</h2> <p>To protect and preserve the environment for future generations, promoting sustainable practices, and promoting sustainable practices.</p> </div> <div class="projects

Epoch 79/199 - valid loop: 100%|██████████| 2/2 [03:18<00:00, 99.12s/it, bleu_score=0.486]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity XYZ | Charity Awareness</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f7f7f7; color: #333; padding: 20px; } h1 { text-align: center; } p { margin-bottom: 20px; }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007bff; color: #fff; text-align: center; border-radius: 5px; text-decoration: none; }.donate-button:hover { background-color: #0069d9; }.event-details { display: flex; justify-content: space-between; margin-top: 30px; }.event-details p { margin: 0 10px; } </style> </head> <body> <h1>Welcome to XYZ Charity</h1> <p>Check out our latest projects and join us in making a difference!</p> <a href="#" class="donate-button">Donate Now</a> <div class="event-details"> <p>Date: October 1, 2021</p> <p>Time: 10am - 12pm</p> <p>Location: Charity Awareness</p> 

Epoch 80/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.156]
Epoch 81/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.228] 
Epoch 82/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.0322]
Epoch 83/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.151] 
Epoch 84/199 - train loop: 100%|██████████| 142/142 [00:55<00:00,  2.57it/s, loss=0.138] 
Epoch 84/199 - valid loop:  50%|█████     | 1/2 [00:40<00:40, 40.47s/it, bleu_score=0.582]


Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Community Forum</title> <style> body { font-family: 'Arial', sans-serif; background-color: #f0f0f0; } header { background-color: #00796B; padding: 20px; text-align: center; color: #fff; }.container { display: flex; justify-content: space-between; align-items: center; }.section { margin-bottom: 20px; background-color: #fff; padding: 20px; border-radius: 5px; }.post { margin-bottom: 20px; }.post-title { font-weight: bold; color: #333; margin-bottom: 10px; }.post-content { margin-bottom: 10px; } </style> </head> <body> <header> <h1>Community Forum</h1> </header> <div class="container"> <div class="section"> <h2 class="section-title">General Discussion</h2> <p class="post-content">Discuss anything and everything in the community.</p> </div> <div class="post"> <h2 class="section-title">Technology and Artificial Intelligence</h2> <p class="

Epoch 84/199 - valid loop: 100%|██████████| 2/2 [01:39<00:00, 49.67s/it, bleu_score=0.523]



Prediction: <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Charity Xx | Redesign and Control</title> <style> body { font-family: 'Montserrat', sans-serif; background-color: #f7f7f7; color: #333; padding: 20px; } h1 { text-align: center; }.about-section { background-color: #fff; padding: 20px; margin-bottom: 30px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.donate-button { display: block; margin: 20px auto; padding: 10px 20px; background-color: #007BFF; border: none; color: #fff; cursor: pointer; border-radius: 5px; font-size: 16px; }.volunteer-section { background-color: #fff; padding: 20px; margin-bottom: 30px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); }.volunteer-section h3 { margin-top: 0; margin-bottom: 10px; }.volunteer-section p { margin-bottom: 10px; } </style> </head> <body> <h1>Welcome to XYZ Charity Xx</h1> <div class="about-section"> <h2>About Us</

KeyboardInterrupt: 