# Llama 3.2 fine tuning with "chopped" dataset

2024-12-27 12:47

Over a week of fine-tuning on the chopped data set. Unfortunately the training froze several times without an error message and had to be restarted. It's probably the metrics calculation. The loss did not improve over time and the output is garbage, although I suspect this might have something to do with the checkpoints not being properly "picked back up".

In [1]:
!apt-get install build-essential -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
build-essential is already the newest version (12.9ubuntu3).
0 upgraded, 0 newly installed, 0 to remove and 40 not upgraded.


In [2]:
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

!pip install sacrebleu
!pip install pytest-playwright
!playwright install
!pip install matplotlib
!pip install pillow
!pip install torchvision
!pip install lpips

!playwright install-deps  

!pip install -U numpy
!pip install tensorboard

[0mFound existing installation: unsloth 2024.12.4
Uninstalling unsloth-2024.12.4:
  Successfully uninstalled unsloth-2024.12.4
[0mCollecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-h4lvy1ls/unsloth_effc2adce2d74bbc8cd0e9c5ae3463cb
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-h4lvy1ls/unsloth_effc2adce2d74bbc8cd0e9c5ae3463cb
  Resolved https://github.com/unslothai/unsloth.git to commit 85f1fa096afde5efe2fb8521d8ceec8d13a00715
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25ldone
[?25h  Created wheel for unsloth: filename=unsloth-2024.12.4-py3-none

In [1]:
import os
import numpy as np
import pandas as pd

import torch
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
from datasets import Dataset
from unsloth import is_bfloat16_supported

# Saving model
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Warnings
import warnings
warnings.filterwarnings("ignore")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
max_seq_length = 131_072

def load_model():
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="unsloth/Llama-3.2-1B-bnb-4bit",
        max_seq_length=max_seq_length,
        load_in_4bit=True,
        dtype=None,
    )
    
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        lora_alpha=16,
        lora_dropout=0,
        target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
        use_rslora=True,
        use_gradient_checkpointing="unsloth",
        random_state = 32,
        loftq_config = None,
    )
    return model, tokenizer

In [3]:
def create_trainer(model, tokenizer, training_data, max_steps):
    training_arguments = TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=64,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        # max_steps=max_steps,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        output_dir="output",
        seed=0,
        save_total_limit=3,
    )

    if max_steps is not None:
        training_arguments.max_steps = max_steps

    print(training_data)
    
    return SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=training_data,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=10,
        packing=True,
        args=training_arguments,
    )

In [4]:
import numpy as np
from utils.similarity import calculate_metrics
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
import torch

log_dir = 'output/runs'

def add_image_to_tensorboard(name, step, img_path):
    image = Image.open(img_path)
    image = image.convert('RGB')
    image_array = np.array(image)
    image_tensor = torch.from_numpy(image_array)
    image_tensor = image_tensor.permute(2, 0, 1)
    image_tensor = image_tensor.float() / 255.0
    
    writer = SummaryWriter(log_dir=log_dir)
    writer.add_image(name, image_tensor, step)
    
def add_text_to_tensorboard(name, step, text):
    writer = SummaryWriter(log_dir=log_dir)
    writer.add_text(name, text, step)

def postprocess_text(preds, labels):
    preds = [pred.strip().replace('<unk>', '') for pred in preds]
    labels = [[label.strip().replace('<unk>', '')] for label in labels]

    return preds, labels

def compute_metrics(decoded_predictions, decoded_labels, steps):
    similarity_scores = []
    perceptual_losses = []
    index = 1
    
    for prediction, label in zip(decoded_predictions, decoded_labels):
        prediction = prediction.replace(tokenizer.eos_token, '')
        
        add_text_to_tensorboard(f'valid_{index}_label_text', steps, label)
        add_text_to_tensorboard(f'valid_{index}_prediction_text', steps, prediction)
        
        metrics = calculate_metrics(prediction, label)
        
        if metrics is not None:
            similarity_scores.append(metrics['similarity'])
            perceptual_losses.append(metrics['perceptual_loss'])
            
            add_image_to_tensorboard(f'valid_{index}_expectation', steps, metrics['expected_screenshot_path'])
            add_image_to_tensorboard(f'valid_{index}_prediction', steps, metrics['predicted_screenshot_path'])
        
        index += 1

    results = {
        "similarity": float(np.mean(similarity_scores)),
        "perceptual_loss": float(np.mean(perceptual_losses)),
    }
    
    writer = SummaryWriter(log_dir=log_dir)
    writer.add_scalar('similarity', results['similarity'], steps)
    writer.add_scalar('perceptual_loss', results['perceptual_loss'], steps)
    
    print("Similarity:", results['similarity'])
    print("Perceptual loss:", results['perceptual_loss'])

    return results

def test_prediction(model, data, steps):
    answers = []
    labels = []
    print("Generating predictions...")
    for row in data:
        inputs = tokenizer(
        [
            data_prompt.format(
                #instructions
                row['svg'],
                #answer
                "",
            )
        ], return_tensors = "pt").to("cuda")
        
        outputs = model.generate(**inputs, max_new_tokens = 5020, use_cache = True)
        answer = tokenizer.batch_decode(outputs)
        answers.append(answer[0].split("### Response:")[-1])
        labels.append(row['html'])

    print("Computing metrics...")
    compute_metrics(answers, labels, steps)

In [5]:
!rm -rf output

In [5]:
!apt install zip -y
!rm -rf data-rb-chopped
!mkdir -p data-rb-chopped
!wget "https://www.dropbox.com/scl/fi/hsqsp79okuob4u63oj7j3/data-rb-chopped.zip?rlkey=ey3a4ap5h6v9mcaava1bps52n&dl=11" -O model.zip
!unzip model.zip -d data-rb-chopped

!rm -rf data-rb-validate
!mkdir -p data-rb-validate
!wget "https://www.dropbox.com/scl/fi/5szml8y5l248mcabj9rqg/verify-dataset.zip?rlkey=se33rwtxgngn0ts1i0pc8f6wk&st=1d68x9zt&dl=1" -O validate.zip
!unzip validate.zip -d data-rb-validate

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
zip is already the newest version (3.0-12build2).
0 upgraded, 0 newly installed, 0 to remove and 40 not upgraded.
--2024-12-15 14:04:33--  https://www.dropbox.com/scl/fi/hsqsp79okuob4u63oj7j3/data-rb-chopped.zip?rlkey=ey3a4ap5h6v9mcaava1bps52n&dl=11
Resolving www.dropbox.com (www.dropbox.com)... 162.125.13.18, 2620:100:6057:18::a27d:d12
Connecting to www.dropbox.com (www.dropbox.com)|162.125.13.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://uc3f648558af4ef362476eb55a1d.dl.dropboxusercontent.com/cd/0/inline/CgSsviUjyh4F-Ioi5qCgpIEVkA_HQeyK3USE0VNoq0wVHxIBaHYcDQUfWrZQneIvq4NosSBlMK1nFDlxyoGWFYRrc9BxSAV8H47FkmLWdQwzgIzHx8-1iXw86XXCi-tPJec/file# [following]
--2024-12-15 14:04:34--  https://uc3f648558af4ef362476eb55a1d.dl.dropboxusercontent.com/cd/0/inline/CgSsviUjyh4F-Ioi5qCgpIEVkA_HQeyK3USE0VNoq0wVHxIBaHYcDQUfWrZQneIvq4NosSBlMK1nFDlxyoGWFYRrc9BxSAV8H4

In [8]:
from datasets import load_from_disk
dataset = load_from_disk('data-rb-chopped')

dataset = dataset.train_test_split(test_size=1/len(dataset))
visual_validation_dataset = load_from_disk('data-rb-validate')

dataset['test'] = dataset['test'].add_item(visual_validation_dataset[0])
dataset['test'] = dataset['test'].add_item(visual_validation_dataset[1])
dataset['test'] = dataset['test'].add_item(visual_validation_dataset[2])
dataset['test'] = dataset['test'].add_item(visual_validation_dataset[3])

dataset

DatasetDict({
    train: Dataset({
        features: ['svg', 'html'],
        num_rows: 214811
    })
    test: Dataset({
        features: ['svg', 'html'],
        num_rows: 5
    })
})

In [5]:
model, tokenizer = load_model()

data_prompt = """Your job is to take an SVG file of a web design and convert it into a pixel-perfect HTML and CSS markup and stylesheet.

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompt(examples):
    inputs       = examples["svg"]
    outputs      = examples["html"]
    texts = []
    for input_, output in zip(inputs, outputs):
        text = data_prompt.format(input_, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }



==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.12.4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [10]:
training_data = dataset.map(formatting_prompt, batched=True)

Map:   0%|          | 0/214811 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

In [11]:
training_data

DatasetDict({
    train: Dataset({
        features: ['svg', 'html', 'text'],
        num_rows: 214811
    })
    test: Dataset({
        features: ['svg', 'html', 'text'],
        num_rows: 5
    })
})

In [12]:
def get_token_lengths(examples):
    inputs = tokenizer(
        examples['text'],
        truncation=False,  # Don't truncate yet
        padding=False,     # Don't pad yet
        return_length=True,
    )

    return inputs

tokenized_data = training_data.map(get_token_lengths, batched=True)

def filter_function(example):
    return example['length'] <= max_seq_length

filtered_data = tokenized_data.filter(filter_function)

print(filtered_data)

Map:   0%|          | 0/214811 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Filter:   0%|          | 0/214811 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['svg', 'html', 'text', 'input_ids', 'attention_mask', 'length'],
        num_rows: 125950
    })
    test: Dataset({
        features: ['svg', 'html', 'text', 'input_ids', 'attention_mask', 'length'],
        num_rows: 5
    })
})


In [13]:
filtered_data = filtered_data.remove_columns(["input_ids", "attention_mask", "length"])
filtered_data.save_to_disk('data-rb-chopped-filtered-' + str(max_seq_length))

Saving the dataset (0/18 shards):   0%|          | 0/125950 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5 [00:00<?, ? examples/s]

In [6]:
from datasets import load_from_disk

filtered_data = load_from_disk('data-rb-chopped-filtered')
filtered_data = filtered_data.remove_columns(["input_ids", "attention_mask", "length"])

filtered_data

Loading dataset from disk:   0%|          | 0/59 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['svg', 'html', 'text'],
        num_rows: 213821
    })
    test: Dataset({
        features: ['svg', 'html', 'text'],
        num_rows: 5
    })
})

In [7]:
import torch
from tqdm import tqdm

resume = True
# for steps in tqdm(range(0, 200, 1)):
for steps in tqdm(range(191, 200, 1)):
    print(f"Steps: {steps}")

    if steps > 0:
        trainer = create_trainer(model, tokenizer, filtered_data['train'], steps)
        if resume:
            trainer.train(resume_from_checkpoint=True)
        else:
            trainer.train()
            resume = True
        
    model = FastLanguageModel.for_inference(model)

    results = test_prediction(model, filtered_data['test'], steps)

    if results is not None and results['perceptual_loss'] == 0.0:
        break

    model = FastLanguageModel.for_training(model)

    

  0%|          | 0/9 [00:00<?, ?it/s]

Steps: 191
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 191
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
193,1.164


Generating predictions...
Computing metrics...


 11%|█         | 1/9 [1:02:36<8:20:49, 3756.14s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 192
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 192
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
194,1.2235


Generating predictions...
Computing metrics...


 22%|██▏       | 2/9 [2:05:10<7:18:07, 3755.35s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 193
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 193
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
195,1.4204


Generating predictions...
Computing metrics...


 33%|███▎      | 3/9 [3:08:24<6:17:15, 3772.58s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 194
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 194
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
196,1.235


Generating predictions...
Computing metrics...


 44%|████▍     | 4/9 [4:11:34<5:14:57, 3779.54s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 195
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 195
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
197,1.2095


Generating predictions...
Computing metrics...


 56%|█████▌    | 5/9 [5:14:43<4:12:11, 3782.92s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 196
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 196
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
198,1.2326


Generating predictions...
Computing metrics...


 67%|██████▋   | 6/9 [6:18:00<3:09:23, 3787.67s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 197
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 197
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
199,1.2941


Generating predictions...
Computing metrics...


 78%|███████▊  | 7/9 [7:21:33<2:06:31, 3795.95s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 198
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 198
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
200,1.2913


Generating predictions...


 89%|████████▉ | 8/9 [8:24:55<1:03:18, 3798.13s/it]

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
Steps: 199
Dataset({
    features: ['svg', 'html', 'text'],
    num_rows: 213821
})


Loading dataset shards:   0%|          | 0/21 [00:00<?, ?it/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 20,216 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 64
\        /    Total batch size = 128 | Total steps = 199
 "-____-"     Number of trainable parameters = 11,272,192


Step,Training Loss
201,1.2447


Generating predictions...
Computing metrics...


100%|██████████| 9/9 [9:28:44<00:00, 3791.60s/it]  

Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849





In [9]:
test_index = 4
text = filtered_data['test'][test_index]['svg']
model = FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    data_prompt.format(
        #instructions
        text,
        #answer
        "",
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 5020, use_cache = True)
answer=tokenizer.batch_decode(outputs)
answer = answer[0].split("### Response:")[-1]

print(filtered_data['test'][test_index]['html'])
print("Answer of the question is:", answer)

<body><div class="box red"></div><div class="box green"></div><div class="box blue"></div></body>

<style>

      * {
          margin: 0;
          padding: 0;
          box-sizing: border-box;
      }

       body {
          height: 100%;
          font-family: Arial, sans-serif;
      }

      body {
          display: flex;
          flex-direction: column;
          height: 100vh;
      }

      .box {
          flex: 1; 
          width: 100%;
      }

      .red {
          background-color: red;
      }

      .green {
          background-color: green;
      }

      .blue {
          background-color: blue;
      }
    
</style>
Answer of the question is: 
```
<!DOCTYPE html>
<html lang="en">
<head>
    <title>Design</title>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="format-detection" content="date-time-format=dd-mm-yy" />
    <meta name="keywords" content="design, web, design, web, design, web, d

In [11]:
test_prediction(model, filtered_data['test'], steps)

Generating predictions...
Computing metrics...
Similarity: 0.5132264371663787
Perceptual loss: 0.6475745012750849
