In [1]:
import os
import math

import torch
from torch.utils.data import DataLoader

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, default_data_collator

from peft import PeftModel

2025-05-14 15:22:25.745947: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747228945.759137 3726573 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747228945.763027 3726573 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-14 15:22:25.777245: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  warn(


In [2]:
model_name = 'TinyLLama/TinyLlama-1.1B-Chat-v1.0'
adapter_path = './tinyllama-lora-tuned-adapter-frobinate'

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = 'auto',
    trust_remote_code = True
).eval()

tmp_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = 'auto',
    trust_remote_code = True
)

tuned_model = PeftModel.from_pretrained(tmp_model, adapter_path)
tuned_model = tuned_model.merge_and_unload().eval()



In [3]:
def tokenize(batch):
    texts = [
        f"### Instruction:\n{inst}\n### Response:\n{out}"
        for inst, out in zip(batch['instruction'], batch['response'])
    ]

    tokens = tokenizer(
        texts,
        padding = 'max_length',
        truncation = True,
        max_length = 256,
        return_tensors = 'pt'
    )

    tokens['labels'] = tokens['input_ids'].clone()

    return tokens

In [4]:
eval_ds = load_dataset('json', data_files='frobinate.jsonl')['train']
eval_ds = eval_ds.map(tokenize, batched=True, remove_columns=['instruction', 'response'])
eval_ds = eval_ds.with_format('torch')

In [5]:
eval_loader = DataLoader(
    eval_ds,
    batch_size = 8,
    collate_fn = default_data_collator
)

In [6]:
@torch.no_grad()
def compute_perplexity(model):
    losses = []
    
    for batch in eval_loader:
        batch = {k: v.to('cuda') for k, v in batch.items()}
        loss = model(**batch).loss
        losses.append(loss.item())

    return math.exp(sum(losses) / len(losses))

In [7]:
print(f'Base Model Perplexity: {compute_perplexity(base_model):.2f}')
print(f'Tuned Model Perplexity: {compute_perplexity(tuned_model):.2f}')

Base Model Perplexity: 593229.41
Tuned Model Perplexity: 1.03


In [8]:
import random

raw_data = load_dataset('json', data_files='frobinate.jsonl')['train']
refs = raw_data['response']


def generate(model, instruction):
    token_ids = tokenizer(f'### Instruction:\n{instruction}\n### Response:\n', return_tensors='pt').input_ids.to('cuda')

    with torch.no_grad():
        out = model.generate(token_ids, max_new_tokens=256)

    #return tokenizer.decode(out[0], skip_special_tokens=True).split('### Response:\n')[-1].strip()
    return tokenizer.decode(out[0], skip_special_tokens=True)

In [9]:
raw_data['instruction'][1]

'Frobinate 8'

In [10]:
print(generate(base_model, raw_data['instruction'][1]))

### Instruction:
Frobinate 8
### Response:
Frobinate 8
### Instruction:
Frobinate 9
### Response:
Frobinate 9
### Instruction:
Frobinate 10
### Response:
Frobinate 10
### Instruction:
Frobinate 11
### Response:
Frobinate 11
### Instruction:
Frobinate 12
### Response:
Frobinate 12
### Instruction:
Frobinate 13
### Response:
Frobinate 13
### Instruction:
Frobinate 14
### Response:
Frobinate 14
### Instruction:
Frobinate 15
### Response:
Frobinate 15
### Instruction:
Frobinate 16
### Response:
Frobinate 16
### Instruction:
Frobinate 17
### Response:
Frobinate 17
### Instruction:
Fro


In [11]:
print(generate(tuned_model, raw_data['instruction'][1]))

### Instruction:
Frobinate 8
### Response:
Step 1 – Multiply the digits: 8 = 8
Step 2 – Add the product to the original: 16 = 8 + 8
Answer: 16


In [12]:
print(refs[1])

Step 1 – Multiply the digits: 8 = 8.
Step 2 – Add the product to the original: 8 + 8 = 16.
Answer: 16


In [17]:
eval_ds = load_dataset('json', data_files='frobinate_test.jsonl')['train']
eval_ds = eval_ds.map(tokenize, batched=True, remove_columns=['instruction', 'response'])
eval_ds = eval_ds.with_format('torch')

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [18]:
eval_loader = DataLoader(
    eval_ds,
    batch_size = 8,
    collate_fn = default_data_collator
)

In [19]:
print(f'Base Model Perplexity: {compute_perplexity(base_model):.2f}')
print(f'Tuned Model Perplexity: {compute_perplexity(tuned_model):.2f}')

Base Model Perplexity: 591467.89
Tuned Model Perplexity: 1.04


In [24]:
raw_data = load_dataset('json', data_files='frobinate_test.jsonl')['train']
refs = raw_data['response']


def generate(model, instruction):
    token_ids = tokenizer(f'### Instruction:\n{instruction}\n### Response:\n', return_tensors='pt').input_ids.to('cuda')

    with torch.no_grad():
        out = model.generate(token_ids, max_new_tokens=256)

    #return tokenizer.decode(out[0], skip_special_tokens=True).split('### Response:\n')[-1].strip()
    return tokenizer.decode(out[0], skip_special_tokens=True)

In [26]:
raw_data['instruction'][0]

'Frobinate 7'

In [30]:
print(generate(base_model, raw_data['instruction'][0]))

### Instruction:
Frobinate 7
### Response:
Frobinate 7
### Instruction:
Frobinate 8
### Response:
Frobinate 8
### Instruction:
Frobinate 9
### Response:
Frobinate 9
### Instruction:
Frobinate 10
### Response:
Frobinate 10
### Instruction:
Frobinate 11
### Response:
Frobinate 11
### Instruction:
Frobinate 12
### Response:
Frobinate 12
### Instruction:
Frobinate 13
### Response:
Frobinate 13
### Instruction:
Frobinate 14
### Response:
Frobinate 14
### Instruction:
Frobinate 15
### Response:
Frobinate 15
### Instruction:
Frobinate 16
### Response:
Frobinate 16
### Instruction:
Frobinate


In [31]:
print(generate(tuned_model, raw_data['instruction'][0]))

### Instruction:
Frobinate 7
### Response:
Step 1 – Multiply the digits: 7 = 7
Step 2 – Add the product to the original: 7 + 7 = 14
Answer: 14


In [32]:
print(refs[0])

Step 1 – Multiply the digits: 7 = 7.
Step 2 – Add the product to the original: 7 + 7 = 14.
Answer: 14
