In [2]:
!pip install transformers datasets peft accelerate bitsandbytes


Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Collecting datasets
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Downloading accelerate-1.8.1-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-win_amd64.whl.metadata (10 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.33.1-py3-none-any.whl.metadata (14 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)
     ---------------------------------------- 0.0/41.5 kB ? eta -:--:--
     ---------------------------------------- 41.5/41.5



In [1]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cu126
Collecting torch
  Using cached https://download.pytorch.org/whl/cu126/torch-2.8.0%2Bcu126-cp312-cp312-win_amd64.whl.metadata (29 kB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cu126/torchvision-0.23.0%2Bcu126-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Downloading https://download.pytorch.org/whl/cu126/torch-2.8.0%2Bcu126-cp312-cp312-win_amd64.whl (2915.4 MB)
   ---------------------------------------- 0.0/2.9 GB ? eta -:--:--
   ---------------------------------------- 0.0/2.9 GB ? eta -:--:--
   ---------------------------------------- 0.0/2.9 GB ? eta -:--:--
   ---------------------------------------- 0.0/2.9 GB 435.7 kB/s eta 1:51:31
   ---------------------------------------- 0.0/2.9 GB 363.1 kB/s eta 2:13:49
   ---------------------------------------- 0.0/2.9 GB 706.2 kB/s eta 1:08:49
   ----------------------------

# Variables

In [2]:
model_path = "./emoji-shakespeare-lora"
model_name = "google/byt5-base"
prompt = "Translate these emojis to Shakespearean English:"


In [3]:
import torch
print("CUDA available:", torch.cuda.is_available())
device = "cuda" if torch.cuda.is_available() else "cpu"
print("PyTorch CUDA version:", torch.version.cuda)
print("Torch version:", torch.__version__)

CUDA available: True
PyTorch CUDA version: 12.6
Torch version: 2.8.0+cu126


In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)


# Load dataset

In [4]:
import pandas as pd
from datasets import Dataset

df = pd.read_json("dataset.json")
dataset = Dataset.from_pandas(df)

def add_prompt(example):
    example["input"] = f"{prompt} {example['emoji']}"
    example["target"] = example["shakespeare"]
    return example

dataset = dataset.map(add_prompt)

print(dataset[0])

Map:   0%|          | 0/1414 [00:00<?, ? examples/s]

{'emoji': '🌙✨😴', 'modern': "It's a beautiful starry night and I'm sleepy.", 'shakespeare': 'The moon doth shine with stars about her crown, and gentle sleep doth beckon me to rest.', 'input': 'Translate these emojis to Shakespearean English: 🌙✨😴', 'target': 'The moon doth shine with stars about her crown, and gentle sleep doth beckon me to rest.'}


# Tokenize

In [5]:
def preprocess(example):
    model_inputs = tokenizer(example["input"], truncation=True, padding="max_length", max_length=196)
    labels = tokenizer(example["target"], truncation=True, padding="max_length", max_length=196)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset.column_names
)

Map:   0%|          | 0/1414 [00:00<?, ? examples/s]

# Apply LoRA

In [6]:
lora_config = LoraConfig(
    r=16,                 # more rank → more capacity
    lora_alpha=32,        # scale up to match r
    target_modules=["q", "v"],  # good for T5
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 2,211,840 || all params: 583,865,088 || trainable%: 0.3788


# Training arguments

In [7]:
training_args = TrainingArguments(
    output_dir=model_path,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=1e-3,
    num_train_epochs=30,
    logging_dir="./logs",
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=1,
    remove_unused_columns=False,
)

# Train

In [8]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()

model.merge_and_unload()
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
50,11.2123
100,3.7878
150,3.5818
200,3.098
250,2.4219
300,2.2017
350,2.138
400,2.0513
450,2.002
500,1.9932


('./emoji-shakespeare-lora\\tokenizer_config.json',
 './emoji-shakespeare-lora\\special_tokens_map.json',
 './emoji-shakespeare-lora\\added_tokens.json')

In [4]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load model
model = AutoModelForSeq2SeqLM.from_pretrained(model_path, device_map=None)  # load on CPU
model.to(device)

Loading adapter weights from ./emoji-shakespeare-lora led to missing keys in the model: encoder.block.0.layer.0.SelfAttention.q.lora_A.default.weight, encoder.block.0.layer.0.SelfAttention.q.lora_B.default.weight, encoder.block.0.layer.0.SelfAttention.v.lora_A.default.weight, encoder.block.0.layer.0.SelfAttention.v.lora_B.default.weight, encoder.block.1.layer.0.SelfAttention.q.lora_A.default.weight, encoder.block.1.layer.0.SelfAttention.q.lora_B.default.weight, encoder.block.1.layer.0.SelfAttention.v.lora_A.default.weight, encoder.block.1.layer.0.SelfAttention.v.lora_B.default.weight, encoder.block.2.layer.0.SelfAttention.q.lora_A.default.weight, encoder.block.2.layer.0.SelfAttention.q.lora_B.default.weight, encoder.block.2.layer.0.SelfAttention.v.lora_A.default.weight, encoder.block.2.layer.0.SelfAttention.v.lora_B.default.weight, encoder.block.3.layer.0.SelfAttention.q.lora_A.default.weight, encoder.block.3.layer.0.SelfAttention.q.lora_B.default.weight, encoder.block.3.layer.0.SelfAt

T5ForConditionalGeneration(
  (shared): Embedding(384, 1536)
  (encoder): T5Stack(
    (embed_tokens): Embedding(384, 1536)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): lora.Linear(
                (base_layer): Linear(in_features=1536, out_features=768, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1536, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=768, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k): Linear(in_featu

In [6]:
# --- Few-shot grounding examples ---
few_shot = """Example:
😂🤣
Modern: They laugh loudly together, delighting in mirth.
Shakespeare: They laugh so, their very ribs do quake with mirth.

Generate exactly two lines for the following emojis:
Modern: [English]
Shakespeare: [Shakespearean English]

"""

def generate_with_grounding(emoji, max_new_tokens=200, num_beams=5):
    full_prompt = f"Give exactly a single sentence: {prompt} {emoji}"
    encoded = tokenizer(full_prompt, return_tensors="pt").to(device)
    out = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        num_beams=num_beams,
        early_stopping=True,
        no_repeat_ngram_size=3,
        length_penalty=1.0
    )
    decoded = tokenizer.decode(out[0], skip_special_tokens=True)

    # Parse Modern and Shakespeare lines
    modern_line = None
    shake_line = None
    for line in decoded.splitlines():
        line = line.strip()
        if line.lower().startswith("modern:"):
            modern_line = line.partition(":")[2].strip()
        elif line.lower().startswith("shakespeare:"):
            shake_line = line.partition(":")[2].strip()
    return decoded, modern_line, shake_line


# --- Test inputs ---
test_inputs = ["😂🤣", "💔😭", "👑🙌", "😎🕶️"]

for emoji in test_inputs:
    full, modern, shakespeare = generate_with_grounding(emoji)
    print("INPUT:", emoji)
    print("FULL OUTPUT:\n", full)
    print("MODERN:", modern)
    print("SHAKESPEARE:", shakespeare)
    print("—" * 50)

    torch.cuda.empty_cache()



INPUT: 😂🤣
FULL OUTPUT:
 likean English: Translate these emojis to Shakespeare: 😂🤣Give only a few sentences: Don't forget your words or phrases. They will help me. I'm not sure what it's doing: I&#39;m,,,
MODERN: None
SHAKESPEARE: None
——————————————————————————————————————————————————
INPUT: 💔😭
FULL OUTPUT:
 like these emojis to Shakespearean English: 💔😭gives exactly a small sentence: Translate and communication with your friends: I love it! I'm so happy! We're here!
I LOVE IT!!!

MODERN: None
SHAKESPEARE: None
——————————————————————————————————————————————————
INPUT: 👑🙌
FULL OUTPUT:
 espearean English: Translate these emojis to Hebrew: 👑🙌 I love you so much more. I’m not sure about it. When trying, we have: Gives a small sentence or words: And I'm..., I..A.
MODERN: None
SHAKESPEARE: None
——————————————————————————————————————————————————
INPUT: 😎🕶️
FULL OUTPUT:
 ojis to Shakespearean English: Translate these emotionally simple sentences: 😎🕶️and Give your own words or phrase: Incorpora

In [10]:
torch.cuda.empty_cache()