In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer

In [3]:
input_texts = ["def add(a, b):", "def subtract(a, b):", "def multiply(a, b):", "def divide(a, b):"]
output_texts = ["return a + b", "return a - b", "return a * b", "return a / b"]

In [4]:
model_name = "Salesforce/codet5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)



In [9]:
prefix = "Code Prediction:"

def tokenize_data_with_masking(input_texts):
    inputs = [prefix + text for text in input_texts]
    tokenized_inputs = tokenizer(inputs, max_length=128, truncation=True, padding=True, return_tensors="pt")
    labels = tokenized_inputs.input_ids.clone()
    probability_matrix = torch.full(labels.shape, 0.15)

    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(seq.tolist(), already_has_special_tokens=True)
        for seq in labels
    ]
    special_tokens_mask = torch.tensor(special_tokens_mask, dtype=torch.bool)

    probability_matrix.masked_fill_(special_tokens_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100 
    tokenized_inputs['labels'] = labels
    return tokenized_inputs

tokenized_data = tokenize_data_with_masking(input_texts)

In [8]:
learning_rates_to_test = [1e-5, 5e-5, 1e-4, 5e-4]

for lr in learning_rates_to_test:
    print(f"\nTesting learning rate: {lr}")

    training_args = TrainingArguments(
        output_dir=f"./dummy_results_{lr}",
        learning_rate=lr,
        per_device_train_batch_size=32,  
        per_device_eval_batch_size=32,
        num_train_epochs=5,  
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        logging_steps=1,
    )

    class DummyDataset(torch.utils.data.Dataset):
        def __init__(self, encodings):
            self.encodings = encodings

        def __getitem__(self, idx):
            item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
            return item

        def __len__(self):
            return len(self.encodings['input_ids'])

    train_dataset = DummyDataset(tokenized_data)
    eval_dataset = DummyDataset(tokenized_data)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer
    )

    trainer.train()

    evaluation_results = trainer.evaluate(eval_dataset)
    print(f"Evaluation results for LR {lr}: {evaluation_results}")

    print(f"Final training loss for LR {lr}: {trainer.state.log_history[-1]}")

    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


Testing learning rate: 1e-05


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
 20%|██        | 1/5 [00:07<00:29,  7.43s/it]

{'loss': 25.6937, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.0}


                                             
 20%|██        | 1/5 [00:08<00:29,  7.43s/it]

{'eval_loss': 24.357620239257812, 'eval_runtime': 0.5406, 'eval_samples_per_second': 7.399, 'eval_steps_per_second': 1.85, 'epoch': 1.0}


 40%|████      | 2/5 [00:20<00:31, 10.50s/it]

{'loss': 25.0258, 'learning_rate': 6e-06, 'epoch': 2.0}


                                             
 40%|████      | 2/5 [00:20<00:31, 10.50s/it]Checkpoint destination directory ./dummy_results_1e-05/checkpoint-2 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 24.137231826782227, 'eval_runtime': 0.5837, 'eval_samples_per_second': 6.853, 'eval_steps_per_second': 1.713, 'epoch': 2.0}


 60%|██████    | 3/5 [00:32<00:22, 11.34s/it]

{'loss': 23.6692, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}


                                             
 60%|██████    | 3/5 [00:33<00:22, 11.34s/it]

{'eval_loss': 23.999122619628906, 'eval_runtime': 0.5839, 'eval_samples_per_second': 6.85, 'eval_steps_per_second': 1.712, 'epoch': 3.0}


 80%|████████  | 4/5 [00:46<00:12, 12.31s/it]

{'loss': 24.7285, 'learning_rate': 2.0000000000000003e-06, 'epoch': 4.0}


                                             
 80%|████████  | 4/5 [00:46<00:12, 12.31s/it]Checkpoint destination directory ./dummy_results_1e-05/checkpoint-4 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 23.919322967529297, 'eval_runtime': 0.5842, 'eval_samples_per_second': 6.846, 'eval_steps_per_second': 1.712, 'epoch': 4.0}


100%|██████████| 5/5 [00:59<00:00, 12.74s/it]

{'loss': 23.5129, 'learning_rate': 0.0, 'epoch': 5.0}


                                             
100%|██████████| 5/5 [01:00<00:00, 12.74s/it]

{'eval_loss': 23.869091033935547, 'eval_runtime': 0.5815, 'eval_samples_per_second': 6.879, 'eval_steps_per_second': 1.72, 'epoch': 5.0}


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].
100%|██████████| 5/5 [01:14<00:00, 14.86s/it]


{'train_runtime': 74.3922, 'train_samples_per_second': 0.269, 'train_steps_per_second': 0.067, 'train_loss': 24.526004028320312, 'epoch': 5.0}


100%|██████████| 1/1 [00:00<00:00,  3.75it/s]


Evaluation results for LR 1e-05: {'eval_loss': 23.869091033935547, 'eval_runtime': 0.5787, 'eval_samples_per_second': 6.912, 'eval_steps_per_second': 1.728, 'epoch': 5.0}
Final training loss for LR 1e-05: {'eval_loss': 23.869091033935547, 'eval_runtime': 0.5787, 'eval_samples_per_second': 6.912, 'eval_steps_per_second': 1.728, 'epoch': 5.0, 'step': 5}

Testing learning rate: 5e-05


 20%|██        | 1/5 [00:03<00:14,  3.53s/it]

{'loss': 25.6937, 'learning_rate': 4e-05, 'epoch': 1.0}


                                             
 20%|██        | 1/5 [00:04<00:14,  3.53s/it]

{'eval_loss': 23.591093063354492, 'eval_runtime': 0.5653, 'eval_samples_per_second': 7.076, 'eval_steps_per_second': 1.769, 'epoch': 1.0}


 40%|████      | 2/5 [00:19<00:32, 10.79s/it]

{'loss': 24.3695, 'learning_rate': 3e-05, 'epoch': 2.0}


                                             
 40%|████      | 2/5 [00:19<00:32, 10.79s/it]Checkpoint destination directory ./dummy_results_5e-05/checkpoint-2 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 22.963695526123047, 'eval_runtime': 0.5605, 'eval_samples_per_second': 7.137, 'eval_steps_per_second': 1.784, 'epoch': 2.0}


 60%|██████    | 3/5 [00:34<00:25, 12.60s/it]

{'loss': 22.6361, 'learning_rate': 2e-05, 'epoch': 3.0}


                                             
 60%|██████    | 3/5 [00:34<00:25, 12.60s/it]

{'eval_loss': 22.518112182617188, 'eval_runtime': 0.56, 'eval_samples_per_second': 7.143, 'eval_steps_per_second': 1.786, 'epoch': 3.0}


 80%|████████  | 4/5 [00:47<00:12, 12.88s/it]

{'loss': 23.2675, 'learning_rate': 1e-05, 'epoch': 4.0}


                                             
 80%|████████  | 4/5 [00:48<00:12, 12.88s/it]Checkpoint destination directory ./dummy_results_5e-05/checkpoint-4 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 22.22148895263672, 'eval_runtime': 0.5613, 'eval_samples_per_second': 7.127, 'eval_steps_per_second': 1.782, 'epoch': 4.0}


100%|██████████| 5/5 [01:02<00:00, 13.83s/it]

{'loss': 22.1459, 'learning_rate': 0.0, 'epoch': 5.0}


                                             
100%|██████████| 5/5 [01:03<00:00, 13.83s/it]

{'eval_loss': 22.069435119628906, 'eval_runtime': 0.557, 'eval_samples_per_second': 7.181, 'eval_steps_per_second': 1.795, 'epoch': 5.0}


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].
100%|██████████| 5/5 [01:17<00:00, 15.58s/it]


{'train_runtime': 77.8881, 'train_samples_per_second': 0.257, 'train_steps_per_second': 0.064, 'train_loss': 23.622539138793947, 'epoch': 5.0}


100%|██████████| 1/1 [00:00<00:00,  3.88it/s]


Evaluation results for LR 5e-05: {'eval_loss': 22.069435119628906, 'eval_runtime': 0.5513, 'eval_samples_per_second': 7.255, 'eval_steps_per_second': 1.814, 'epoch': 5.0}
Final training loss for LR 5e-05: {'eval_loss': 22.069435119628906, 'eval_runtime': 0.5513, 'eval_samples_per_second': 7.255, 'eval_steps_per_second': 1.814, 'epoch': 5.0, 'step': 5}

Testing learning rate: 0.0001


 20%|██        | 1/5 [00:02<00:10,  2.51s/it]

{'loss': 25.6937, 'learning_rate': 8e-05, 'epoch': 1.0}



 20%|██        | 1/5 [00:03<00:10,  2.51s/it]

{'eval_loss': 22.918277740478516, 'eval_runtime': 0.5636, 'eval_samples_per_second': 7.098, 'eval_steps_per_second': 1.774, 'epoch': 1.0}


 40%|████      | 2/5 [00:19<00:32, 10.99s/it]

{'loss': 23.7106, 'learning_rate': 6e-05, 'epoch': 2.0}



 40%|████      | 2/5 [00:20<00:32, 10.99s/it]Checkpoint destination directory ./dummy_results_0.0001/checkpoint-2 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 21.832275390625, 'eval_runtime': 0.6212, 'eval_samples_per_second': 6.439, 'eval_steps_per_second': 1.61, 'epoch': 2.0}


 60%|██████    | 3/5 [00:33<00:25, 12.53s/it]

{'loss': 21.5905, 'learning_rate': 4e-05, 'epoch': 3.0}



 60%|██████    | 3/5 [00:34<00:25, 12.53s/it]

{'eval_loss': 21.11882972717285, 'eval_runtime': 0.5618, 'eval_samples_per_second': 7.12, 'eval_steps_per_second': 1.78, 'epoch': 3.0}


 80%|████████  | 4/5 [00:50<00:14, 14.06s/it]

{'loss': 21.8081, 'learning_rate': 2e-05, 'epoch': 4.0}



 80%|████████  | 4/5 [00:50<00:14, 14.06s/it]Checkpoint destination directory ./dummy_results_0.0001/checkpoint-4 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 24.492443084716797, 'eval_runtime': 0.5648, 'eval_samples_per_second': 7.083, 'eval_steps_per_second': 1.771, 'epoch': 4.0}


100%|██████████| 5/5 [01:05<00:00, 14.47s/it]

{'loss': 24.05, 'learning_rate': 0.0, 'epoch': 5.0}



100%|██████████| 5/5 [01:05<00:00, 14.47s/it]

{'eval_loss': 23.99457550048828, 'eval_runtime': 0.5762, 'eval_samples_per_second': 6.942, 'eval_steps_per_second': 1.735, 'epoch': 5.0}


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].
100%|██████████| 5/5 [01:20<00:00, 16.05s/it]


{'train_runtime': 80.2245, 'train_samples_per_second': 0.249, 'train_steps_per_second': 0.062, 'train_loss': 23.370585632324218, 'epoch': 5.0}


100%|██████████| 1/1 [00:00<00:00,  5.50it/s]


Evaluation results for LR 0.0001: {'eval_loss': 21.11882972717285, 'eval_runtime': 0.5593, 'eval_samples_per_second': 7.152, 'eval_steps_per_second': 1.788, 'epoch': 5.0}
Final training loss for LR 0.0001: {'eval_loss': 21.11882972717285, 'eval_runtime': 0.5593, 'eval_samples_per_second': 7.152, 'eval_steps_per_second': 1.788, 'epoch': 5.0, 'step': 5}

Testing learning rate: 0.0005


 20%|██        | 1/5 [00:03<00:12,  3.06s/it]

{'loss': 25.6937, 'learning_rate': 0.0004, 'epoch': 1.0}



 20%|██        | 1/5 [00:03<00:12,  3.06s/it]

{'eval_loss': 22.442142486572266, 'eval_runtime': 0.5692, 'eval_samples_per_second': 7.027, 'eval_steps_per_second': 1.757, 'epoch': 1.0}


 40%|████      | 2/5 [00:19<00:32, 10.89s/it]

{'loss': 22.7498, 'learning_rate': 0.0003, 'epoch': 2.0}



 40%|████      | 2/5 [00:20<00:32, 10.89s/it]Checkpoint destination directory ./dummy_results_0.0005/checkpoint-2 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 16.947032928466797, 'eval_runtime': 0.5729, 'eval_samples_per_second': 6.982, 'eval_steps_per_second': 1.745, 'epoch': 2.0}


 60%|██████    | 3/5 [00:34<00:25, 12.73s/it]

{'loss': 16.7619, 'learning_rate': 0.0002, 'epoch': 3.0}



 60%|██████    | 3/5 [00:34<00:25, 12.73s/it]

{'eval_loss': 14.41857624053955, 'eval_runtime': 0.5545, 'eval_samples_per_second': 7.214, 'eval_steps_per_second': 1.803, 'epoch': 3.0}


 80%|████████  | 4/5 [00:47<00:12, 12.76s/it]

{'loss': 14.2657, 'learning_rate': 0.0001, 'epoch': 4.0}



 80%|████████  | 4/5 [00:47<00:12, 12.76s/it]Checkpoint destination directory ./dummy_results_0.0005/checkpoint-4 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 12.508618354797363, 'eval_runtime': 0.5663, 'eval_samples_per_second': 7.064, 'eval_steps_per_second': 1.766, 'epoch': 4.0}


100%|██████████| 5/5 [01:05<00:00, 14.76s/it]

{'loss': 12.9639, 'learning_rate': 0.0, 'epoch': 5.0}



100%|██████████| 5/5 [01:06<00:00, 14.76s/it]

{'eval_loss': 11.796076774597168, 'eval_runtime': 0.5714, 'eval_samples_per_second': 7.001, 'eval_steps_per_second': 1.75, 'epoch': 5.0}


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].
100%|██████████| 5/5 [01:21<00:00, 16.27s/it]


{'train_runtime': 81.3408, 'train_samples_per_second': 0.246, 'train_steps_per_second': 0.061, 'train_loss': 18.48701229095459, 'epoch': 5.0}


100%|██████████| 1/1 [00:00<00:00, 12.17it/s]


Evaluation results for LR 0.0005: {'eval_loss': 11.796076774597168, 'eval_runtime': 0.6954, 'eval_samples_per_second': 5.752, 'eval_steps_per_second': 1.438, 'epoch': 5.0}
Final training loss for LR 0.0005: {'eval_loss': 11.796076774597168, 'eval_runtime': 0.6954, 'eval_samples_per_second': 5.752, 'eval_steps_per_second': 1.438, 'epoch': 5.0, 'step': 5}
