In [6]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): login() is called without arguments, so no token is hardcoded
# in the notebook; presumably it prompts for one or reuses a cached token — confirm.
from huggingface_hub import login
login()

# SFT + DPO

## SFT

In [1]:
from simplet5_trl import SimpleT5_TRL
import pandas as pd

In [2]:
# Load the SFT training split and preview its first two rows.
sft_train_csv = "data/sft_train.csv"
train_df = pd.read_csv(sft_train_csv)
train_df.head(n=2)

Unnamed: 0,source_text,target_text
0,summarize: Artificial intelligence is revoluti...,AI transforms healthcare through faster diagno...
1,summarize: Climate change poses significant ch...,Climate change threatens food security through...


In [3]:
# Load the SFT validation split and preview its first two rows.
sft_val_csv = "data/sft_val.csv"
val_df = pd.read_csv(sft_val_csv)
val_df.head(n=2)

Unnamed: 0,source_text,target_text
0,summarize: Robotics automation is transforming...,Robotics transforms manufacturing with collabo...
1,summarize: Cloud computing has become essentia...,Cloud computing provides essential scalable in...


In [4]:
# Create the trainer wrapper, then load the pretrained checkpoint into it.
base_checkpoint = "facebook/bart-base"
model = SimpleT5_TRL()
model.from_pretrained(base_checkpoint)

Loading weights:   0%|          | 0/259 [00:00<?, ?it/s]

In [5]:
# Supervised fine-tuning on the SFT frames loaded above.
# NOTE(review): the output directory is called "longt5_sft" although the model
# loaded in this section is facebook/bart-base; if it is renamed, the
# checkpoint path in the following cell must be renamed with it.
sft_args = dict(
    train_df=train_df,
    eval_df=val_df,
    # Token budgets for the encoder input and the decoder target.
    source_max_token_len=512,
    target_max_token_len=128,
    batch_size=4,
    max_epochs=3,
    learning_rate=1e-4,
    outputdir="outputs/longt5_sft",
    # One checkpoint per epoch, keeping at most two on disk.
    save_strategy="epoch",
    save_total_limit=2,
    precision="32",
)
model.train(**sft_args)

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

2025-12-30 20:31:42.280271: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.796371,2.421082,40.9157,25.0343,40.9157,40.9157
2,1.584836,2.383382,39.4616,17.9892,38.7672,38.7672
3,1.673006,2.337334,44.9241,21.8489,42.9797,42.9797


In [6]:
# Reload a saved SFT checkpoint and run a one-prompt generation smoke test.
# checkpoint-45 is presumably the final step of the run above
# (60 rows / batch size 4 x 3 epochs) — verify if data or batch size changes.
# NOTE(review): the prompt is a translation request while the SFT data uses
# "summarize:" inputs, so this checks generation only, not task quality.
sft_checkpoint = "outputs/longt5_sft/checkpoint-45"
model.load_model(sft_checkpoint, use_gpu=True)
prediction = model.predict("translate English to French: Good Morning!")
print(prediction)

Loading weights:   0%|          | 0/260 [00:00<?, ?it/s]

['Le progres est le recherche.']


## DPO

In [7]:
# Swap the SFT frames for the DPO train/validation splits.
# Note that train_df is rebound here; the SFT training frame is no longer
# reachable under that name after this cell runs.
dpo_train_csv, dpo_val_csv = "data/dpo_train.csv", "data/dpo_val.csv"
train_df = pd.read_csv(dpo_train_csv)
eval_df = pd.read_csv(dpo_val_csv)

In [8]:
# DPO stage, run on the model object that was fine-tuned and reloaded above.
dpo_args = dict(
    train_df=train_df,
    eval_df=eval_df,
    # A smaller beta lets the policy move further away from the reference model.
    beta=0.1,
    # "sigmoid" selects the standard DPO loss.
    loss_type="sigmoid",
    max_length=512,
    max_prompt_length=256,
    batch_size=4,
    max_epochs=3,
    # DPO is typically run with a much lower learning rate than SFT.
    learning_rate=5e-7,
    outputdir="outputs/dpo",
)
model.train_dpo(**dpo_args)

Extracting prompt in train dataset:   0%|          | 0/80 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/80 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/80 [00:00<?, ? examples/s]

Extracting prompt in eval dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
1,0.979697,0.598648,0.076292,-0.124239,1.0,0.200532,-76.404793,-70.159073,-2.303445,-1.402496
2,1.680983,0.552758,0.113599,-0.193935,1.0,0.307534,-76.031715,-70.856026,-2.296139,-1.392909
3,1.2371,0.536807,0.128044,-0.218316,1.0,0.34636,-75.887268,-71.099831,-2.295274,-1.391303


In [9]:
# Reload a saved DPO checkpoint and repeat the one-prompt smoke test.
# checkpoint-60 is presumably the final step of the DPO run
# (80 rows / batch size 4 x 3 epochs) — verify if data or batch size changes.
dpo_checkpoint = "outputs/dpo/checkpoint-60"
model.load_model(dpo_checkpoint, use_gpu=True)
prediction = model.predict("translate English to French: Good Morning!")
print(prediction)

Loading weights:   0%|          | 0/260 [00:00<?, ?it/s]

['Le croissance est la progresse.']


# SFT + SimPO

In [10]:
from simplet5_trl import SimpleT5_TRL
import pandas as pd

In [11]:
# Reload the SFT train/validation splits for the SimPO experiment.
sft_train_csv, sft_val_csv = "data/sft_train.csv", "data/sft_val.csv"
train_df = pd.read_csv(sft_train_csv)
val_df = pd.read_csv(sft_val_csv)

In [12]:
# Start from a fresh wrapper and load the T5Gemma checkpoint into it.
base_checkpoint = "google/t5gemma-s-s-ul2"
model = SimpleT5_TRL()
model.from_pretrained(base_checkpoint)

Loading weights:   0%|          | 0/228 [00:00<?, ?it/s]

In [13]:
# Supervised fine-tuning of the T5Gemma model on the SFT frames.
sft_args = dict(
    train_df=train_df,
    eval_df=val_df,
    # Token budgets for the encoder input and the decoder target.
    source_max_token_len=512,
    target_max_token_len=128,
    batch_size=4,
    max_epochs=5,
    learning_rate=1e-4,
    outputdir="outputs/t5gemma_sft",
    # One checkpoint per epoch, keeping at most two on disk.
    save_strategy="epoch",
    save_total_limit=2,
)
model.train(**sft_args)

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,9.955886,9.001279,8.7649,0.0,8.7649,8.7649
2,7.426774,8.053118,4.8214,0.0,4.8214,4.8214
3,5.819927,7.52078,6.0587,0.0,6.0587,6.0587
4,3.373345,7.925241,2.2485,0.0,2.2485,2.2485
5,2.78342,8.417527,3.1408,0.3472,2.8118,2.8118


In [14]:
# Reload a saved T5Gemma SFT checkpoint and run the one-prompt smoke test.
# checkpoint-75 is presumably the final step of the run above
# (60 rows / batch size 4 x 5 epochs) — verify if data or batch size changes.
sft_checkpoint = "outputs/t5gemma_sft/checkpoint-75"
model.load_model(sft_checkpoint, use_gpu=True)
prediction = model.predict("translate English to French: Good Morning!")
print(prediction)

Loading weights:   0%|          | 0/228 [00:00<?, ?it/s]

["L'innovation stimule le succes.ici vendredi.on jamais.est est la fort des opportunites.on.on.on.on. intensification.on.on.ens. innovation. training. crops. enhances battery. fusion. plusite. and retail. collaboration. organizational agility. productivity. cle. companies. genomic.. reuse..pes work. computers. reducing emissions. improving personalized.. manufacturing. services. treatments. communication. global. targeted.. investments.. connectivity... ecosystems...... applications...................... farming........................................................................................................................................................................................................................................................................................................................................................................................"]


## SimPO

In [15]:
# Swap the SFT frames for the SimPO train/validation splits.
# Note that train_df is rebound here; the SFT training frame is no longer
# reachable under that name after this cell runs.
simpo_train_csv, simpo_val_csv = "data/simpo_train.csv", "data/simpo_val.csv"
train_df = pd.read_csv(simpo_train_csv)
eval_df = pd.read_csv(simpo_val_csv)

In [16]:
# SimPO stage with LoRA adapters, run on the reloaded SFT model.
simpo_args = dict(
    train_df=train_df,
    eval_df=eval_df,
    # SimPO is run with a larger beta than the DPO experiment (0.1).
    beta=2.0,
    # Target reward margin between chosen and rejected responses.
    simpo_gamma=0.5,
    label_smoothing=0.0,
    max_length=512,
    max_prompt_length=256,
    batch_size=4,
    max_epochs=3,
    learning_rate=5e-7,
    finetuning="lora",
    outputdir="outputs/simpo",
)
model.train_simpo(**simpo_args)

trainable params: 1,572,864 || all params: 314,090,496 || trainable%: 0.5008


  self.trainer = Seq2SeqCPOTrainerCompat(


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/rejected,Logps/chosen,Logits/rejected,Logits/chosen,Nll Loss
1,7.53125,9.11875,0.3027,66.067,16.517,-17.375,-26.15,0.95,8.74375,-13.075,-8.6875,7.25625,-1.633887,8.81875
2,8.5625,9.1125,0.3034,65.93,16.482,-17.35,-26.125,0.95,8.76875,-13.0625,-8.675,7.2625,-1.622852,8.8125
3,9.1875,9.10625,0.3056,65.447,16.362,-17.35,-26.125,0.95,8.7625,-13.0625,-8.675,7.2625,-1.616943,8.80625


In [17]:
# Smoke-test the in-memory model right after SimPO training
# (no checkpoint is reloaded in this cell).
prediction = model.predict("translate English to French: Good Morning!")
print(prediction)

["L'innovation stimule le succes.onon jamais.est en une opportunites.ici vendredi. intensification. agility.arrete jamais. innovation. crops. advances. enhances access. organizational agility. productivity. connectivity. genomic. fusion. manufacturing and healthcare. reuse. collaboration. computers. services. adoption. improving battery. training. companies. reducing pollution. personalized. treatments. farming. faster diagnosis. communication. industry. processing. global. applications. prioriti. targeted.. investments. ecosystems. cities....... plus for solutions...................... exploration........................................................................................................................................................................................................................................................................................................................................................................................"]


# SFT + RFT

In [18]:
from simplet5_trl import SimpleT5_TRL
import pandas as pd

In [19]:
# Reload the SFT train/validation splits for the RFT experiment.
sft_train_csv, sft_val_csv = "data/sft_train.csv", "data/sft_val.csv"
train_df = pd.read_csv(sft_train_csv)
val_df = pd.read_csv(sft_val_csv)

In [20]:
# Start from a fresh wrapper and load the T5-small checkpoint into it.
base_checkpoint = "google-t5/t5-small"
model = SimpleT5_TRL()
model.from_pretrained(base_checkpoint)

Loading weights:   0%|          | 0/131 [00:00<?, ?it/s]

In [21]:
# Supervised fine-tuning of T5-small on the SFT frames.
sft_args = dict(
    train_df=train_df,
    eval_df=val_df,
    # Token budgets for the encoder input and the decoder target.
    source_max_token_len=512,
    target_max_token_len=128,
    batch_size=4,
    max_epochs=5,
    learning_rate=1e-4,
    outputdir="outputs/t5_sft",
    # One checkpoint per epoch, keeping at most two on disk.
    save_strategy="epoch",
    save_total_limit=2,
)
model.train(**sft_args)

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.509808,1.7442,62.8488,39.1475,62.8488,62.8488
2,2.161224,1.64276,66.0504,41.3722,66.0504,66.0504
3,1.64264,1.600037,70.2658,46.0059,70.2658,70.2658
4,1.507864,1.583732,70.2658,46.0059,70.2658,70.2658
5,1.840633,1.574657,70.2658,46.0059,70.2658,70.2658


In [22]:
# Reload a saved T5 SFT checkpoint and run the one-prompt smoke test.
# checkpoint-75 is presumably the final step of the run above
# (60 rows / batch size 4 x 5 epochs) — verify if data or batch size changes.
sft_checkpoint = "outputs/t5_sft/checkpoint-75"
model.load_model(sft_checkpoint, use_gpu=True)
prediction = model.predict("translate English to French: Good Morning!")
print(prediction)

Loading weights:   0%|          | 0/131 [00:00<?, ?it/s]

['Bonjour!']


## RFT

In [23]:
# Swap the SFT frames for the RFT train/validation splits.
# Note that train_df is rebound here; the SFT training frame is no longer
# reachable under that name after this cell runs.
rft_train_csv, rft_val_csv = "data/rft_train.csv", "data/rft_val.csv"
train_df = pd.read_csv(rft_train_csv)
eval_df = pd.read_csv(rft_val_csv)

In [24]:
# RFT stage with rank-16 LoRA adapters, run on the reloaded SFT model.
rft_args = dict(
    train_df=train_df,
    eval_df=eval_df,
    max_seq_length=512,
    batch_size=8,
    max_epochs=3,
    learning_rate=2e-5,
    finetuning="lora",
    lora_r=16,
    outputdir="outputs/rft",
)
model.train_rft(**rft_args)

trainable params: 1,179,648 || all params: 61,686,272 || trainable%: 1.9123


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.789991,3.317001,40.0232,23.0701,37.0134,37.0134
2,3.623442,3.312031,40.4546,22.9676,37.4448,37.4448
3,2.651294,3.310171,40.4546,22.9676,37.4448,37.4448


In [25]:
# Reload a saved RFT checkpoint and run the one-prompt smoke test.
# checkpoint-30 is presumably the final step of the RFT run
# (80 rows / batch size 8 x 3 epochs) — verify if data or batch size changes.
rft_checkpoint = "outputs/rft/checkpoint-30"
model.load_model(rft_checkpoint, use_gpu=True)
prediction = model.predict("translate English to French: Good Morning! How are you?")
print(prediction)

Loading weights:   0%|          | 0/131 [00:00<?, ?it/s]

["Le matin, c'est-à-dire que vous êtes bien?"]


# BART/PEGASUS/T5GEMMA2 testing

In [1]:
from simplet5_trl import SimpleT5_TRL
import pandas as pd

In [2]:
def test_model(model_name, output_root="outputs", use_gpu=True):
    """Run the SFT -> RFT smoke test for one pretrained checkpoint.

    Steps, in order:
      1. Load ``model_name`` into a fresh ``SimpleT5_TRL`` wrapper.
      2. Fine-tune with ``model.train`` on ``data/sft_train.csv`` /
         ``data/sft_val.csv`` (5 epochs, batch size 4).
      3. Continue with ``model.train_rft`` on ``data/rft_train.csv`` /
         ``data/rft_val.csv`` using LoRA (``lora_r=8``, 3 epochs, batch size 8).
      4. Reload the highest-numbered ``checkpoint-*`` directory written by the
         RFT run and print the prediction for a fixed prompt.

    Checkpoints go to ``<output_root>/<slug>_sft`` and ``<output_root>/<slug>_rft``,
    where ``slug`` is derived from ``model_name``. Each model therefore gets its
    own directories instead of every run sharing ``outputs/t5_sft`` and
    ``outputs/rft``, which other cells of this notebook also write to.

    Args:
        model_name: Hub id or local path passed to ``from_pretrained``.
        output_root: Directory under which the per-model output folders are
            created. Defaults to ``"outputs"``.
        use_gpu: Forwarded to ``model.load_model`` when reloading the
            checkpoint. Defaults to ``True``, as before.

    Returns:
        None. The prediction is printed.

    Raises:
        FileNotFoundError: If the RFT run left no ``checkpoint-<step>``
            directory to reload.
    """
    # Function-scope imports keep this cell runnable without touching the
    # notebook's import cell.
    import re
    from pathlib import Path

    # e.g. "google/pegasus-xsum" -> "google_pegasus-xsum": usable as one folder name.
    slug = re.sub(r"[^A-Za-z0-9._-]+", "_", model_name).strip("_") or "model"
    sft_dir = Path(output_root) / f"{slug}_sft"
    rft_dir = Path(output_root) / f"{slug}_rft"

    # --- Stage 1: supervised fine-tuning ---------------------------------
    sft_train_df = pd.read_csv("data/sft_train.csv")
    sft_val_df = pd.read_csv("data/sft_val.csv")

    model = SimpleT5_TRL()
    model.from_pretrained(model_name)
    model.train(
        train_df=sft_train_df,
        eval_df=sft_val_df,
        source_max_token_len=512,
        target_max_token_len=128,
        batch_size=4,
        max_epochs=5,
        learning_rate=1e-4,
        outputdir=str(sft_dir),
        save_strategy="epoch",
        save_total_limit=2,
    )

    # --- Stage 2: RFT with LoRA adapters ---------------------------------
    rft_train_df = pd.read_csv("data/rft_train.csv")
    rft_eval_df = pd.read_csv("data/rft_val.csv")
    model.train_rft(
        train_df=rft_train_df,
        eval_df=rft_eval_df,
        max_seq_length=128,
        batch_size=8,
        max_epochs=3,
        learning_rate=2e-5,
        finetuning="lora",
        lora_r=8,
        outputdir=str(rft_dir),
    )

    # --- Stage 3: reload the last checkpoint and generate ----------------
    # Pick the checkpoint by step number instead of hard-coding
    # "checkpoint-30", which only holds for 80 rows / batch size 8 / 3 epochs.
    # NOTE(review): a leftover checkpoint from an earlier, longer run in the
    # same directory would win here; clear the folder when changing settings.
    def _step(path):
        return int(path.name.rsplit("-", 1)[-1])

    checkpoints = sorted(
        (
            p
            for p in rft_dir.glob("checkpoint-*")
            if p.is_dir() and p.name.rsplit("-", 1)[-1].isdigit()
        ),
        key=_step,
    )
    if not checkpoints:
        raise FileNotFoundError(
            f"No checkpoint-<step> directory found in {rft_dir} after RFT training"
        )

    model.load_model(str(checkpoints[-1]), use_gpu=use_gpu)
    print(model.predict("translate English to French: Good Morning! How are you?"))

In [3]:
# Run the SFT -> RFT smoke test on the BART base checkpoint.
test_model(model_name="facebook/bart-base")

Loading weights:   0%|          | 0/259 [00:00<?, ?it/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

2025-12-30 20:44:40.158861: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,2.806641,2.452336,40.3147,25.0343,40.3147,40.3147
2,1.650561,2.526084,38.6469,20.2004,37.9525,37.9525
3,1.486393,2.399581,42.5127,23.2647,40.7357,40.7357
4,1.307119,2.343822,36.9133,17.3487,34.928,34.928
5,0.434856,2.361337,39.2487,18.6212,36.2217,36.2217


trainable params: 884,736 || all params: 140,305,152 || trainable%: 0.6306


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,4.438905,4.486356,26.4494,10.1398,22.9738,22.9738
2,3.953916,4.443602,25.6259,8.4732,21.562,21.562
3,4.737692,4.428769,25.6259,8.4732,21.562,21.562


Loading weights:   0%|          | 0/259 [00:00<?, ?it/s]

['translate English to French: Good Morning! How are you?']


In [4]:
# Run the SFT -> RFT smoke test on the PEGASUS XSum checkpoint.
test_model(model_name="google/pegasus-xsum")

Loading weights:   0%|          | 0/678 [00:00<?, ?it/s]

PegasusForConditionalGeneration LOAD REPORT from: google/pegasus-xsum
Key                                  | Status  | 
-------------------------------------+---------+-
model.decoder.embed_positions.weight | MISSING | 
model.encoder.embed_positions.weight | MISSING | 

Notes:
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,3.507122,3.077482,38.5459,18.6398,37.8106,37.8106
2,2.169777,2.784922,44.9608,19.2021,44.9608,44.9608
3,3.043738,2.688685,47.7488,23.7644,47.7488,47.7488
4,2.625656,2.629913,48.3223,22.254,48.3223,48.3223
5,1.585173,2.61732,47.3753,22.254,47.3753,47.3753


trainable params: 3,145,728 || all params: 572,894,208 || trainable%: 0.5491


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,3.377697,3.276729,25.3399,9.1482,24.1267,24.1267
2,3.379344,3.27193,25.3399,9.1482,24.1267,24.1267
3,3.517129,3.27012,25.3399,9.1482,24.1267,24.1267


Loading weights:   0%|          | 0/678 [00:00<?, ?it/s]

PegasusForConditionalGeneration LOAD REPORT from: google/pegasus-xsum
Key                                  | Status  | 
-------------------------------------+---------+-
model.decoder.embed_positions.weight | MISSING | 
model.encoder.embed_positions.weight | MISSING | 

Notes:
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


['How are you?']


In [3]:
# Run the SFT -> RFT smoke test on the T5Gemma 2 270m-270m checkpoint.
test_model(model_name="google/t5gemma-2-270m-270m")

Loading weights:   0%|          | 0/911 [00:00<?, ?it/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

2025-12-30 20:49:50.945706: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,4.186065,3.351251,7.5249,1.9866,6.5467,6.5467
2,1.921416,3.73785,6.8505,1.2239,6.2427,6.2427
3,0.80453,4.083356,6.0039,1.6963,5.642,5.642
4,0.361469,3.786724,5.3051,1.6282,4.9678,4.9678
5,0.115626,3.918174,5.6961,1.3785,5.1966,5.1966


trainable params: 2,967,552 || all params: 788,996,848 || trainable%: 0.3761


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,5.854585,6.060541,4.1656,1.2174,4.0406,4.0406
2,5.160693,5.71316,5.9905,1.4477,5.0639,5.0639
3,5.907971,5.591557,3.6042,1.2188,3.4188,3.4188


Loading weights:   0%|          | 0/911 [00:00<?, ?it/s]

[" Are you feeling better? What are you doing today? How are you? I hope you feel better. Have a good day! If you have any questions or comments, please don't hesitate to contact me at 610-238-4759. Thank you for your business and look forward to hearing from you soon."]
