# Fine-tune T5 for Payment Intent Extraction
This notebook shows how to fine-tune a T5 model (`t5-small`) on a dataset of natural-language payment commands.

In [None]:
!pip install transformers datasets accelerate -q

In [None]:
from datasets import load_dataset, Dataset
import json

# Load the JSONL file (one JSON object per line) into a HuggingFace Dataset.
# The preprocessing step in this notebook reads the 'prompt' and 'completion'
# fields of each record.
with open('payment_intent_parser/fine_tune_data.jsonl', encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing newline) so json.loads never receives ''.
    data = [json.loads(line) for line in f if line.strip()]

dataset = Dataset.from_list(data)
# Fixed seed: the 80/20 train/test split is identical on every Restart & Run All.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

In [None]:
import torch
from transformers import (
    DataCollatorForSeq2Seq,
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer,
    TrainingArguments,
)
# Tokenizer for the 't5-small' checkpoint; shared by preprocessing and inference.
tokenizer = T5Tokenizer.from_pretrained(
    't5-small',
)

# Tokenization
def preprocess(example):
    """Tokenize a single record for sequence-to-sequence training.

    Args:
        example: Mapping with a 'prompt' entry (the raw command) and a
            'completion' entry (the text the model should produce).

    Returns:
        The result of calling the module-level ``tokenizer`` on the prompt,
        prefixed with the task tag 'extract intent: ', with the completion
        supplied as ``text_target`` and truncation enabled.
    """
    task_prefix = 'extract intent: '
    source = task_prefix + example['prompt']
    expected = example['completion']
    encoded = tokenizer(source, truncation=True, text_target=expected)
    return encoded

# Tokenize every split and drop the original columns, so only the
# tokenizer's output fields remain in the resulting dataset.
tokenized = dataset.map(
    function=preprocess,
    remove_columns=dataset['train'].column_names,
)

In [None]:
model = T5ForConditionalGeneration.from_pretrained('t5-small')

# transformers renamed `evaluation_strategy` to `eval_strategy` and newer releases
# reject the old name. The install cell is unpinned, so pick whichever keyword the
# installed TrainingArguments dataclass actually declares.
eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    logging_dir='./logs',
    save_strategy='epoch',
    logging_steps=10,
    # fp16 mixed precision needs a GPU; requesting it unconditionally makes
    # TrainingArguments raise on CPU-only machines. Fall back to full precision there.
    # NOTE(review): T5 checkpoints are reported to be numerically fragile in fp16 --
    # if the loss becomes NaN, set this to False.
    fp16=torch.cuda.is_available(),
    # Evaluate once per epoch (matches save_strategy above).
    **{eval_strategy_key: 'epoch'},
)

# The examples were tokenized without padding, so sequences within a batch have
# different lengths and the Trainer's default collator cannot stack them into tensors.
# DataCollatorForSeq2Seq pads the inputs per batch and pads the labels with -100,
# which the loss ignores.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized['train'],
    eval_dataset=tokenized['test'],
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)

trainer.train()

In [None]:
# Inference example
def predict(text, max_new_tokens=128):
    """Generate the model's output for one natural-language command.

    Args:
        text: The raw command, e.g. 'Pay Sarah 100 bucks'. The task prefix
            'extract intent: ' used during preprocessing is prepended here.
        max_new_tokens: Upper bound on the number of generated tokens. Without an
            explicit limit, generation falls back to the library default (20 tokens),
            which can cut a longer completion short.

    Returns:
        The decoded output string with special tokens removed.
    """
    # Make sure dropout is disabled for inference.
    model.eval()
    # After training the model may live on the GPU; the inputs must be on the same device.
    inputs = tokenizer('extract intent: ' + text, return_tensors='pt').to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Try the model on a sample command; the cell shows the returned string.
predict(text='Pay Sarah 100 bucks')