# 💳 FLAN-T5 for Payment Intent and Slot Extraction
This notebook fine-tunes `google/flan-t5-base` to extract payment-related intents and slots from user commands like "Send $200 to Alice tomorrow".

In [2]:
!pip install transformers datasets evaluate --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from datasets import Dataset
import random

In [4]:
examples = [
    {
        'input': 'Send $200 to Alice tomorrow',
        'target': 'Intent: SendMoney | Slots: amount=$200, recipient=Alice, date=tomorrow'
    },
    {
        'input': 'Pay John 150 for dinner',
        'target': 'Intent: SendMoney | Slots: amount=150, recipient=John, reason=dinner'
    },
    {
        'input': 'Request $75 from Mike for groceries',
        'target': 'Intent: RequestMoney | Slots: amount=$75, sender=Mike, reason=groceries'
    },
    {
        'input': 'Remind me to pay rent on the 1st',
        'target': 'Intent: SetReminder | Slots: reason=rent, date=1st'
    },
        {
        'input': 'Add expense of $20 to group Travel Friends',
        'target': 'Intent: AddExpense | Slots: amount=$20, recipient=Travel Friends'
    },
    {
        'input': 'Add rent of $1200 to House Bills group',
        'target': 'Intent: AddExpense | Slots: amount=$1200, recipient=House Bills group'
    },
    {
        'input': 'Add a $350 software charge to Startup Budget group',
        'target': 'Intent: AddExpense | Slots: amount=$350, recipient=Startup Budget'
    },
]
dataset = Dataset.from_list(examples)

In [5]:
model_name = 'google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [8]:
def preprocess(example):
    model_input = tokenizer(example['input'], truncation=True, padding='max_length', max_length=64)
    labels = tokenizer(example['target'], truncation=True, padding='max_length', max_length=64)
    model_input['labels'] = labels['input_ids']
    return model_input

tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/7 [00:00<?, ? examples/s]

In [12]:
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    eval_strategy='no',
    per_device_train_batch_size=4,
    num_train_epochs=20,
    logging_steps=10,
    save_steps=10,
    save_total_limit=1
)

In [None]:
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)
trainer.train()

Step,Training Loss
10,6.4775


In [None]:
input_text = 'Transfer 300 dollars to Mom next week'
inputs = tokenizer(input_text, return_tensors='pt')
outputs = model.generate(**inputs)
print('Prediction:', tokenizer.decode(outputs[0], skip_special_tokens=True))

Prediction: Mom transfers $600 to Mom next week.


In [None]:
input_text = "Pay 300 to her again for groceries"
inputs = tokenizer(input_text, return_tensors='pt')
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Pay 300 to her again for groceries
