In [1]:
import warnings
warnings.filterwarnings("ignore")

# 1 Dataset

In [2]:
! pip install transformers datasets seqeval evaluate accelerate -U

Collecting transformers
  Downloading transformers-4.37.2-py3-none-any.whl (8.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.17.0-py3-none-any.whl (536 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.27.0-py3-none-any.whl (279 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [3]:
from datasets import load_dataset
dataset = load_dataset("AmazonScience/massive", "fa-IR")

Downloading data:   0%|          | 0.00/908k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/180k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/249k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/11514 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2033 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2974 [00:00<?, ? examples/s]

In [4]:
intent_classes = dataset['train'].features['intent'].names
intent_classes = [' '.join(intent_class.split('_')) for intent_class in intent_classes]

In [5]:
label2id = dict(zip(intent_classes, range(len(intent_classes))))
id2label = dict(zip(range(len(intent_classes)), intent_classes))

# 2 Training

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, T5ForConditionalGeneration, DataCollatorWithPadding, AdamW

In [7]:
model_name = "Ahmad/parsT5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.02M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

In [8]:
def intent_id2label(example):
    if id2label.get(example['intent']):
      example["intent_class"] = id2label[example['intent']]
    return example

dataset = dataset.map(intent_id2label)

Map:   0%|          | 0/11514 [00:00<?, ? examples/s]

Map:   0%|          | 0/2033 [00:00<?, ? examples/s]

Map:   0%|          | 0/2974 [00:00<?, ? examples/s]

In [9]:
train_dataset_inputs = tokenizer(dataset['train']['intent_class'], return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids
train_dataset_labels = tokenizer(dataset['train']['utt'], return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids

In [10]:
train_dataset_inputs.shape, train_dataset_labels.shape

(torch.Size([11514, 10]), torch.Size([11514, 48]))

In [11]:
from tqdm import tqdm

num_examples = train_dataset_inputs.shape[0]
num_epochs = 3
learning_rate = 1e-4
batch_size = 64

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # Assign the tqdm instance to a variable
    tqdm_iterator = tqdm(range(0, num_examples, batch_size), desc=f'Epoch {epoch + 1}/{num_epochs}')

    for start in tqdm_iterator:
        end = start + batch_size
        if end > num_examples:
            end = num_examples

        inputs = train_dataset_inputs[start:end].to(device)
        labels = train_dataset_labels[start:end].to(device)

        optimizer.zero_grad()

        loss = model(input_ids=inputs, labels=labels).loss
        loss.backward()

        optimizer.step()

        # Use the tqdm instance to update the desc parameter
        tqdm_iterator.set_postfix(loss=loss.item())


Epoch 1/3: 100%|██████████| 180/180 [02:45<00:00,  1.09it/s, loss=7.71]
Epoch 2/3: 100%|██████████| 180/180 [02:47<00:00,  1.07it/s, loss=7.56]
Epoch 3/3: 100%|██████████| 180/180 [02:46<00:00,  1.08it/s, loss=7.12]


In [12]:
print(f"Final Loss: {loss.item()}")

Final Loss: 7.117866039276123


In [13]:
# saving the trained model
model.save_pretrained("parsT5-finetuned_intent")
tokenizer.save_pretrained("parsT5-finetuned_intent")

('parsT5-finetuned_intent/tokenizer_config.json',
 'parsT5-finetuned_intent/special_tokens_map.json',
 'parsT5-finetuned_intent/tokenizer.json')

In [14]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
# push the model to hub
model.push_to_hub("parsT5-finetuned_intent")
tokenizer.push_to_hub("parsT5-finetuned_intent")

ValueError: Token is required (write-access action) but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.

# 3 Test

In [16]:
# manual test 5 from test dataset
print('-'*100)
for i in range(5):
    test_intent = dataset['test'][i]['intent_class']
    test_input = tokenizer(test_intent, return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids
    test_input = test_input.to(device)
    model.eval()
    test_output = model.generate(input_ids=test_input)

    print("Intent:", test_intent)
    print("Real:", dataset['test'][i]['utt'])
    print("Generate:", tokenizer.decode(test_output[0]))
    print('-'*100)

----------------------------------------------------------------------------------------------------
Intent: alarm set
Real: این هفته ساعت پنج صبح بیدارم کن
Generate: <pad> زنگ خطر را به من بگو</s>
----------------------------------------------------------------------------------------------------
Intent: audio volume mute
Real: ساکت
Generate: <pad></s>
----------------------------------------------------------------------------------------------------
Intent: iot hue lightchange
Real: صورتی همان چیزی است که نیاز داریم
Generate: <pad> یک ساعته که در ساعت پنج بامداد به من نزدیک شد</s>
----------------------------------------------------------------------------------------------------
Intent: iot hue lighton
Real: و تاریک شده است
Generate: <pad></s>
----------------------------------------------------------------------------------------------------
Intent: iot hue lightoff
Real: علی چراغ‌های اتاق خواب را خاموش کن
Generate: <pad> یک ساعته را به من نشان بده</s>
----------------------------

# 4 Score

In [17]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [18]:
! pip install sacrebleu rouge-score bert-score

Collecting sacrebleu
  Downloading sacrebleu-2.4.0-py3-none-any.whl (106 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/106.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.3/106.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=r

In [19]:
import sacrebleu
import evaluate
from bert_score import score

In [20]:
# Load the fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("parsT5-finetuned_intent")
model = T5ForConditionalGeneration.from_pretrained("parsT5-finetuned_intent")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [21]:
num_tests = 100

# Define reference and hypothesis lists for evaluation
references = dataset['test']['utt'][:num_tests]
hypotheses = []

# Set the model to evaluation mode
model.eval()
model.to(device)

# Generate hypotheses for each intent
for intent in dataset['test']['intent_class'][:num_tests]:
    input = tokenizer(intent, return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids.to(device)
    generated_text = model.generate(input_ids=input)
    hypotheses.append(tokenizer.decode(generated_text[0]))

In [22]:
# BLEU scores
bleu = sacrebleu.corpus_bleu(hypotheses, [references], force=True)
print(f"BLEU-1: {bleu.precisions[0]:.4f}, BLEU-2: {bleu.precisions[1]:.4f}, BLEU-3: {bleu.precisions[2]:.4f}, BLEU-4: {bleu.precisions[3]:.4f}")

BLEU-1: 2.9641, BLEU-2: 0.4230, BLEU-3: 0.1848, BLEU-4: 0.0509


In [23]:
# Rouge scores
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=hypotheses, references=references)
print(results)

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

{'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0, 'rougeLsum': 0.0}


In [24]:
# Bert-Score
P, R, F1 = score(hypotheses, references, lang='en', verbose=True)
print(f"Bert-Precision: {P.mean().item():.4f}, Bert-Recall: {R.mean().item():.4f}, Bert-F1: {F1.mean().item():.4f}")

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/2 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/2 [00:00<?, ?it/s]

done in 1.45 seconds, 69.14 sentences/sec
Bert-Precision: 0.8668, Bert-Recall: 0.8327, Bert-F1: 0.8486


# 5 Accuracy

In [25]:
from transformers import AutoTokenizer, T5ForConditionalGeneration

In [26]:
model_name = "infinity2357/parsT5-finetuned_intent"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.06M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/796 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

In [30]:
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
from sklearn.metrics import accuracy_score

# Load the generator model
generator_model_name = "infinity2357/parsT5-finetuned_intent"
generator_tokenizer = AutoTokenizer.from_pretrained(generator_model_name)
generator_model = T5ForConditionalGeneration.from_pretrained(generator_model_name)

from transformers import BertForSequenceClassification

classifier_model_name = "bert-base-uncased"
classifier_tokenizer = AutoTokenizer.from_pretrained(classifier_model_name)
classifier_model = BertForSequenceClassification.from_pretrained(classifier_model_name)


# Number of test samples
num_tests = 100

# Initialize lists for true and predicted intent_classes
true_intent_classes = []
predicted_intent_classes = []

# Set the models to evaluation mode
generator_model.eval()
classifier_model.eval()

# Generate and classify intents
for intent in dataset['test']['intent_class'][:num_tests]:
    # Generate text using the generator model
    generated_input = generator_tokenizer(intent, return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids
    generated_text = generator_model.generate(input_ids=generated_input)
    generated_text_decoded = generator_tokenizer.decode(generated_text[0])

   # Classify intent using the classifier model
    classified_input = classifier_tokenizer(generated_text_decoded, return_tensors='pt', truncation=True, max_length=128, padding=True).input_ids
    output = classifier_model(classified_input)
    predicted_class = output.logits.argmax(dim=1).item()


    true_intent_classes.append(label2id[intent])
    predicted_intent_classes.append(predicted_class)

# Calculate accuracy
accuracy = accuracy_score(true_intent_classes, predicted_intent_classes)
print(f"Accuracy: {accuracy * 100:.2f}%")


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Accuracy: 3.00%
