In [1]:
train_sentences = [
    {"text": ["How", "do", "I", "turn", "on", "setting_brightness", "?"], "ner": ["O", "O", "O", "state", "state", "setting_name", "O"]},
    {"text": ["I", "want", "to", "turn", "setting_volume", "off", "."], "ner": ["O", "O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Can", "you", "please", "turn", "setting_wifi", "on", "?"], "ner": ["O", "O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Turn", "off", "setting_bluetooth", "please", "."], "ner": ["state", "state", "setting_name", "O", "O"]},
    {"text": ["Enable", "setting_notifications", "for", "me", "."], "ner": ["state", "setting_name", "O", "O", "O"]},
    {"text": ["I", "would", "like", "to", "disable", "setting_location", "services", "."], "ner": ["O", "O", "O", "O", "state", "setting_name", "setting_name", "O"]},
    {"text": ["How", "can", "I", "change", "the", "setting_screen_timeout", "?"], "ner": ["O", "O", "O", "O", "O", "setting_name", "O"]},
    {"text": ["I", "want", "to", "switch", "setting_vpn", "on", "."], "ner": ["O", "O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Please", "turn", "setting_autoupdate", "off", "for", "me", "."], "ner": ["O", "state", "setting_name", "state", "O", "O", "O"]},
    {"text": ["Can", "I", "enable", "setting_mobile_data", "?"], "ner": ["O", "O", "state", "setting_name", "O"]},
    {"text": ["I", "don't", "want", "setting_autosync", "to", "be", "on", "anymore", "."], "ner": ["O", "O", "O", "setting_name", "O", "O", "state", "O", "O"]},
    {"text": ["Turn", "setting_flight_mode", "off", "."], "ner": ["state", "setting_name", "state", "O"]},
    {"text": ["How", "to", "switch", "setting_dnd", "off", "?"], "ner": ["O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Can", "I", "turn", "setting_gps", "on", "?"], "ner": ["O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Enable", "setting_hotspot", "please", "."], "ner": ["state", "setting_name", "O", "O"]},
    {"text": ["Please", "disable", "setting_auto_rotate", "."], "ner": ["O", "state", "setting_name", "O"]},
    {"text": ["How", "do", "I", "turn", "setting_data_roaming", "off", "?"], "ner": ["O", "O", "O", "state", "setting_name", "state", "O"]},
    {"text": ["Switch", "setting_nfc", "on", "."], "ner": ["state", "setting_name", "state", "O"]},
    {"text": ["Turn", "setting_airplane_mode", "off", "."], "ner": ["state", "setting_name", "state", "O"]},
    {"text": ["How", "to", "enable", "setting_dark_mode", "?"], "ner": ["O", "O", "state", "setting_name", "O"]},
    {"text": ["Can", "you", "turn", "setting_screen_lock", "on", "?"], "ner": ["O", "O", "state", "setting_name", "state", "O"]},
]


In [3]:
from transformers import BertForTokenClassification, BertTokenizerFast, TrainingArguments, Trainer
import torch

# Load pre-trained model and tokenizer
model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=3)
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Define a mapping from label names to indices
label_to_index = {"O": 0, "setting_name": 1, "state": 2}
index_to_label = {v: k for k, v in label_to_index.items()}

# Function to encode the examples
def encode_example(sentence):
    inputs = tokenizer(sentence['text'], is_split_into_words=True, padding='max_length', truncation=True, max_length=128)
    labels = [-100 if token_id==tokenizer.pad_token_id else label_to_index[label] for token_id, label in zip(inputs['input_ids'], sentence['ner'])]  
    labels += [-100] * (128 - len(labels))  # pad labels to the max length
    inputs['labels'] = labels
    return inputs


# Encode all examples
train_encodings = [encode_example(s) for s in train_sentences]
train_encodings = [ {k: torch.tensor(v) for k, v in enc.items()} for enc in train_encodings]

# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=20,
    per_device_train_batch_size=16,
    weight_decay=0.01,
)

# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_encodings,
)

# Train the model
trainer.train()

# Function to extract settings and states
def extract_settings_and_states(sentence):
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=2)

    predicted_labels = [index_to_label[p] for p in predictions[0].tolist()]

    # Match up the original tokens with their predicted labels
    original_tokens = tokenizer.tokenize(sentence)
    
    # Only include labels that are not 'O'
    entities = [(token, label) for token, label in zip(original_tokens, predicted_labels) if label != 'O']

    return entities

print("Predicted entities:", extract_settings_and_states("How do I turn on setting_brightness?"))


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 40/40 [02:25<00:00,  3.63s/it]

{'train_runtime': 145.1018, 'train_samples_per_second': 2.895, 'train_steps_per_second': 0.276, 'train_loss': 0.23427119255065917, 'epoch': 20.0}
Predicted entities: [('turn', 'state'), ('on', 'state'), ('setting', 'setting_name')]





# Test model

In [5]:
print("Predicted entities:", extract_settings_and_states("How do I turn on brightness?"))
print("Predicted entities:", extract_settings_and_states("How do I turn off hotspot?"))
print("Predicted entities:", extract_settings_and_states("How do I turn on screen_lock?"))
print("Predicted entities:", extract_settings_and_states("How do I turn off wifi?"))


Predicted entities: [('turn', 'state'), ('on', 'state'), ('brightness', 'setting_name')]
Predicted entities: [('turn', 'state'), ('off', 'state'), ('hot', 'setting_name')]
Predicted entities: [('turn', 'state'), ('on', 'setting_name'), ('screen', 'setting_name')]
Predicted entities: [('turn', 'state'), ('off', 'state'), ('wi', 'setting_name')]
