In [1]:
import pandas as pd
df = pd.read_csv('data/intents_data.csv')
label_id_mp = df[['label','label_name']].drop_duplicates().sort_values('label')

id2label = dict(zip(label_id_mp.label.astype('str'), label_id_mp.label_name))

label2id = dict(zip(label_id_mp.label_name, label_id_mp.label))


In [12]:
from transformers import DistilBertTokenizer, DistilBertModel, AutoModelForSequenceClassification, AutoTokenizer
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased',
                                                            num_labels=len(id2label), 
                                                            id2label=id2label,
                                                            label2id=label2id).to(device)
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

text = "wanna activate the card"
encoded_input = tokenizer(text, return_tensors='pt').to(device)
output = model(**encoded_input)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [48]:
from datasets import load_dataset, Dataset

dataset = load_dataset('data',data_files= 'intents_data.csv', split= None)
ds = dataset['train'].train_test_split(0.3)
ds = ds.remove_columns('label_name')



def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

Map: 100%|██████████| 9158/9158 [00:00<00:00, 11540.51 examples/s]
Map: 100%|██████████| 3925/3925 [00:00<00:00, 14760.16 examples/s]


In [33]:
ds['train'][1]

{'text': "What can I do if my card still hasn't arrived after 2 weeks?",
 'label': 11}

In [36]:
import numpy as np
import evaluate

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [57]:
from transformers import Trainer, TrainingArguments

batch_size = 16
logging_steps = len(ds["train"]) // batch_size
model_name = "distilbert-finetuned-intents"
training_args = TrainingArguments(
    output_dir=model_name,
    num_train_epochs=2,
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    disable_tqdm=False,
    logging_steps=logging_steps,
    log_level="error"
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer
)

In [58]:
trainer.train()

 50%|████▉     | 572/1146 [03:06<03:12,  2.98it/s]
 50%|█████     | 573/1146 [03:06<02:42,  3.53it/s]

{'loss': 3.3275, 'learning_rate': 1.0017452006980804e-05, 'epoch': 1.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                  
 50%|█████     | 573/1146 [03:29<02:42,  3.53it/s]
[A

{'eval_loss': 2.3076109886169434, 'eval_accuracy': 0.6504458598726115, 'eval_runtime': 22.6701, 'eval_samples_per_second': 173.136, 'eval_steps_per_second': 10.851, 'epoch': 1.0}


100%|█████████▉| 1144/1146 [06:40<00:00,  3.00it/s] 
100%|█████████▉| 1144/1146 [06:40<00:00,  3.00it/s]

{'loss': 1.9873, 'learning_rate': 3.490401396160559e-08, 'epoch': 2.0}


100%|██████████| 1146/1146 [06:40<00:00,  3.71it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                   
100%|██████████| 1146/1146 [07:04<00:00,  3.71it/s]
[A
100%|██████████| 1146/1146 [07:04<00:00,  2.70it/s]

{'eval_loss': 1.7148301601409912, 'eval_accuracy': 0.7635668789808917, 'eval_runtime': 23.4762, 'eval_samples_per_second': 167.191, 'eval_steps_per_second': 10.479, 'epoch': 2.0}
{'train_runtime': 424.3493, 'train_samples_per_second': 43.163, 'train_steps_per_second': 2.701, 'train_loss': 2.655939983863897, 'epoch': 2.0}





TrainOutput(global_step=1146, training_loss=2.655939983863897, metrics={'train_runtime': 424.3493, 'train_samples_per_second': 43.163, 'train_steps_per_second': 2.701, 'train_loss': 2.655939983863897, 'epoch': 2.0})

In [60]:
metric.compute()

ValueError: Evaluation module cache file doesn't exist. Please make sure that you call `add` or `add_batch` at least once before calling `compute`.

In [54]:
torch.cuda.empty_cache()
import gc

gc.collect()

3989

In [55]:
torch.cuda.memory_summary(device=None, abbreviated=False)

