# Etude du Dataset SILICONE


In [60]:
from datasets import load_dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from tqdm import tqdm
import torch
from tasknet import Adapter
import numpy as np
import evaluate

In [2]:
# Load SILICONE dataset in its Dyda_da config (only 4 possible dialog acts)

silicone_dyda = load_dataset('silicone', 'dyda_da')
dyda_train = silicone_dyda['train']
dyda_valid = silicone_dyda['validation']
dyda_test = silicone_dyda['test']

Found cached dataset silicone (C:/Users/robin/.cache/huggingface/datasets/silicone/dyda_da/1.0.0/af617406c94e3f78da85f7ea74ebfbd3f297a9665cb54adbae305b03bc4442a5)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
dyda_train

Dataset({
    features: ['Utterance', 'Dialogue_Act', 'Dialogue_ID', 'Label', 'Idx'],
    num_rows: 87170
})

In [4]:
dyda_train[0:5]

{'Utterance': ['say , jim , how about going for a few beers after dinner ?',
  'you know that is tempting but is really not good for our fitness .',
  'what do you mean ? it will help us to relax .',
  "do you really think so ? i don't . it will just make us fat and act silly . remember last time ?",
  "i guess you are right.but what shall we do ? i don't feel like sitting at home ."],
 'Dialogue_Act': ['directive',
  'commissive',
  'question',
  'question',
  'question'],
 'Dialogue_ID': ['1', '1', '1', '1', '1'],
 'Label': [1, 0, 3, 3, 3],
 'Idx': [0, 1, 2, 3, 4]}

In [5]:
dyda_test

Dataset({
    features: ['Utterance', 'Dialogue_Act', 'Dialogue_ID', 'Label', 'Idx'],
    num_rows: 7740
})

In [6]:
num_labels = 4
labels = ['commissive', 'directive', 'inform', 'question']
id2label = {0: 'commissive',
            1: 'directive', 
            2: 'inform', 
            3: 'question'
}
label2id = {value: key for key, value in id2label.items()}

In [21]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_name = 'sileod/deberta-v3-base-tasksource-nli'

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, 
    ignore_mismatched_sizes=True,
    num_labels=num_labels, id2label=id2label, label2id=label2id
).to(device)

adapter = Adapter.from_pretrained(model_name.replace('nli','adapters'))
model_for_dyda = adapter.adapt_model_to_task(model, 'silicone/dyda_da')

classifier = TextClassificationPipeline(model=model_for_dyda, tokenizer=tokenizer)


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at sileod/deberta-v3-base-tasksource-nli and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You are using a model of type deberta-v2 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


In [22]:
dyda_train_df = pd.DataFrame.from_dict(dyda_train[:])
dyda_test_df = pd.DataFrame.from_dict(dyda_test[:])
dyda_test_df.head()

Unnamed: 0,Utterance,Dialogue_Act,Dialogue_ID,Label,Idx
0,"hey man , you wanna buy some weed ?",directive,1,1,0
1,some what ?,question,1,3,1
2,"weed ! you know ? pot , ganja , mary jane some...",directive,1,1,2
3,"oh , umm , no thanks .",commissive,1,0,3
4,i also have blow if you prefer to do a few lin...,directive,1,1,4


In [53]:
model_inputs = tokenizer(dyda_test['Utterance'])
print(type(model_inputs['input_ids']))
model_inputs['input_ids'] = np.array(model_inputs['input_ids'])
print(type(model_inputs['input_ids']))

<class 'list'>
<class 'numpy.ndarray'>


  model_inputs['input_ids'] = np.array(model_inputs['input_ids'])


In [55]:
print(model_inputs[:5])

[Encoding(num_tokens=11, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]), Encoding(num_tokens=5, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]), Encoding(num_tokens=17, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]), Encoding(num_tokens=9, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]), Encoding(num_tokens=15, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])]


In [56]:
preds = model_for_dyda(model_inputs)

AttributeError: 

In [54]:
predictions = []
for utterance_enc in tqdm(model_inputs[:100]):
    predictions.append({
        'text': utterance,
        'pred': model_for_dyda(utterance_enc)
    })

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]


AttributeError: 'tokenizers.Encoding' object has no attribute 'size'

In [62]:
predictions = []
for utterance in tqdm(dyda_test['Utterance'][:100]):
    predictions.append({
        'text': utterance,
        'pred': classifier(utterance)
    })

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:45<00:00,  2.21it/s]


In [63]:
predictions[0]

{'text': 'hey man , you wanna buy some weed ?',
 'pred': [{'label': 'question', 'score': 0.3376126289367676}]}

In [64]:
# Analyze predictions
counts = {
    'commissive': 0,
    'directive': 0,
    'inform': 0,
    'question': 0,
}

for prediction in predictions:
    counts[prediction['pred'][0]['label']] += 1

print(counts)

{'commissive': 3, 'directive': 0, 'inform': 95, 'question': 2}


In [65]:
counts_dyda = {
    'commissive': 0,
    'directive': 0,
    'inform': 0,
    'question': 0,
}

for line in dyda_test:
    counts_dyda[line['Dialogue_Act']] += 1

print(counts_dyda)

{'commissive': 718, 'directive': 1278, 'inform': 3534, 'question': 2210}


In [66]:
metric = evaluate.load('accuracy')
metric.compute(predictions=predictions, references=dyda_test['Dialogue_Act'][:100])

ValueError: Predictions and/or references don't match the expected format.
Expected format: {'predictions': Value(dtype='int32', id=None), 'references': Value(dtype='int32', id=None)},
Input predictions: [{'text': 'hey man , you wanna buy some weed ?', 'pred': [{'label': 'question', 'score': 0.3376126289367676}]}, {'text': 'some what ?', 'pred': [{'label': 'inform', 'score': 0.37634843587875366}]}, {'text': 'weed ! you know ? pot , ganja , mary jane some chronic !', 'pred': [{'label': 'inform', 'score': 0.3860337436199188}]}, ..., {'text': "yes , i have booked a room for 24th . it's a double room .", 'pred': [{'label': 'inform', 'score': 0.4091196060180664}]}, {'text': "hold on , please . let me check it for you . yes , you're right . you will keep it for 3 days .", 'pred': [{'label': 'inform', 'score': 0.32679685950279236}]}, {'text': 'well , now i want to change the date from 24th to 28th .', 'pred': [{'label': 'inform', 'score': 0.3827587366104126}]}],
Input references: ['directive', 'question', 'directive', ..., 'inform', 'directive', 'commissive']

# ahmetayrnc/bert-large-cased

In [89]:
model_name2 = 'ahmetayrnc/bert-large-cased'

tokenizer2 = AutoTokenizer.from_pretrained(model_name)

model2 = AutoModelForSequenceClassification.from_pretrained(model_name2, 
    ignore_mismatched_sizes=True,
    num_labels=num_labels, id2label=id2label, label2id=label2id
).to(device)

classifier2 = TextClassificationPipeline(model=model2, tokenizer=tokenizer2)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ahmetayrnc/bert-large-cased and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([46, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
- classifier.bias: found shape torch.Size([46]) in the checkpoint and torch.Size([4]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [90]:
predictions = []
for utterance in tqdm(dyda_test['Utterance'][:50]):
    predictions.append({
        'text': utterance,
        'pred': classifier(utterance)
    })

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:10<00:00,  4.72it/s]


In [91]:
counts_dyda = {
    'commissive': 0,
    'directive': 0,
    'inform': 0,
    'question': 0,
}

for line in predictions:
    counts_dyda[line['pred'][0]['label']] += 1

print(counts_dyda)

{'commissive': 0, 'directive': 0, 'inform': 49, 'question': 1}
