# RTE (Recognizing Textual Entailment) with DeBERTa
## Using a pretrained DeBERTa model fine-tuned on MNLI for zero-shot text classification on SNLI
Inspired by Keras code example [Semantic Similarity with BERT](https://keras.io/examples/nlp/semantic_similarity_with_bert/)

## Setup

In [26]:
# !pip install transformers
# !pip install torch
# !pip install pandas

In [1]:
import pandas as pd

## Download and read SNLI dataset

In [9]:
# !curl -LO https://raw.githubusercontent.com/MohamadMerchant/SNLI/master/data.tar.gz
# !tar -xvzf data.tar.gz

In [2]:
# There are more than 550k samples in total; we will use 100k for this example.
train_df = pd.read_csv("SNLI_Corpus/snli_1.0_train.csv", nrows=100000)
valid_df = pd.read_csv("SNLI_Corpus/snli_1.0_dev.csv")
test_df = pd.read_csv("SNLI_Corpus/snli_1.0_test.csv")

# Shape of the data
print(f"Total train samples : {train_df.shape[0]}")
print(f"Total validation samples: {valid_df.shape[0]}")
print(f"Total test samples: {valid_df.shape[0]}")

print(train_df.dtypes)

train_df.head()

Total train samples : 100000
Total validation samples: 10000
Total test samples: 10000
similarity    object
sentence1     object
sentence2     object
dtype: object


Unnamed: 0,similarity,sentence1,sentence2
0,neutral,A person on a horse jumps over a broken down a...,A person is training his horse for a competition.
1,contradiction,A person on a horse jumps over a broken down a...,"A person is at a diner, ordering an omelette."
2,entailment,A person on a horse jumps over a broken down a...,"A person is outdoors, on a horse."
3,neutral,Children smiling and waving at camera,They are smiling at their parents
4,entailment,Children smiling and waving at camera,There are children present


## Explore examples

In [3]:
import random

idx = random.randint(0, len(train_df))

print(f"Sentence1: {train_df.loc[idx, 'sentence1']}")
print(f"Sentence2: {train_df.loc[idx, 'sentence2']}")
print(f"Similarity: {train_df.loc[idx, 'similarity']}")

Sentence1: A little blond-haired girls peers at the camera through the leaves on the tree that she sits on.
Sentence2: A little girl lookig at camera.
Similarity: entailment


## Preprocess data

In [4]:
print(train_df.similarity.unique())

['neutral' 'contradiction' 'entailment' '-']


In [5]:
train_df.similarity = train_df.similarity.str.upper()
valid_df.similarity = valid_df.similarity.str.upper()
test_df.similarity = test_df.similarity.str.upper()

test_df.head()

Unnamed: 0,similarity,sentence1,sentence2
0,NEUTRAL,This church choir sings to the masses as they ...,The church has cracks in the ceiling.
1,ENTAILMENT,This church choir sings to the masses as they ...,The church is filled with song.
2,CONTRADICTION,This church choir sings to the masses as they ...,A choir singing at a baseball game.
3,NEUTRAL,"A woman with a green headscarf, blue shirt and...",The woman is young.
4,ENTAILMENT,"A woman with a green headscarf, blue shirt and...",The woman is very happy.


In [8]:
# We have some NaN entries in our train data, we will simply drop them.
print("Number of missing values")
print('Training set')
print(train_df.isnull().sum())
print('Validation set')
print(valid_df.isnull().sum())
print('Test set')
print(test_df.isnull().sum())

train_df.dropna(axis=0, inplace=True)

Number of missing values
Training set
similarity    0
sentence1     0
sentence2     3
dtype: int64
Validation set
similarity    0
sentence1     0
sentence2     0
dtype: int64
Test set
similarity    0
sentence1     0
sentence2     0
dtype: int64


In [6]:
print("Train Target Distribution")
print(train_df.similarity.value_counts())

print("Validation Target Distribution")
print(valid_df.similarity.value_counts())

print("Test Target Distribution")
print(test_df.similarity.value_counts())

Train Target Distribution
ENTAILMENT       33385
CONTRADICTION    33311
NEUTRAL          33194
-                  110
Name: similarity, dtype: int64
Validation Target Distribution
ENTAILMENT       3329
CONTRADICTION    3278
NEUTRAL          3235
-                 158
Name: similarity, dtype: int64
Test Target Distribution
ENTAILMENT       3368
CONTRADICTION    3237
NEUTRAL          3219
-                 176
Name: similarity, dtype: int64


In [7]:
train_df = (
    train_df[train_df.similarity != "-"]
    .sample(frac=1.0, random_state=42)
    .reset_index(drop=True)
)
valid_df = (
    valid_df[valid_df.similarity != "-"]
    .sample(frac=1.0, random_state=42)
    .reset_index(drop=True)
)
test_df = (
    test_df[test_df.similarity != "-"]
    .sample(frac=1.0, random_state=42)
    .reset_index(drop=True)
)

## Build and test model

In [77]:
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

HUB_MODEL_CKPT = 'microsoft/deberta-base-mnli'
LOCAL_MODEL_CKPT = './deberta-base-mnli-finetuned-snli/checkpoint-63'


def _pack_pair(pair):
    return f'[CLS] {pair[0]} [SEP] {pair[1]} [SEP]'
    # return f'{sentence1} {sentence2}'

def model_fn():
    tokenizer = AutoTokenizer.from_pretrained(HUB_MODEL_CKPT)
    model = AutoModelForSequenceClassification.from_pretrained(HUB_MODEL_CKPT)
    return pipeline(task='text-classification', model=model, tokenizer=tokenizer)

def predict_fn(pairs_list, model):
    packed_pairs = [_pack_pair(pair) for pair in pairs_list]
    return model(packed_pairs)

def output_fn(predictions):
    return np.array([d['label'] for d in predictions])

In [78]:
model = model_fn()

Some weights of the model checkpoint at microsoft/deberta-base-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [79]:
end_idx=2
sentence_pairs_list = train_df[["sentence1", "sentence2"]].values.astype("str")[:end_idx].tolist()

print(f"Sentence pairs: {sentence_pairs_list}")
print(f"Similarity labels: {train_df['similarity'].values[:end_idx].tolist()}")

preds = predict_fn(sentence_pairs_list, model)
print(preds)

Sentence pairs: [['A woman is using toy which blows giant bubbles.', 'A little girl is playing with chalk on a driveway.'], ['A young Asian girl holds a stuffed cat toy in a classroom.', 'A young Asian girl sits in class with a stuffed cat toy, the only surviving possession remaining after the tsunami.']]
Similarity labels: ['CONTRADICTION', 'NEUTRAL']
[{'label': 'CONTRADICTION', 'score': 0.9961360096931458}, {'label': 'NEUTRAL', 'score': 0.9989674091339111}]


  query_layer = query_layer / torch.tensor(scale, dtype=query_layer.dtype)
  p2c_att = torch.matmul(key_layer, torch.tensor(pos_query_layer.transpose(-1, -2), dtype=key_layer.dtype))


## Evaluation on test set

In [45]:
def evaluate(evaluation_df, n_samples, model):
    
    if n_samples:
        sampled_df = evaluation_df.sample(n_samples, random_state=None)
    else:
        sampled_df = evaluation_df.copy()
    y_pred = output_fn(predict_fn(sampled_df[["sentence1", "sentence2"]].values.astype("str").tolist(), model))
    y_true = sampled_df.similarity.values
    test_acc = np.mean(np.float32(y_true == y_pred))
    print(f'Test accuracy: {test_acc:.3f}')


In [82]:
%%time
evaluate(test_df, None, model)


  query_layer = query_layer / torch.tensor(scale, dtype=query_layer.dtype)
  p2c_att = torch.matmul(key_layer, torch.tensor(pos_query_layer.transpose(-1, -2), dtype=key_layer.dtype))


Test accuracy: 0.852
CPU times: user 1h 29s, sys: 1min 25s, total: 1h 1min 54s
Wall time: 11min 13s
