In [1]:
# Import Libraries and Load Model/Tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import torch
from tqdm.auto import tqdm

model_name = "microsoft/deberta-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/729 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.62G [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


DebertaForSequenceClassification(
  (deberta): DebertaModel(
    (embeddings): DebertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=0)
      (LayerNorm): DebertaLayerNorm()
      (dropout): StableDropout()
    )
    (encoder): DebertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x DebertaLayer(
          (attention): DebertaAttention(
            (self): DisentangledSelfAttention(
              (in_proj): Linear(in_features=1024, out_features=3072, bias=False)
              (pos_dropout): StableDropout()
              (pos_proj): Linear(in_features=1024, out_features=1024, bias=False)
              (pos_q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): StableDropout()
            )
            (output): DebertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): DebertaLayerNorm()
              (dropout): StableDropout()
            )
          )
   

In [2]:
# MNLI-m test dataset

df_mnli_matched = pd.read_csv('/kaggle/input/nli-dataset-for-sentence-understanding/mnli_test_matched.csv')

# MNLI-mm test dataset

df_mnli_mismatched = pd.read_csv('/kaggle/input/nli-dataset-for-sentence-understanding/mnli_test_mismatched.csv')

# ANLI test dataset round 1

df_anli_r1 = pd.read_csv('/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r1.csv')

# ANLI test dataset round 2

df_anli_r2 = pd.read_csv('/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r2.csv')

# ANLI test dataset round 3

df_anli_r3 = pd.read_csv('/kaggle/input/anli-a-large-scale-nli-benchmark-dataset/test_r3.csv')

# SNLI test dataset

df_snli = pd.read_csv('/kaggle/input/stanford-natural-language-inference-corpus/snli_1.0_test.csv')

In [3]:
def preprocess_data(df, dataset_type):
    # Explicitly copy the DataFrame to avoid SettingWithCopyWarning
    df = df.copy()
    
    if dataset_type in ['mnli_matched', 'mnli_mismatched']:
        df['premise'] = df['premise'].astype(str)
        df['hypothesis'] = df['hypothesis'].astype(str)
        tokenized_inputs = tokenizer(df['premise'].tolist(), df['hypothesis'].tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt")
    elif dataset_type in ['anli_r1', 'anli_r2', 'anli_r3']:
        df['premise'] = df['premise'].astype(str)
        df['hypothesis'] = df['hypothesis'].astype(str)
        df['reason'] = df.get('reason', '').fillna('').astype(str)  # Safely handle 'reason' column
        concatenated_hypotheses = df['hypothesis'] + " [SEP] " + df['reason']
        tokenized_inputs = tokenizer(df['premise'].tolist(), concatenated_hypotheses.tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt")
    elif dataset_type == 'snli':
        df['sentence1'] = df['sentence1'].astype(str)
        df['sentence2'] = df['sentence2'].astype(str)
        tokenized_inputs = tokenizer(df['sentence1'].tolist(), df['sentence2'].tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt")
    else:
        raise ValueError("Dataset type not recognized.")
    return tokenized_inputs


In [4]:
def predict_dataset(df, dataset_type, batch_size=32):
    probabilities = []
    num_batches = (len(df) + batch_size - 1) // batch_size
    
    model.eval()
    
    for i in tqdm(range(num_batches), desc=f"Predicting {dataset_type}"):
        batch = df.iloc[i * batch_size:(i + 1) * batch_size]
        tokenized_inputs = preprocess_data(batch, dataset_type)
        tokenized_inputs = {key: value.to(device) for key, value in tokenized_inputs.items()}
        
        with torch.no_grad():
            outputs = model(**tokenized_inputs)
            probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()
            probabilities.extend(probs)
        
        torch.cuda.empty_cache()
    
    # Convert the probabilities to a DataFrame
    probs_df = pd.DataFrame(probabilities, columns=['Entailment', 'Neutral', 'Contradiction'])
    
    # Save the DataFrame to a CSV file
    probs_df.to_csv(f'/kaggle/working/deberta_{dataset_type}_predictions.csv', index=False)
    
    return probabilities


In [5]:
# Make predictions and save to CSV
predict_dataset(df_anli_r1, 'anli_r1', batch_size=32)
predict_dataset(df_anli_r2, 'anli_r2', batch_size=32)
predict_dataset(df_anli_r3, 'anli_r3', batch_size=32)
predict_dataset(df_mnli_matched, 'mnli_matched', batch_size=32)
predict_dataset(df_mnli_mismatched, 'mnli_mismatched', batch_size=32)
predict_dataset(df_snli, 'snli', batch_size=32)

Predicting anli_r1:   0%|          | 0/32 [00:00<?, ?it/s]

Predicting anli_r2:   0%|          | 0/32 [00:00<?, ?it/s]

Predicting anli_r3:   0%|          | 0/38 [00:00<?, ?it/s]

Predicting mnli_matched:   0%|          | 0/307 [00:00<?, ?it/s]

Predicting mnli_mismatched:   0%|          | 0/308 [00:00<?, ?it/s]

Predicting snli:   0%|          | 0/313 [00:00<?, ?it/s]

[array([0.03476679, 0.96259177, 0.00264141], dtype=float32),
 array([0.00192081, 0.3190319 , 0.6790473 ], dtype=float32),
 array([9.9878269e-01, 7.6433434e-04, 4.5297167e-04], dtype=float32),
 array([0.00100082, 0.9977081 , 0.00129105], dtype=float32),
 array([0.00107996, 0.301363  , 0.6975571 ], dtype=float32),
 array([0.21360792, 0.78555655, 0.00083554], dtype=float32),
 array([5.0442386e-04, 4.3001701e-03, 9.9519533e-01], dtype=float32),
 array([1.0185838e-02, 9.8922110e-01, 5.9302506e-04], dtype=float32),
 array([0.98957914, 0.00544187, 0.00497903], dtype=float32),
 array([1.1934051e-02, 9.8784643e-01, 2.1955071e-04], dtype=float32),
 array([2.2591709e-04, 1.1610643e-02, 9.8816341e-01], dtype=float32),
 array([9.9695230e-01, 2.8768673e-03, 1.7073669e-04], dtype=float32),
 array([9.0337906e-04, 9.9684697e-01, 2.2496032e-03], dtype=float32),
 array([3.2109572e-04, 2.6989866e-03, 9.9697989e-01], dtype=float32),
 array([9.9820161e-01, 1.6458008e-03, 1.5256841e-04], dtype=float32),
 arr