In [1]:
import os
import random
import numpy as np
import pandas as pd
import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


2025-11-18 05:31:52.555963: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763443912.726298      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763443912.774347      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Using device: cuda


In [2]:
TRAIN_FILE = "/kaggle/input/plausibilitytraindata/train_data.json"

df = pd.read_json(TRAIN_FILE)

print("Shape:", df.shape)
print("Columns:", df.columns.tolist())
df.head()


Shape: (960, 4)
Columns: ['id', 'syllogism', 'validity', 'plausibility']


Unnamed: 0,id,syllogism,validity,plausibility
0,50146f21-d265-4e3a-8d93-8165cdbe89a3,All cars are a type of vehicle. No animal is a...,False,True
1,dfafb4f6-4e1d-4cd5-aeb4-75d36aafdf1a,Nothing that is a soda is a juice. A portion o...,True,True
2,e30b1f83-a4c3-49cb-8aaf-5f64208c625b,Everything that is a planet is a celestial bod...,False,False
3,a30e07d5-0fb3-4097-9892-4b145b0c54f5,Every cat is an invisible creature. A number o...,True,False
4,5b8161b7-b1bf-4e16-a854-cd52cdce8a1b,There are no capital cities which are oceans. ...,True,True


In [3]:
def label_to_int(x):
    if isinstance(x, bool):
        return int(x)
    if isinstance(x, str):
        return 1 if x.lower() in ["true", "1", "yes"] else 0
    return int(x)

df["label"] = df["plausibility"].apply(label_to_int)

# Keep only useful columns
df = df[["id", "syllogism", "label"]]

print(df["label"].value_counts())
df.head()


label
0    486
1    474
Name: count, dtype: int64


Unnamed: 0,id,syllogism,label
0,50146f21-d265-4e3a-8d93-8165cdbe89a3,All cars are a type of vehicle. No animal is a...,1
1,dfafb4f6-4e1d-4cd5-aeb4-75d36aafdf1a,Nothing that is a soda is a juice. A portion o...,1
2,e30b1f83-a4c3-49cb-8aaf-5f64208c625b,Everything that is a planet is a celestial bod...,0
3,a30e07d5-0fb3-4097-9892-4b145b0c54f5,Every cat is an invisible creature. A number o...,0
4,5b8161b7-b1bf-4e16-a854-cd52cdce8a1b,There are no capital cities which are oceans. ...,1


In [4]:
train_df, val_df = train_test_split(
    df,
    test_size=0.15,
    random_state=SEED,
    stratify=df["label"]
)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))
print(train_df["label"].value_counts(normalize=True))
print(val_df["label"].value_counts(normalize=True))

# Convert to HuggingFace Datasets
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset   = Dataset.from_pandas(val_df.reset_index(drop=True))


Train size: 816
Validation size: 144
label
0    0.506127
1    0.493873
Name: proportion, dtype: float64
label
0    0.506944
1    0.493056
Name: proportion, dtype: float64


In [5]:
MODEL_NAME = "microsoft/mdeberta-v3-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(batch):
    return tokenizer(
        batch["syllogism"],
        truncation=True,
        max_length=512,  
    )

# Apply tokenization
train_tokenized = train_dataset.map(tokenize_function, batched=True)
val_tokenized   = val_dataset.map(tokenize_function, batched=True)

# Dynamic padding for batches
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

print("Tokenization complete.")
train_tokenized[0]


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]



Map:   0%|          | 0/816 [00:00<?, ? examples/s]

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

Tokenization complete.


{'id': '1e6eb831-90f8-44bc-9af7-cf906db517a2',
 'syllogism': 'Every single shark is an aquatic creature. A number of sharks are classified as fish. Consequently, it is the case that some fish are aquatic creatures.',
 'label': 1,
 'input_ids': [1,
  39580,
  6676,
  260,
  129205,
  340,
  462,
  65478,
  6863,
  318,
  180345,
  261,
  299,
  4404,
  305,
  260,
  129205,
  264,
  419,
  151440,
  528,
  42092,
  261,
  372,
  177793,
  485,
  262,
  610,
  340,
  288,
  4073,
  534,
  2156,
  42092,
  419,
  65478,
  6863,
  34931,
  19178,
  261,
  2],
 'token_type_ids': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
 

In [6]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,     # plausible (1) / implausible (0)
).to(device)

model.config.id2label = {0: "implausible", 1: "plausible"}
model.config.label2id = {"implausible": 0, "plausible": 1}

print("Model loaded on:", device)


pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on: cuda


In [7]:
training_args = TrainingArguments(
    output_dir="./plausibility_deberta_model",
    num_train_epochs=7,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_steps=20,

    save_steps=200,
    eval_steps=200,
    do_eval=True,

    logging_dir="./logs",
    report_to="none"
)

print("TrainingArguments loaded successfully.")


TrainingArguments loaded successfully.


In [8]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="macro")

    return {"accuracy": acc, "f1": f1}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

print("Trainer created successfully.")


Trainer created successfully.


  trainer = Trainer(


In [9]:
train_output = trainer.train()

print("\n\n=== Training Completed Successfully ===")

Step,Training Loss
20,0.7007
40,0.7148
60,0.707
80,0.6936
100,0.7074
120,0.6975
140,0.6522
160,0.6632
180,0.6621
200,0.6462




=== Training Completed Successfully ===


In [10]:
eval_metrics = trainer.evaluate(val_tokenized)
print("Validation Metrics:", eval_metrics)

# For a clearer breakdown
val_text = val_df["syllogism"].tolist()
val_true = val_df["label"].tolist()

# Get predictions
preds_logits = trainer.predict(val_tokenized).predictions
preds = preds_logits.argmax(axis=1)

acc = accuracy_score(val_true, preds)
f1 = f1_score(val_true, preds, average="macro")

print(f"\nFinal Plausibility Accuracy: {acc:.4f}")
print(f"Final Plausibility F1 (macro): {f1:.4f}")


Validation Metrics: {'eval_loss': 0.8640713095664978, 'eval_accuracy': 0.7777777777777778, 'eval_f1': 0.7776061776061776, 'eval_runtime': 0.5369, 'eval_samples_per_second': 268.183, 'eval_steps_per_second': 33.523, 'epoch': 7.0}

Final Plausibility Accuracy: 0.7778
Final Plausibility F1 (macro): 0.7776


In [11]:
SAVE_DIR = "./plausibility_deberta_final"

model.save_pretrained(SAVE_DIR)

tokenizer.save_pretrained(SAVE_DIR)

print("Model saved to:", SAVE_DIR)
print("Files:", os.listdir(SAVE_DIR))

Model saved to: ./plausibility_deberta_final
Files: ['tokenizer_config.json', 'added_tokens.json', 'config.json', 'spm.model', 'tokenizer.json', 'special_tokens_map.json', 'model.safetensors']
