In [1]:
# Cell 1 — environment check and imports
# Run this to ensure required libs are available and to import everything we'll use.
import sys, math, os
import torch
# Report interpreter and framework versions so the run can be reproduced later.
python_banner = sys.version.splitlines()[0]  # first line only; the full banner may span lines
torch_version = getattr(torch, "__version__", "n/a")  # fall back if the attribute is absent
cuda_ready = torch.cuda.is_available()
print("Python:", python_banner)
print("Torch:", torch_version, "CUDA:", cuda_ready)

from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
from torch.utils.data import DataLoader


Python: 3.10.19 | packaged by conda-forge | (main, Oct 22 2025, 22:29:10) [GCC 14.3.0]
Torch: 2.9.0+cu128 CUDA: True


In [2]:
# Cell 2 — load SNLI, report split sizes, and preview a few raw training rows
snli = load_dataset("snli")
print("Splits:", snli.keys())

# Measure each split once, then print the sizes in a fixed order.
split_sizes = {name: len(snli[name]) for name in ("train", "validation", "test")}
print(
    "Sizes: train", split_sizes["train"],
    "validation", split_sizes["validation"],
    "test", split_sizes["test"],
)

# Preview the first 3 training rows; indexing a split returns one example
# whose fields are read by key below.
train_split = snli["train"]
label_legend = " (0=entailment, 1=neutral, 2=contradiction; -1 may mean missing)"
for i in range(3):
    ex = train_split[i]
    print(f"\nExample {i}:")
    for caption, field in ((" Premise:", "premise"), (" Hypothesis:", "hypothesis")):
        print(caption, ex[field])
    print(" Label:", ex["label"], label_legend)


README.md: 0.00B [00:00, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/412k [00:00<?, ?B/s]

plain_text/validation-00000-of-00001.par(…):   0%|          | 0.00/413k [00:00<?, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/19.6M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/550152 [00:00<?, ? examples/s]

Splits: dict_keys(['test', 'validation', 'train'])
Sizes: train 550152 validation 10000 test 10000

Example 0:
 Premise: A person on a horse jumps over a broken down airplane.
 Hypothesis: A person is training his horse for a competition.
 Label: 1  (0=entailment, 1=neutral, 2=contradiction; -1 may mean missing)

Example 1:
 Premise: A person on a horse jumps over a broken down airplane.
 Hypothesis: A person is at a diner, ordering an omelette.
 Label: 2  (0=entailment, 1=neutral, 2=contradiction; -1 may mean missing)

Example 2:
 Premise: A person on a horse jumps over a broken down airplane.
 Hypothesis: A person is outdoors, on a horse.
 Label: 0  (0=entailment, 1=neutral, 2=contradiction; -1 may mean missing)


In [3]:
# Cell 3 — filter invalid labels and look at label distribution
def _has_valid_label(ex):
    """Return True when the example's label is present and non-negative."""
    label = ex["label"]
    if label is None:
        return False
    return label >= 0


# Keep only examples that pass the label check, in every split.
snli = snli.filter(_has_valid_label)
from collections import Counter
def label_counts(split, dataset=None):
    """Count how many examples carry each label value in one split.

    Args:
        split: Name of the split to tally (e.g. "train").
        dataset: Optional mapping of split name -> split data, where each
            split supports ``["label"]`` column access. Defaults to the
            notebook-level ``snli`` object.

    Returns:
        A ``collections.Counter`` mapping label value -> number of examples.
    """
    if dataset is None:
        dataset = snli
    # Read the "label" column directly rather than iterating whole examples:
    # the per-row loop builds a dict with every field just to read one
    # integer, which is needlessly slow on the 550k-row train split.
    return Counter(dataset[split]["label"])
# Print the label tally for each split, in train / validation / test order.
for split_name in ("train", "validation", "test"):
    print(f"Label counts ({split_name}):", label_counts(split_name))

Filter:   0%|          | 0/10000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/10000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/550152 [00:00<?, ? examples/s]

Label counts (train): Counter({0: 183416, 2: 183187, 1: 182764})
Label counts (validation): Counter({0: 3329, 2: 3278, 1: 3235})
Label counts (test): Counter({0: 3368, 2: 3237, 1: 3219})
