In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B-Base")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B-Base")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from datasets import load_dataset

train_dataset = load_dataset('csv', data_files = '/content/drive/MyDrive/contradictory-my-dear-watson/data/train.csv')
test_dataset = load_dataset('csv', data_files = '/content/drive/MyDrive/contradictory-my-dear-watson/data/test.csv')

In [5]:
train_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'premise', 'hypothesis', 'lang_abv', 'language', 'label'],
        num_rows: 12120
    })
})

In [6]:
print(train_dataset['train'][0])

{'id': '5130fd2cb5', 'premise': 'and these comments were considered in formulating the interim rules.', 'hypothesis': 'The rules developed in the interim were put together with these comments in mind.', 'lang_abv': 'en', 'language': 'English', 'label': 0}


In [7]:
test_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'premise', 'hypothesis', 'lang_abv', 'language'],
        num_rows: 5195
    })
})

In [8]:
print(test_dataset['train'][0])

{'id': 'c6d58c3f69', 'premise': 'بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولمبین ہائی اسکول کے دوسرے طلبا کے نام سے بکسوں کو نشان زد کیا جائے گا جس نے اس سال پہلے اپنی زندگی کھو دی', 'hypothesis': 'کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی اسکول کے طالب علموں میں سے ایک جو مر گیا.', 'lang_abv': 'ur', 'language': 'Urdu'}


In [9]:
# split the training dataset into train/validation
split_dataset = train_dataset['train'].train_test_split(
    test_size=0.1,
    seed=42,
    shuffle=True
)

train_ds_orig = split_dataset['train']
val_ds_orig = split_dataset['test']
test_ds_orig = test_dataset['train']

In [10]:
print(f"Training dataset length: {len(train_ds_orig)}")
print(f"Validation dataset length: {len(val_ds_orig)}")
print(f"Test dataset length: {len(test_ds_orig)}")

Training dataset length: 10908
Validation dataset length: 1212
Test dataset length: 5195


In [11]:
icl_rows = train_ds_orig[10:20]
print(icl_rows)

{'id': ['dad3652214', '2c7a23e67d', '3d18b90d52', 'aee859bf43', '5459a3f905', '6ced0f8989', '07acbc212e', '2cc2f555de', '3cb6c5e9a5', 'b2550cab04'], 'premise': ['There would be little benefit to national saving from allowing early access to mandatory accounts with set contribution levels-which has been proposed for Social Security (see Q4.', 'เรายังไม่ได้สัมภาษณ์บุคคลที่มีความรู้ความสามารถครบทุกคนหรือยังไม่ได้เห็นรายงานที่เกี่ยวข้องทั้งหมดเลย', 'The most important directions are simply up and up leads eventually to the cathedral and fortress commanding the hilltop, and down inevitably leads to one of three gates through the wall to the new town.', 'Jon walked back to the town to the smithy.', 'في الوقت الحالي تم فتح ممر في التصنيف للرجال ومن هذا الممر أتت السيدة بيشوب تليها المرأة التي أسلافها من الزنوج.', 'Ωωωω, είναι υπέροχη, είναι ξέρετε, είναι ένας χαρακτήρας που θα καθίσει με οποιονδήποτε, θα παίξει με οποιονδήποτε', "yeah it's just a matter of education i think", 'तो इससे कोई फर्

In [12]:
tokenizer.encode("yes")

[9693]

In [13]:
tokenizer.encode("no")

[2152]

In [14]:
tokenizer.encode("maybe")

[36760]

In [15]:
tokenizer

Qwen2TokenizerFast(name_or_path='Qwen/Qwen3-0.6B-Base', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, no

In [16]:
index = 9

premise = icl_rows['premise'][index]
hypothesis = icl_rows['hypothesis'][index]
label = icl_rows['label'][index]

# 0 for entailment, 1 for neutral, 2 for contradiction
label_to_text_map = {
    0: "yes",
    1: "maybe",
    2: "no"
}

label_text = label_to_text_map[label]


In [17]:
# create an icl prompt using the premis and hypthoseis
template = "Consider the following premise: {premise}.\nDoes the premise entail the following hypothesis: {hypothesis}?\nPlease answer with: 'yes', 'no', or 'maybe'.\n{label_text}"

In [18]:
example = template.format(premise = premise, hypothesis = hypothesis, label_text = label_text)
print(example)

Consider the following premise: To control land and sea routes to the south, the Mauryas still needed to conquer the eastern kingdom of Kalinga (modern Orissa)..
Does the premise entail the following hypothesis: The Mauryas had a large army capable of conquering Kalinga.?
Please answer with: 'yes', 'no', or 'maybe'.
maybe


In [19]:
# create the icl prompt
prompt = ""

for index in range(10):
  premise = icl_rows['premise'][index]
  hypothesis = icl_rows['hypothesis'][index]
  label = icl_rows['label'][index]

  # 0 for entailment, 1 for neutral, 2 for contradiction
  label_to_text_map = {
      0: "yes",
      1: "maybe",
      2: "no"
  }

  label_text = label_to_text_map[label]

  example = template.format(premise = premise, hypothesis = hypothesis, label_text = label_text)
  prompt = prompt + example + "\n\n"

print(prompt)

Consider the following premise: There would be little benefit to national saving from allowing early access to mandatory accounts with set contribution levels-which has been proposed for Social Security (see Q4..
Does the premise entail the following hypothesis: There would be little benefit to national saving?
Please answer with: 'yes', 'no', or 'maybe'.
yes

Consider the following premise: เรายังไม่ได้สัมภาษณ์บุคคลที่มีความรู้ความสามารถครบทุกคนหรือยังไม่ได้เห็นรายงานที่เกี่ยวข้องทั้งหมดเลย.
Does the premise entail the following hypothesis: ยังไม่ได้รับข้อมูลจากทุกคนที่รู้?
Please answer with: 'yes', 'no', or 'maybe'.
yes

Consider the following premise: The most important directions are simply up and up leads eventually to the cathedral and fortress commanding the hilltop, and down inevitably leads to one of three gates through the wall to the new town..
Does the premise entail the following hypothesis: Go downwards to one of the gates, all of which will lead you into the cathedral.?

In [20]:
device = torch.device('cuda')
model.to(device)

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
        (post_attention_layer

In [21]:
validation_index = 0
validation_template = "Consider the following premise: {premise}.\nDoes the premise entail the following hypothesis: {hypothesis}?\nPlease answer with: 'yes', 'no', or 'maybe'.\n"

row = val_ds_orig[validation_index]
premise = row['premise']
hypothesis = row['hypothesis']

validation_text = validation_template.format(premise = premise, hypothesis = hypothesis)
print(validation_text)

full_prompt = prompt + validation_text
print(full_prompt)

inputs = tokenizer(full_prompt, return_tensors="pt")
print(inputs)
print(inputs['input_ids'].shape)


inputs = {k: v.to(device) for k,v in inputs.items()}

out = model(**inputs)
print(out.logits.shape)
logits = out.logits


"""
"yes
[9693]

[13]
0s
tokenizer.encode("no")
[2152]

[14]
0s
tokenizer.encode("maybe")
[36760]

  label_to_text_map = {
      0: "yes",
      1: "maybe",
      2: "no"
  }

"""
last_token_logits = logits[:, -1, [9693, 36760, 2152]]
print(last_token_logits.shape)

_, labels = torch.max(last_token_logits, dim=1)
print(labels.shape)

Consider the following premise: Μεταξύ του νησιού και της ηπειρωτικής χώρας είναι η Laguna Nichupte, μια τεράστια λιμνοθάλασσα με θαλασσινό νερό, οριοθετημένη από βάλτους με μανγκρόβια, που αποτελούν καταφύγιο για πολλά είδη άγριας ζωής..
Does the premise entail the following hypothesis: Η λίμνη Nichupte είναι 40 στρέμματα με νερό.?
Please answer with: 'yes', 'no', or 'maybe'.

Consider the following premise: There would be little benefit to national saving from allowing early access to mandatory accounts with set contribution levels-which has been proposed for Social Security (see Q4..
Does the premise entail the following hypothesis: There would be little benefit to national saving?
Please answer with: 'yes', 'no', or 'maybe'.
yes

Consider the following premise: เรายังไม่ได้สัมภาษณ์บุคคลที่มีความรู้ความสามารถครบทุกคนหรือยังไม่ได้เห็นรายงานที่เกี่ยวข้องทั้งหมดเลย.
Does the premise entail the following hypothesis: ยังไม่ได้รับข้อมูลจากทุกคนที่รู้?
Please answer with: 'yes', 'no', or '

In [38]:
from torch.utils.data import DataLoader

val_loader = DataLoader(val_ds_orig, batch_size=4)

In [39]:
torch.cuda.empty_cache()

In [40]:
inputs = tokenizer(prompt)
print(inputs)
orig_len = len(inputs['input_ids'])
print(orig_len)

{'input_ids': [37175, 279, 2701, 40202, 25, 2619, 1035, 387, 2632, 8760, 311, 5313, 13997, 504, 10693, 4124, 2615, 311, 23042, 9618, 448, 738, 18527, 5866, 12, 8206, 702, 1012, 10981, 369, 9784, 8234, 320, 4060, 1207, 19, 33947, 21468, 279, 40202, 85992, 279, 2701, 30078, 25, 2619, 1035, 387, 2632, 8760, 311, 5313, 13997, 5267, 5501, 4226, 448, 25, 364, 9693, 516, 364, 2152, 516, 476, 364, 36760, 23569, 9693, 271, 37175, 279, 2701, 40202, 25, 94482, 124878, 123874, 86032, 18625, 127196, 19841, 35648, 23271, 26283, 139344, 86348, 55770, 36142, 47642, 40327, 124358, 35884, 47171, 26283, 28319, 123971, 22287, 123959, 139084, 123886, 126829, 47642, 25200, 124256, 124618, 83581, 34509, 123874, 86032, 18625, 127196, 19841, 124272, 123909, 133643, 35884, 47171, 123949, 47171, 125293, 47839, 124010, 35884, 23271, 19841, 30434, 126686, 124776, 624, 21468, 279, 40202, 85992, 279, 2701, 30078, 25, 220, 34509, 123874, 86032, 18625, 127196, 19841, 22287, 83546, 47839, 80614, 91200, 124205, 35884, 4

In [41]:
from tqdm import tqdm

tokenizer.padding_side = 'left'
acc = 0

total_num_correct = 0
total_samples = 0

model.eval()


for batch in tqdm(val_loader):
  prompts = []

  for i in range(len(batch['label'])):
    premise = batch['premise'][i]
    hypothesis = batch['hypothesis'][i]

    validation_text = validation_template.format(premise = premise, hypothesis = hypothesis)
    full_prompt = prompt + validation_text

    prompts.append(full_prompt)


  labels = torch.tensor(batch['label']).to(device)

  # create inputs
  inputs = tokenizer(prompts, return_tensors="pt", padding=True)
  inputs = {k: v.to(device) for k,v in inputs.items()}

  # generations_list = []
  # for i in range(len(generations)):

  #   orig_len = len(inputs['input_ids'][0])
  #   tensor = generations[i].cpu().tolist()[orig_len: ]
  #   decoded_text = tokenizer.decode(tensor)
  #   generations_list.append((tensor, decoded_text))

  # print(len(generations_list))
  # for tensor, decoded_text in generations_list:
  #   print(tensor)
  #   print(decoded_text)
  #   print("---------------")

  # break

  with torch.no_grad():
    out = model(**inputs)
    logits = out.logits

    last_token_logits = logits[:, -1, [9693, 36760, 2152]]
    _, preds = torch.max(last_token_logits, dim=1)

  # print(f"len(prompts): {len(prompts)}")
  # print(last_token_logits.shape)

  # print(preds.shape)
  # print(labels.shape)

  num_correct = torch.sum((preds == labels).to(torch.int)).item()

  total_num_correct += num_correct
  total_samples += len(preds)


print(f"Final accuracy: {total_num_correct}/{total_samples} - {total_num_correct / total_samples}")


  labels = torch.tensor(batch['label']).to(device)
100%|██████████| 303/303 [01:49<00:00,  2.77it/s]

Final accuracy: 669/1212 - 0.551980198019802





In [42]:
# loop through the test dataset

# collect the answers

# write submission file