In [1]:
import torch
import pandas as pd

from pathlib import Path
from transformers import AutoTokenizer, AutoModelForCausalLM

### Tokenizer and Model

In [2]:
# Hugging Face Hub id of the checkpoint used throughout the notebook
# (a pre-quantized bitsandbytes 4-bit build, as the "-bnb-4bit" suffix indicates).
model_id = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"

In [3]:
# Load the tokenizer published alongside the checkpoint named in `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [4]:
# Loading options kept in one place so they are easy to tweak:
#   torch_dtype -> request bfloat16 tensors
#   device_map  -> let the library decide where to place the weights
model_load_kwargs = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_kwargs)

In [91]:
# Display the loaded model's configuration (architecture and quantization settings).
model.config

LlamaConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": [
      "lm_head",
      "multi_modal_p

### Data

In [6]:
# Data folder, resolved relative to the directory the kernel was started in.
DATA_DIR = Path.cwd().joinpath("am_reasoning", "data_files")

In [7]:
# Read the dataset CSV; its first column is used as the DataFrame index.
dataset_path = DATA_DIR / "pe_dataset.csv"
df = pd.read_csv(dataset_path, index_col=0)

### Template Messages

In [73]:
# Alternative, reasoning-style system prompt (explain step by step, then give a
# one-word label).
# NOTE(review): no cell visible in this notebook uses `sys_msg`; the chat
# messages are built from `instruction` instead. Its label set
# (Position / Claim / Premise) also differs from the one in `instruction`
# (MajorClaim / Claim / Premise) -- confirm which is intended before using it.
sys_msg = """You are an expert in argumentation analysis. You will be given a complete argument. Your task is to analyze the argument holistically and classify it as exactly ONE of the following categories: Position, Claim, or Premise. 

First, think through your analysis step by step, considering the function and structure of the argument. Explain your reasoning, identifying key characteristics that point to a specific classification.

Then, provide your final classification as a single word: Position, Claim, or Premise."""

In [74]:
# System prompt actually sent to the model: classify the input as "MajorClaim",
# "Claim" or "Premise" and answer with JSON only, no explanation.
# NOTE(review): the placeholder in the format example is written inside square
# brackets, and the sample reply further down is a JSON list
# ({"component_type": ["Claim"]}) -- confirm downstream parsing expects a list
# rather than a bare string.
# The trailing blank lines do not appear in the decoded prompt shown below.
instruction = """### You are an expert in Argument Mining. You are given an argument. Your task is to classify the argument as either "MajorClaim", "Claim" or "Premise". You must return the argument component in following JSON format: {"component_type": [component_type (str)]}

DO NOT RETURN ANY EXPLANATION.

"""

In [75]:
# Use the 'ac' field of the first dataset row as the example user message.
first_row = df.iloc[0]
usr_msg = first_row['ac']

In [76]:
# Two-turn chat: the system turn carries the task instruction, the user turn
# carries the text to classify.
system_turn = {"role": "system", "content": instruction}
user_turn = {"role": "user", "content": usr_msg}
messages = [system_turn, user_turn]

In [77]:
# Render the chat through the tokenizer's chat template, ending with the
# assistant header (add_generation_prompt=True) so the model continues from
# there, and get the token ids back as a PyTorch tensor.
prompt_token_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
)
# Move the prompt onto the same device as the model before generation.
input_ids = prompt_token_ids.to(model.device)  # type: ignore

In [78]:
# Show the raw token ids produced by the chat template.
input_ids

tensor([[128000, 128006,   9125, 128007,    271,  38766,   1303,  33025,   2696,
             25,   6790,    220,   2366,     18,    198,  15724,   2696,     25,
            220,   1114,   2947,    220,   2366,     20,    271,  14711,   1472,
            527,    459,   6335,    304,  14138,  26917,     13,   1472,    527,
           2728,    459,   5811,     13,   4718,   3465,    374,    311,  49229,
            279,   5811,    439,   3060,    330,  35575,  46644,    498,    330,
          46644,      1,    477,    330,  42562,   1082,   3343,   1472,   2011,
            471,    279,   5811,   3777,    304,   2768,   4823,   3645,     25,
           5324,   8739,   1857,    794,    510,   8739,   1857,    320,    496,
           7400,    633,   5989,   4276,  31980,   4230,   4154,  95179,   3579,
             13, 128009, 128006,    882, 128007,    271,    906,   1288,  15866,
            810,  12939,    311,  23915,   2391,   6156,   6873, 128009, 128006,
          78191, 128007,    

In [79]:
# Decode the prompt back to text (special tokens included) to check exactly
# what the model will see.
prompt_text = tokenizer.decode(input_ids[0])
print(prompt_text)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 17 Mar 2025

### You are an expert in Argument Mining. You are given an argument. Your task is to classify the argument as either "MajorClaim", "Claim" or "Premise". You must return the argument component in following JSON format: {"component_type": [component_type (str)]}

DO NOT RETURN ANY EXPLANATION.<|eot_id|><|start_header_id|>user<|end_header_id|>

we should attach more importance to cooperation during primary education<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [80]:
# Token ids that end generation: the tokenizer's EOS token and the
# end-of-turn marker "<|eot_id|>".
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]

In [81]:
# Sampling (do_sample=True) draws from torch's random number generator, so the
# reply differs from run to run unless the generator is seeded first. Seeding
# inside this cell makes it idempotent: re-running it in the same environment
# should reproduce the same completion.
GENERATION_SEED = 42
torch.manual_seed(GENERATION_SEED)

outputs = model.generate(
    input_ids,
    max_new_tokens=512,        # upper bound on the length of the reply
    eos_token_id=terminators,  # stop at any of the terminator token ids
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

In [82]:
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated tokens remain.
prompt_length = input_ids.shape[-1]
response = outputs[0][prompt_length:]

In [83]:
# Decode the reply to text, dropping special tokens, and print it.
response_text = tokenizer.decode(response, skip_special_tokens=True)
print(response_text)

{"component_type": ["Claim"]}


In [92]:
# Inspect the underlying backend tokenizer object (truncation/padding settings
# and the table of added special tokens).
tokenizer.backend_tokenizer

Tokenizer(version="1.0", truncation=None, padding=None, added_tokens=[{"id":128000, "content":"<|begin_of_text|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128001, "content":"<|end_of_text|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128002, "content":"<|reserved_special_token_0|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128003, "content":"<|reserved_special_token_1|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128004, "content":"<|finetune_right_pad_id|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128005, "content":"<|reserved_special_token_2|>", "single_word":False, "lstrip":False, "rstrip":False, "normalized":False, "special":True}, {"id":128006, "content":"<|start_header_id|>", "single_word":False, "lstrip":Fals