# Imports

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
from tqdm import tqdm
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# Loading Models

In [None]:
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt, and the
# same checkpoint is loaded into `qwen2_5` again further down, right before its
# state dict is taken.
qwen2_5 = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Math-1.5B")

KeyboardInterrupt: 

In [None]:
# Loaded in bfloat16; passed as `sft_model` to compute_subnetwork_mask below.
qwen2_5_sft = AutoModelForCausalLM.from_pretrained("VerlTool/Qwen2.5-Math-1.5B-TIR-SFT", dtype=torch.bfloat16)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]



model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.18G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

In [None]:
# Loaded in bfloat16; passed as `rl_model` to compute_subnetwork_mask below.
# zero_mask / base_mask reload this same checkpoint themselves.
qwen2_5_rl = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Math-1.5B-Instruct", dtype=torch.bfloat16)

config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

# Util Functions

In [None]:
def compute_subnetwork_mask(sft_model, rl_model, tolerance):
    """Build per-parameter boolean masks of the entries that differ between two models.

    For every parameter name yielded by ``sft_model.named_parameters()`` the
    mask is True where ``|rl - sft| > tolerance``. The magnitude of the
    difference is compared so that updates in either direction are included
    (a signed ``delta > tolerance`` test would drop every negative update).

    Args:
        sft_model: Reference model; its parameter names drive the iteration.
        rl_model: Model compared against the reference.
        tolerance: Threshold on the absolute element-wise difference.

    Returns:
        dict mapping parameter name -> bool tensor with the parameter's shape.
        Parameters that are missing from ``rl_model``, whose shapes differ, or
        whose difference cannot be computed are reported with ``print`` and
        left out of the result.
    """
    rl_state_dict = rl_model.state_dict()
    sft_state_dict = sft_model.state_dict()

    delta_mask = {}

    with torch.no_grad():
        for name, _ in tqdm(sft_model.named_parameters(), desc="Computing deltas"):
            if name not in rl_state_dict:
                print(f"Error in {name}: parameter missing from rl_model")
                continue

            rl_weight = rl_state_dict[name]
            sft_weight = sft_state_dict[name]
            if rl_weight.shape != sft_weight.shape:
                # Broadcast-compatible shapes would otherwise subtract without
                # error and produce a mask of the wrong shape.
                print(
                    f"Error in {name}: shape mismatch "
                    f"{tuple(rl_weight.shape)} vs {tuple(sft_weight.shape)}"
                )
                continue

            try:
                delta_mask[name] = (rl_weight - sft_weight).abs() > tolerance
            except Exception as e:
                # Best effort, as before: report (e.g. device mismatch) and move on.
                print(f"Error in {name}: {e}")
    return delta_mask

In [None]:
def create_random_mask(masks, seed=42):
    """Create a random mask per layer with as many True values as the original mask.

    Args:
        masks: dict mapping layer name -> boolean tensor.
        seed: Seed for the private random stream used to pick positions.

    Returns:
        dict with the same keys; each value is a CPU bool tensor of the same
        shape as the input mask, with the same number of True entries placed
        at uniformly random positions (sampled without replacement).

    A private ``np.random.RandomState(seed)`` is used instead of
    ``np.random.seed(seed)``: it yields the same sequence as seeding the global
    generator did, but leaves the caller's global NumPy RNG state untouched.
    """
    rng = np.random.RandomState(seed)

    out = {}

    for layer, reference in masks.items():
        total = reference.numel()
        num_true = int(torch.count_nonzero(reference).item())

        flat = np.zeros(total, dtype=bool)
        # Always draw, even when num_true == 0, so the random stream consumed
        # per layer stays the same as before.
        flat[rng.choice(total, num_true, replace=False)] = True

        out[layer] = torch.from_numpy(flat).reshape(reference.shape)

    return out

In [None]:
# Per-parameter boolean masks of the entries whose RL-vs-SFT difference exceeds
# the tolerance (here 1e-2).
subnetwork_mask = compute_subnetwork_mask(qwen2_5_sft, qwen2_5_rl, 1e-2)

Computing deltas: 338it [00:01, 249.31it/s]


In [None]:
# Random control: per layer, the same number of True entries as subnetwork_mask,
# at random positions (default seed=42).
rand_mask = create_random_mask(subnetwork_mask)

# Models for Ablation Study (Zero Mask)

In [None]:
def zero_mask(masks, output_dir, invert=False):
    """Write a copy of the RL finetuned model with the masked weights set to zero.

    Args:
        masks: dict mapping parameter name -> boolean tensor shaped like that
            parameter.
        output_dir: Directory the modified model is saved to.
        invert: When False, entries where the mask is True are zeroed; when
            True, entries where the mask is False are zeroed instead.
    """
    model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2.5-Math-1.5B-Instruct",
        dtype=torch.bfloat16,
    )

    with torch.no_grad():
        progress = tqdm(model.named_parameters(), desc='masking')
        for name, param in progress:
            # Pick which side of the mask gets wiped.
            selected = ~masks[name] if invert else masks[name]
            param.data[selected] = 0

    model.save_pretrained(output_dir)

In [None]:
# Zero the entries selected by the subnetwork mask; saved to 'zero_subnetwork'.
zero_mask(subnetwork_mask, 'zero_subnetwork')

masking: 338it [00:03, 103.98it/s]


In [None]:
# Third positional argument is invert=True: zero everything outside the subnetwork mask.
zero_mask(subnetwork_mask, 'zero_non_subnetwork', True)

masking: 338it [00:04, 71.21it/s] 


In [None]:
# Control: zero the entries selected by the random mask instead.
zero_mask(rand_mask, 'random_subnetwork')

: 

# Models for Ablation Study (Base Model Mask)

In [None]:
def base_mask(base_state_dict, masks, output_dir, invert=False):
    """Replace masked weights of the RL finetuned model with base model weights.

    Args:
        base_state_dict: dict mapping parameter name -> base model tensor.
        masks: dict mapping parameter name -> boolean tensor shaped like that
            parameter.
        output_dir: Directory the modified model is saved to.
        invert: When False, entries where the mask is True take the base
            value; when True, entries where the mask is False take it.

    The base tensors are cast to each parameter's dtype/device and written in
    place. ``torch.where`` on mixed dtypes (e.g. a float32 base tensor and a
    bfloat16 parameter) promotes the result, and assigning that to
    ``param.data`` would silently change the dtype of the saved checkpoint.
    """
    model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Math-1.5B-Instruct", dtype=torch.bfloat16)

    with torch.no_grad():
        for name, param in tqdm(model.named_parameters(), desc='masking'):
            take_base = ~masks[name] if invert else masks[name]
            take_base = take_base.to(param.device)
            base_value = base_state_dict[name].to(device=param.device, dtype=param.dtype)
            # In-place copy keeps the parameter's dtype, device and storage.
            param.copy_(torch.where(take_base, base_value, param))

    model.save_pretrained(output_dir)

In [None]:
# Drop both model references in one statement; neither name is used again in
# the cells shown below.
del qwen2_5_sft, qwen2_5_rl

In [None]:
# Load the base model in bfloat16 like the other checkpoints, so its weights
# have the same dtype as the bfloat16 model they are mixed into by base_mask.
qwen2_5 = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Math-1.5B", dtype=torch.bfloat16)

config.json:   0%|          | 0.00/676 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [None]:
# Base model tensors keyed by parameter name; base_mask looks them up by name.
base_weights = qwen2_5.state_dict()

In [None]:
# Entries selected by the subnetwork mask take the base model's values.
base_mask(base_weights, subnetwork_mask, 'base_subnetwork')

masking: 338it [00:04, 77.16it/s] 


In [None]:
# invert=True: entries OUTSIDE the subnetwork mask take the base model's values.
# Without it this call produces the same checkpoint as 'base_subnetwork'.
base_mask(base_weights, subnetwork_mask, 'base_non_subnetwork', invert=True)

masking: 338it [00:03, 104.17it/s]


In [None]:
# Control: entries selected by the random mask take the base model's values.
base_mask(base_weights, rand_mask, 'base_random_subnetwork_seed42')

masking: 338it [00:03, 95.68it/s] 


# Eval

In [2]:
# Checkpoints to compare. Names starting with 'Qwen/' are passed to
# from_pretrained as-is; the 'base_*' names match the output directories
# written by the base_mask(...) calls above.
models = [
    'Qwen/Qwen2.5-Math-1.5B',
    'base_subnetwork',
    'base_non_subnetwork',
    'base_random_subnetwork_seed42',
    'Qwen/Qwen2.5-Math-1.5B-Instruct'
    ]

In [3]:
prompts = [
    '$4x + 5 = 6x + 7$. $x$ =',
    '$x + 2y = 1, 3x +2y + 4z = 7, -2x + y - 2z = -1$. $(x,y,z) = ',
    # Raw string: in a normal literal '\f' (from '\frac') is a form-feed
    # character and '\i' is an invalid escape, so the LaTeX would be corrupted.
    r'\int (w^{\frac{1}{3}} + 10 w^\frac{3}{5})dw = '
]

  '\int (w^{\frac{1}{3}} + 10 w^\frac{3}{5})dw = '


In [6]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [26]:
def get_responses(models, prompt, max_new_tokens=4096):
    """Generate one response to `prompt` from each model in `models`.

    Each model is loaded with ``AutoModelForCausalLM.from_pretrained(name)``,
    moved to the module-level ``device``, prompted through the tokenizer's
    chat template as a single user message, and then released before the next
    model is loaded so that only one model occupies the device at a time.

    Args:
        models: Iterable of model names or local checkpoint directories.
        prompt: User message text.
        max_new_tokens: Upper bound on generated tokens per response.

    Returns:
        dict mapping model name -> list containing the decoded continuation
        (prompt tokens stripped; special tokens are kept by ``decode``).
    """
    responses = {}

    for name in tqdm(models):
        responses[name] = []
        model = AutoModelForCausalLM.from_pretrained(name).to(device)
        model.eval()

        try:
            with torch.no_grad():
                print('prompting')
                # Only the hub base checkpoint uses its own tokenizer; every
                # other entry (including local directories) uses the Instruct one.
                if name == 'Qwen/Qwen2.5-Math-1.5B':
                    tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Math-1.5B')
                else:
                    tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-Math-1.5B-Instruct')

                messages = [
                    {"role": "user", "content": prompt}
                ]
                inputs = tokenizer.apply_chat_template(
                    messages,
                    add_generation_prompt=True,
                    tokenize=True,
                    return_dict=True,
                    return_tensors="pt",
                ).to(model.device)
                outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

                # Keep only the newly generated tokens.
                prompt_length = inputs["input_ids"].shape[-1]
                responses[name].append(tokenizer.decode(outputs[0][prompt_length:]))
        finally:
            # Release this model before the next from_pretrained call; otherwise
            # the old model stays alive until `model` is rebound, i.e. two
            # models are resident at once.
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    return responses

In [27]:
# Maps each prompt string to the {model name: [response]} dict returned by get_responses.
responses = {}

In [28]:
# Responses of every model in `models` to the first prompt.
responses[prompts[0]] = get_responses(models, prompts[0])

  0%|          | 0/5 [00:00<?, ?it/s]

prompting


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
This is a friendly reminder - the current text generation call has exceeded the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.07it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.69it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.70it/s]


prompting


 80%|████████  | 4/5 [1:37:12<20:04, 1204.18s/it]  

prompting


100%|██████████| 5/5 [1:39:22<00:00, 1192.45s/it]


In [29]:
# Responses of every model in `models` to the second prompt.
responses[prompts[1]] = get_responses(models, prompts[1])

  0%|          | 0/5 [00:00<?, ?it/s]

prompting


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.28it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.44it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.29it/s]


prompting


 80%|████████  | 4/5 [2:07:25<31:48, 1908.77s/it]  

prompting


100%|██████████| 5/5 [2:39:15<00:00, 1911.20s/it]


In [30]:
# Responses of every model in `models` to the third prompt.
responses[prompts[2]] = get_responses(models, prompts[2])

  0%|          | 0/5 [00:00<?, ?it/s]

prompting


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 10.31it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.01it/s]


prompting


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 10.79it/s]


prompting


 80%|████████  | 4/5 [2:04:03<31:01, 1861.41s/it]  

prompting


100%|██████████| 5/5 [2:07:55<00:00, 1535.12s/it]


In [31]:
import json

# Destination for the collected responses.
fp = 'responses_simpler_base_mask_maxlength4096.json'

# Serialize first, then write the text out; non-ASCII characters are kept
# as-is and the file is written as UTF-8.
serialized = json.dumps(responses, indent=4, ensure_ascii=False)
with open(fp, mode='w', encoding='utf-8') as json_file:
    json_file.write(serialized)