In [2]:
import torch
import torch.nn as nn

# Example model
model = nn.Linear(256, 128)  # Linear layer: input size 256, output size 128

# Iterate through the parameters of the model
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"Parameter name: {name}")
        print(f"Shape of {name}: {param.data.size()[0]}")
        print(f"Requires gradient: {param.requires_grad}")


Parameter name: weight
Shape of weight: 128
Requires gradient: True
Parameter name: bias
Shape of bias: 128
Requires gradient: True


In [3]:
arr=[1,2,3]
arr[:3]

[1, 2, 3]

In [4]:
def get_mask(rank: torch.Size, k: int):
    # Create a mask based on the rank and the value of k
    mask = torch.zeros((rank, rank))
    if rank >= k:
        mask[:k, :k] = 1
    else:
        mask[:, :] = 1
    return mask

get_mask(5,2)

tensor([[1., 1., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [6]:
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer, TrainerCallback, TrainingArguments

# Define a simple model (a basic neural network)
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define a simple dataset (for demonstration)
class SimpleDataset(Dataset):
    def __init__(self, num_samples=100):
        self.data = torch.randn(num_samples, 10)  # 10 features
        self.labels = torch.randn(num_samples, 1)  # 1 target value

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {"input_ids": self.data[idx], "labels": self.labels[idx]}

# Define the get_mask function
def get_mask(rank: torch.Size, k: int):
    mask = torch.zeros((rank, rank))
    if rank >= k:
        mask[:k, :k] = 1
    else:
        mask[:, :] = 1
    return mask

# MaskingCallback implementation
class MaskingCallback(TrainerCallback):
    def __init__(self, k_min=10, k_max=25):
        self.k_min = k_min
        self.k_max = k_max
        self.cur_k = k_max

    def on_step_begin(self, args, state, control, model, **kwargs):
        k = random.randint(self.k_min, self.k_max)
        print(f"MaskingCallback triggered: Selected k = {k}")

        for name, param in model.named_parameters():
            if param.requires_grad:
                rank = param.data.size()[0]
                mask = get_mask(rank, k)
                print(f"Applying mask to parameter: {name}, with shape: {param.shape}")
                param.data = param.data * mask.to(param.data.device)

        print(f"Masking applied with k = {k} to model parameters.")
        return control

# Instantiate the model and dataset
model = SimpleModel()
train_dataset = SimpleDataset()
eval_dataset = SimpleDataset()

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",  # output directory
    num_train_epochs=3,  # number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=8,  # batch size per device during evaluation
    logging_dir='./logs',  # directory for storing logs
    logging_steps=10,
)

# Create the Trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=[MaskingCallback(k_min=5, k_max=10)]  # Use your custom MaskingCallback
)

# Start the training
trainer.train()




MaskingCallback triggered: Selected k = 10
Applying mask to parameter: fc1.weight, with shape: torch.Size([50, 10])


RuntimeError: The size of tensor a (10) must match the size of tensor b (50) at non-singleton dimension 1

wandb: Network error (ConnectionError), entering retry loop.
