In [1]:
# Libraries

import copy

import numpy as np
import pandas as pd
import torch

from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
from transformers import GPT2Tokenizer
from transformers import GPT2ForSequenceClassification, GPT2Config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer: the stock "gpt2" vocabulary, with the end-of-sequence token reused
# as the padding token so that batches of unequal length can be padded.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Model: sequence-classification checkpoint stored in the local ./model directory.
model = GPT2ForSequenceClassification.from_pretrained("./model")

# GPT2ForSequenceClassification classifies from the last non-padding token of each
# row and finds that token through `config.pad_token_id`. If the checkpoint was
# saved without one, padded batches are rejected, so fall back to the tokenizer's
# padding id. An id already stored in the checkpoint is left untouched.
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

In [3]:
# Take the `transformer` submodule of the classifier and print its module tree
# to inspect the layer layout (embeddings, blocks, final norm) before editing weights.
transformer_gpt = model.transformer
print(transformer_gpt)


GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2SdpaAttention(
        (c_attn): Conv1D(nf=2304, nx=768)
        (c_proj): Conv1D(nf=768, nx=768)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D(nf=3072, nx=768)
        (c_proj): Conv1D(nf=768, nx=3072)
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)


In [4]:
# Print the dotted name of every parameter in the model. These are the exact
# strings used later to select individual tensors (e.g. a specific attention
# projection) for masking. Only the name is printed; `param` is unused here.
for name, param in model.named_parameters():
    print(name)

transformer.wte.weight
transformer.wpe.weight
transformer.h.0.ln_1.weight
transformer.h.0.ln_1.bias
transformer.h.0.attn.c_attn.weight
transformer.h.0.attn.c_attn.bias
transformer.h.0.attn.c_proj.weight
transformer.h.0.attn.c_proj.bias
transformer.h.0.ln_2.weight
transformer.h.0.ln_2.bias
transformer.h.0.mlp.c_fc.weight
transformer.h.0.mlp.c_fc.bias
transformer.h.0.mlp.c_proj.weight
transformer.h.0.mlp.c_proj.bias
transformer.h.1.ln_1.weight
transformer.h.1.ln_1.bias
transformer.h.1.attn.c_attn.weight
transformer.h.1.attn.c_attn.bias
transformer.h.1.attn.c_proj.weight
transformer.h.1.attn.c_proj.bias
transformer.h.1.ln_2.weight
transformer.h.1.ln_2.bias
transformer.h.1.mlp.c_fc.weight
transformer.h.1.mlp.c_fc.bias
transformer.h.1.mlp.c_proj.weight
transformer.h.1.mlp.c_proj.bias
transformer.h.2.ln_1.weight
transformer.h.2.ln_1.bias
transformer.h.2.attn.c_attn.weight
transformer.h.2.attn.c_attn.bias
transformer.h.2.attn.c_proj.weight
transformer.h.2.attn.c_proj.bias
transformer.h.2.ln_2

In [9]:
# Ablation: mask the attention output projection (c_proj) of transformer block 10.
# All edits are made on a deep copy, so `model` keeps its original weights and
# re-running this cell always starts again from those weights.
model_copy = copy.deepcopy(model)

# Freeze every parameter of the copy.
model_copy.requires_grad_(False)

# Dotted names of the two tensors to mask (as listed by `named_parameters()`).
masked_weight_name = 'transformer.h.10.attn.c_proj.weight'
masked_bias_name = 'transformer.h.10.attn.c_proj.bias'

# Name -> parameter lookup. A misspelt name now raises KeyError instead of
# silently leaving the model unmasked.
copy_params = dict(model_copy.named_parameters())

# Build the masks from the target tensors themselves so that shape, dtype and
# device always match, whatever the hidden size or wherever the model lives.
# All-zero masks switch the projection off entirely; put ones (or any other
# values) into these tensors to keep or rescale individual entries instead.
custom_mask_weight = torch.zeros_like(copy_params[masked_weight_name])
custom_mask_bias = torch.zeros_like(copy_params[masked_bias_name])

print()
# In-place edits of model parameters belong under no_grad so that autograd never
# records them, independent of the parameters' requires_grad state.
with torch.no_grad():
    for target_name, mask in (
        (masked_weight_name, custom_mask_weight),
        (masked_bias_name, custom_mask_bias),
    ):
        target = copy_params[target_name]
        print(target.shape)
        target.mul_(mask)

print()
# Show the masked tensors to confirm the multiplication took effect.
for target_name in (masked_weight_name, masked_bias_name):
    print(f"{target_name}\n{copy_params[target_name]}")




torch.Size([768, 768])
torch.Size([768])

transformer.h.10.attn.c_proj.weight
Parameter containing:
tensor([[-0., -0., 0.,  ..., -0., -0., 0.],
        [-0., 0., -0.,  ..., 0., 0., -0.],
        [-0., 0., 0.,  ..., -0., -0., -0.],
        ...,
        [0., 0., -0.,  ..., -0., -0., -0.],
        [0., -0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., -0.]])
transformer.h.10.attn.c_proj.bias
Parameter containing:
tensor([-0., 0., -0., -0., -0., 0., 0., 0., 0., 0., 0., -0., 0., -0., 0., 0., -0., -0., 0., 0., 0., 0., -0., -0.,
        0., -0., 0., -0., 0., 0., 0., -0., -0., -0., -0., -0., 0., -0., -0., 0., 0., -0., -0., -0., 0., 0., 0., -0.,
        -0., 0., 0., 0., -0., 0., 0., 0., 0., -0., 0., 0., -0., 0., -0., 0., -0., -0., 0., 0., -0., -0., -0., -0.,
        -0., 0., -0., 0., -0., 0., 0., 0., 0., -0., -0., -0., 0., -0., -0., -0., 0., -0., -0., 0., -0., 0., -0., -0.,
        0., -0., -0., 0., 0., 0., 0., -0., 0., 0., 0., 0., 0., -0., -0., -0., 0., -0., 0., 0., -0., 0., -0.,

In [31]:
# Read the raw CSV and discard its "Unnamed: 0" column
# (presumably an index column written out when the file was saved - verify).
dataset_path = "./text.csv"
df = pd.read_csv(dataset_path).drop(columns="Unnamed: 0")

# Shuffled 80/20 train/test split; the fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=10,
)

# Wrap each pandas split as a Hugging Face `Dataset`.
train_dataset, test_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, test_df)
)