In [1]:
import os
import time 
import math
import pickle
from contextlib import nullcontext

import numpy as np
import torch 
import torch.nn.functional as F

from tqdm import tqdm

from model import GPTConfig, GPT

In [2]:
out_dir = "/raid/slee3473/LLM/shakespeare_char/out"
if not os.path.exists(out_dir): raise RuntimeError(f"No out_dir {out_dir}")
    
eval_iters = 200
ckpt_iters = 1000
log_iters = 100
init_from = "scratch"  # "scratch", "resume", or "gpt2*"
dataset = "shakespeare_char" # "openwebtext"
gradient_accumulation_steps = 1  # 5 * 8  # used to simulate larger batch sizes
batch_size = 64  # 12
block_size = 256  # 1024

n_layer = 6  # 12
n_head = 6  # 12
n_embed = 384  # 768
dropout = 0.2  # 0.0
bias = False

# AdamW optimizer settings
learning_rate = 1e-3  # 6e-4
max_iters = 5000  # 600000
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.99  # 0.95
grad_clip = 1.0

# lr decay settings
decay_lr = True 
warmup_iters = 100  # 2000
lr_decay_iters = 5000 # 600000
min_lr = 1e-4  # 6e-5

# misc
device = "cuda:6"
dtype = "bfloat16"
compile = True  # use PyTorch 2.0 for faster model compile
master_process = True
seed_offset = 0
ddp_world_size = 1

torch.manual_seed(1337+seed_offset)
torch.backends.cuda.matmul.allow_tf32 = True 
torch.backends.cudnn.allow_tf32 = True
device_type = "cuda"
ptdtype = torch.bfloat16  # {"float32": torch.float32, "bfloat16": torch.bfloat16, "float16": torch.float16}[dtype]
ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16)

In [3]:
data_dir = os.path.join("./../nanoGPT/data", dataset)
train_data = np.memmap(os.path.join(data_dir, "train.bin"), dtype=np.uint16, mode="r")
val_data = np.memmap(os.path.join(data_dir, "val.bin"), dtype=np.uint16, mode="r")

In [4]:
def get_batch(split):
    data = train_data if split =="train" else val_data 
    ix = torch.randint(len(data)-block_size, (batch_size,))
    x = torch.stack([torch.from_numpy((data[i:i+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
    y = torch.stack([torch.from_numpy((data[i+1:i+1+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
    return x, y

In [5]:
iter_num = 0
best_val_loss = 1e9
meta_path = os.path.join(data_dir, "meta.pkl")
meta_vocab_size = None
if os.path.exists(meta_path):
    with open(meta_path, "rb") as f:
        meta = pickle.load(f)
    meta_vocab_size = meta["vocab_size"]
    print(f"found vocab_size = {meta_vocab_size} (inside {meta_path})")

found vocab_size = 65 (inside ./../nanoGPT/data/shakespeare_char/meta.pkl)


In [6]:
model_args = dict(n_layer=n_layer, n_head=n_head, n_embd=n_embed, block_size=block_size, bias=bias, vocab_size=None, dropout=dropout)

In [7]:
if meta_vocab_size is None:
    print("Use default vocab_size of GPT-2 (50304 = 50257 rounded up for efficiency)")
model_args["vocab_size"] = meta_vocab_size if meta_vocab_size is not None else 50304
gptconf = GPTConfig(**model_args)
model = GPT(gptconf).to(device)

number of parameters: 10.65M


In [8]:
scaler = torch.cuda.amp.GradScaler(enabled=(dtype=="float16"))
optimizer = model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)
checkpoint = None

num decayed parameter tensors: 26, with 10,740,096 parameters
num non-decayed parameter tensors: 13, with 4,992 parameters
using fused AdamW: True


In [9]:
def get_lr(it):
    if it < warmup_iters: return learning_rate * it / warmup_iters
    if it > lr_decay_iters: return min_lr 
    decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    assert 0 <= decay_ratio <= 1
    coeff = 0.5 * (1. + math.cos(math.pi * decay_ratio))
    return min_lr + coeff*(learning_rate-min_lr)

In [10]:
num_train_iters = (len(train_data)+batch_size-1) // batch_size
print(num_train_iters)

15686


In [11]:
@torch.no_grad()
def model_eval(model=model):
    model.eval()
    losses = torch.zeros(eval_iters)
    for k in range(eval_iters):
        X,Y = get_batch("val")
        with ctx: logits, loss = model(X,Y)
        losses[k] = loss.item()
    model.train()
    return losses.mean()

In [12]:
num_epochs = 1
num_train_iters = (len(train_data)-block_size+batch_size-1) // batch_size

for epoch in range(num_epochs):
    # start eval
    loss = model_eval()
    print(f"Epoch {epoch} eval loss: {loss:.4f}")

    indices = torch.randperm(len(train_data)-block_size)
    
    # start training
    model.train()
    for iter_i in range(max_iters):
        if iter_i%ckpt_iters==0:
            checkpoint = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'model_args': model_args,
            }
            torch.save(checkpoint, os.path.join(out_dir, f"ckpt_{iter_i}.pt"))
        ix = indices[iter_i*batch_size:(iter_i+1)*batch_size]
        X = torch.stack([torch.from_numpy((train_data[i:i+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        Y = torch.stack([torch.from_numpy((train_data[i+1:i+1+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        
        with ctx:
            logits, loss = model(X,Y)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)
        if iter_i % log_iters == 0: print(f"Iter {iter_i} --- Loss: {loss.item()}")

Epoch 0 eval loss: 4.2820
Iter 0 --- Loss: 4.271435737609863
Iter 100 --- Loss: 2.5760741233825684
Iter 200 --- Loss: 2.490290880203247
Iter 300 --- Loss: 2.4275081157684326
Iter 400 --- Loss: 2.338355302810669
Iter 500 --- Loss: 2.1652441024780273
Iter 600 --- Loss: 1.9487298727035522
Iter 700 --- Loss: 1.7803409099578857
Iter 800 --- Loss: 1.6560834646224976
Iter 900 --- Loss: 1.5977400541305542
Iter 1000 --- Loss: 1.5184143781661987
Iter 1100 --- Loss: 1.475876808166504
Iter 1200 --- Loss: 1.4374158382415771
Iter 1300 --- Loss: 1.3937320709228516
Iter 1400 --- Loss: 1.3578182458877563
Iter 1500 --- Loss: 1.363014578819275
Iter 1600 --- Loss: 1.3122446537017822
Iter 1700 --- Loss: 1.2949780225753784
Iter 1800 --- Loss: 1.2825500965118408
Iter 1900 --- Loss: 1.2612237930297852
Iter 2000 --- Loss: 1.2471721172332764
Iter 2100 --- Loss: 1.2602064609527588
Iter 2200 --- Loss: 1.2254849672317505
Iter 2300 --- Loss: 1.2261593341827393
Iter 2400 --- Loss: 1.2354092597961426
Iter 2500 --- Lo

In [13]:
final_model = {
    'model': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'model_args': model_args,   
}
torch.save(checkpoint, os.path.join(out_dir, f"final.pt"))

In [14]:
print(f"After training val loss: {model_eval():.4f}")  # 1.5941

After training val loss: 1.5941


### Load saved final model

In [13]:
model_ckpt = torch.load(os.path.join(out_dir, "final_4999.pt"), map_location=device)
    
state_dict = model_ckpt["model"]
model_args = model_ckpt["model_args"]
opt_args = model_ckpt["optimizer"]
    
model_config = GPTConfig(**model_args)
model = GPT(model_config).to(device)
model.load_state_dict(state_dict)

number of parameters: 10.65M


<All keys matched successfully>

In [14]:
model.eval()
print(f"Loaded model loss: {model_eval():.4f}")  # 1.5832

Loaded model loss: 1.5832


### Generate

In [15]:
with open(os.path.join(data_dir, "meta.pkl"), "rb") as f:
    meta = pickle.load(f)
stoi, itos = meta["stoi"], meta["itos"]
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: "".join([itos[i] for i in l])

In [16]:
start = "\n"
start_ids = encode(start)
x = (torch.tensor(start_ids, dtype=torch.long, device=device)[None, ...])

In [19]:
model.eval()
model.to(device)

with torch.no_grad():
    with ctx:
        for k in range(5):
            y = model.generate(x, 200, temperature=0.8, top_k=200)
            print(decode(y[0].tolist()))
            print("====")



Clown:
So is it and in all thing that was to banishment
what taught the prince hath been with him.

AUTOLYCUS:
Hand, I hope he of these five consulships!

AUTOLYCUS:
I will that Hollaud by the common
====

Behold you all less to expose you our grace!

KING EDWARD IV:
Thanks. But, tell me, do not see the king.

GLOUCESTER:
The earth shall be punish'd, poor gentle lady.

LADY GREY:
I'll swear with thee in
====

Men at the dark not of my fair bears,
Nor warm'd all the great confined cares:
So reprove a loving credent soul
Is your father breaths; and you might be
Down this daughter's blood. You are both.

TYBA
====

That parts the might of England's peace
And wear the king at the palsied mother,
Or that ever the hour and means to be
Maligation that to loud to the old world,
When we are parcell'd for so villain,
F
====

What said 'twas this and 'We can, he shall be past.'

Second Watchman:
Who is't not? why, no man take it you out;
But no more of this world, marry, more cordial.

Third Wa

### Load checkpoints

In [16]:
def load_ckpt(ckpt_name="ckpt.pt", ckpt_dir=out_dir, device=device):
    ckpt = torch.load(os.path.join(ckpt_dir, ckpt_name), map_location=device)
    
    ckpt_state_dict = ckpt["model"]
    ckpt_model_args = ckpt["model_args"]
    ckpt_opt_args = ckpt["optimizer"]
    
    ckpt_config = GPTConfig(**ckpt_model_args)
    ckpt_model = GPT(ckpt_config).to(device)
    ckpt_model.load_state_dict(ckpt_state_dict)

    print(f"Loaded checkpoint model {ckpt_name}")

    return ckpt_model

### Generate something to be attributed

In [18]:
model.eval()
model.to(device)
x = (torch.tensor(encode("\n"), dtype=torch.long, device=device)[None, ...])

with torch.no_grad():
    with ctx:
        out = model.generate(x, 10, temperature=1., top_k=200)
        print(decode(out[0].tolist()))
        print("====")


Romeo, and
====


### Attribute using Gradient

In [17]:
attr_X = (torch.tensor(encode("\nRome"), dtype=torch.long, device=device)[None, ...])
attr_Y = (torch.tensor(encode("o"), dtype=torch.long, device=device)[None, ...])

In [18]:
# probability of 'o' to come after 'Rome'
model.eval()

attr_logits, _ = model(attr_X)
prob = F.softmax(attr_logits, dim=-1)
print("Probability of o to come after Rome:", prob[0,0,attr_Y.item()].item())

tensor_space = (torch.tensor(encode(" "), dtype=torch.long, device=device)[None, ...])
print("Probability of space to come after Rome:", prob[0,0,tensor_space.item()].item())

tensor_newline = (torch.tensor(encode("\n"), dtype=torch.long, device=device)[None, ...])
print("Probability of newline to come after Rome:", prob[0,0,tensor_newline.item()].item())

Probability of o to come after Rome: 0.9756704568862915
Probability of space to come after Rome: 0.012636329047381878
Probability of newline to come after Rome: 1.668629192863591e-05


In [20]:
def input_gradient(idx, targets, model=model):
    b, t = idx.size()
    pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)

    # forward the GPT model itself
    tok_emb = model.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
    pos_emb = model.transformer.wpe(pos) # position embeddings of shape (t, n_embd)
    x = model.transformer.drop(tok_emb + pos_emb)
    for block in model.transformer.h:
        x = block(x)
    x = model.transformer.ln_f(x)

    logits = model.lm_head(x)
    loss = torch.nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)    
    
    # tok_emb.requires_grad = True
    tok_emb.retain_grad()

    gradient = torch.autograd.grad(loss, tok_emb, create_graph=True)[0]  # batch_size x block_size x n_embed
    print(gradient.shape)

In [19]:
def tuple_inner_product(t1, t2):
    assert len(t1) == len(t2)

    inner = 0
    for d1, d2 in zip(t1, t2):
        assert torch.numel(d1) == torch.numel(d2)
        inner += torch.inner(d1.flatten(), d2.flatten())

    return inner

In [20]:
model.eval()
influence_scores = np.zeros(len(train_data))
start = time.time()
time_log_iters = 100

for iter_i in range(max_iters):
    if iter_i % time_log_iters == 0:
        duration = time.time() - start 
        print(f"Time until iter {iter_i}: {duration:.4f}sec")
    if iter_i % ckpt_iters == 0:
        ckpt_model = load_ckpt(ckpt_name=f"ckpt_{iter_i}.pt")
        ckpt_model.eval()
        
        attr_logits, _ = ckpt_model(attr_X)
        attr_logits = attr_logits[:,-1,:] / 1. # temperature
        v, _ = torch.topk(attr_logits, min(200, attr_logits.size(-1)))
        attr_logits[attr_logits < v[:,[-1]]] = -float("Inf")
        probs = F.softmax(attr_logits, dim=-1)
        attr_Y_prob = probs[0, attr_Y.item()]
        attr_Y_prob_gradient = torch.autograd.grad(attr_Y_prob, ckpt_model.parameters())

    for i in indices[iter_i*batch_size:(iter_i+1)*batch_size]:
        X = torch.unsqueeze(torch.from_numpy((train_data[i:i+block_size]).astype(np.int64)).pin_memory().to(device, non_blocking=True), 0)
        Y = torch.unsqueeze(torch.from_numpy((train_data[i+1:i+1+block_size]).astype(np.int64)).pin_memory().to(device, non_blocking=True), 0)
        logits, loss = ckpt_model(X, Y)
        training_gradient = torch.autograd.grad(loss, ckpt_model.parameters())
        influence_scores[i] = tuple_inner_product(training_gradient, attr_Y_prob_gradient)

Time until iter 0: 0.0002sec
number of parameters: 10.65M
Loaded checkpoint model ckpt_0.pt
Time until iter 100: 49.6408sec
Time until iter 200: 99.2457sec
Time until iter 300: 148.5584sec
Time until iter 400: 197.9722sec
Time until iter 500: 247.5307sec
Time until iter 600: 296.8218sec
Time until iter 700: 346.5458sec
Time until iter 800: 396.0292sec
Time until iter 900: 445.4531sec
Time until iter 1000: 495.0766sec
number of parameters: 10.65M
Loaded checkpoint model ckpt_1000.pt
Time until iter 1100: 544.6561sec
Time until iter 1200: 593.9342sec
Time until iter 1300: 643.4771sec
Time until iter 1400: 692.7682sec
Time until iter 1500: 742.0047sec
Time until iter 1600: 791.4791sec
Time until iter 1700: 840.6524sec
Time until iter 1800: 890.1010sec
Time until iter 1900: 939.3306sec
Time until iter 2000: 987.7405sec
number of parameters: 10.65M
Loaded checkpoint model ckpt_2000.pt
Time until iter 2100: 1037.1159sec
Time until iter 2200: 1086.6880sec
Time until iter 2300: 1135.5715sec
Ti

In [54]:
N = 64
cnt = 0
influnece_scores_ = influence_scores.copy()
topN_pos_influence = []  # 483533, 742877, 42431, 514847, 670758

while True:
    if cnt == N: break

    top_influence_idx = np.argmin(influnece_scores_)
    topN_pos_influence.append(top_influence_idx)
    cnt += 1
#     influnece_scores_[top_influence_idx-block_size+1:top_influence_idx+block_size] = float("Inf")
    influnece_scores_[top_influence_idx] = float("Inf")

print(topN_pos_influence)
print(influence_scores[topN_pos_influence])

[483533, 742877, 42431, 483529, 42425, 742895, 483573, 742883, 742874, 514847, 42498, 670758, 60369, 670748, 483574, 42432, 483566, 670737, 483559, 670767, 742870, 514895, 742793, 483672, 42456, 670756, 505991, 42416, 742873, 483681, 60246, 561191, 742851, 483589, 398928, 60251, 765767, 235250, 140424, 670703, 670774, 561196, 397207, 483523, 483596, 230683, 504530, 801169, 561206, 60357, 742910, 670778, 848842, 230677, 742806, 32683, 765809, 54215, 54194, 42409, 483620, 742789, 13932, 483684]
[-10.38266182 -10.20835876  -9.94726467  -9.86074352  -9.79647636
  -9.58438206  -9.50465202  -9.45061588  -9.41490269  -9.39520741
  -9.36907196  -9.36326408  -9.31201935  -9.29672718  -9.27636433
  -9.26184273  -9.22461033  -9.20440483  -9.15724754  -9.12469673
  -9.09008408  -8.98343277  -8.93583107  -8.87154293  -8.84836388
  -8.84537888  -8.84458256  -8.77223015  -8.75917339  -8.75551224
  -8.72925758  -8.70864677  -8.67463684  -8.65811539  -8.58307934
  -8.56818199  -8.56700516  -8.54398537 

In [30]:
N = 5
cnt = 0
influnece_scores_ = influence_scores.copy()
topN_neg_influence = []

while True:
    if cnt == N: break

    top_influence_idx = np.argmax(influnece_scores_)
    topN_neg_influence.append(top_influence_idx)
    cnt += 1
    influnece_scores_[top_influence_idx-block_size+1:top_influence_idx+block_size] = -float("Inf")

print(topN_neg_influence)
print(influence_scores[topN_neg_influence])

[670410, 837416, 662730, 157434, 796033]
[2.6630044  2.54692841 2.30066371 2.06376839 1.96041548]


In [55]:
print("<<< POSITIVE INFLUENCE >>>")
for cnt, i in enumerate(topN_pos_influence):
    print(f"{cnt}.")
    print(decode(train_data[i:i+block_size+1]))
    print("==================")
    
    if cnt == 3: break

<<< POSITIVE INFLUENCE >>>
0.
:
To-morrow will I send.

ROMEO:
So thrive my soul--

JULIET:
A thousand times good night!

ROMEO:
A thousand times the worse, to want thy light.
Love goes toward love, as schoolboys from
their books,
But love from love, toward school with heavy looks.

JUL
1.
who I am.
Good lady,
No court in Europe is too good for thee;
What dost thou then in prison?
Now, good sir,
You know me, do you not?

Gaoler:
For a worthy lady
And one whom much I honour.

PAULINA:
Pray you then,
Conduct me to the queen.

Gaoler:
I may not,
2.
our follows Coriolanus.
Welcome to Rome, renowned Coriolanus!

All:
Welcome to Rome, renowned Coriolanus!

CORIOLANUS:
No more of this; it does offend my heart:
Pray now, no more.

COMINIUS:
Look, sir, your mother!

CORIOLANUS:
O,
You have, I know, petition
3.
rief:
To-morrow will I send.

ROMEO:
So thrive my soul--

JULIET:
A thousand times good night!

ROMEO:
A thousand times the worse, to want thy light.
Love goes toward love, as schoolboys 

### Exclude topN_pos examples and retrain the model

In [25]:
removed_indices = indices[:max_iters*batch_size].numpy().copy()
for i in topN_pos_influence:
    removed_indices = np.delete(removed_indices, removed_indices==i)
print(len(removed_indices))

319936


In [47]:
new_model_args = dict(n_layer=n_layer, n_head=n_head, n_embd=n_embed, block_size=block_size, bias=bias, vocab_size=None, dropout=dropout)
new_model_args["vocab_size"] = meta_vocab_size if meta_vocab_size is not None else 50304
new_gptconf = GPTConfig(**new_model_args)
new_model = GPT(new_gptconf).to(device)

number of parameters: 10.65M


In [48]:
scaler = torch.cuda.amp.GradScaler(enabled=(dtype=="float16"))
new_optimizer = new_model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)

num decayed parameter tensors: 26, with 10,740,096 parameters
num non-decayed parameter tensors: 13, with 4,992 parameters
using fused AdamW: True


In [49]:
num_epochs = 1
num_train_iters = (len(train_data)-block_size+batch_size-1) // batch_size

for epoch in range(num_epochs):
    # start eval
    loss = model_eval(new_model)
    print(f"Epoch {epoch} eval loss: {loss:.4f}")
    
    # start training
    new_model.train()
    for iter_i in range(max_iters-1):
        ix = removed_indices[iter_i*batch_size:min((iter_i+1)*batch_size, len(indices))]
        X = torch.stack([torch.from_numpy((train_data[i:i+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        Y = torch.stack([torch.from_numpy((train_data[i+1:i+1+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        
        with ctx:
            logits, loss = new_model(X,Y)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(new_model.parameters(), grad_clip)
        new_optimizer.step()
        new_optimizer.zero_grad(set_to_none=True)
        if iter_i % log_iters == 0: print(f"Iter {iter_i} --- Loss: {loss.item()}")

Epoch 0 eval loss: 4.3246
Iter 0 --- Loss: 4.3122968673706055
Iter 100 --- Loss: 2.551508903503418
Iter 200 --- Loss: 2.4681930541992188
Iter 300 --- Loss: 2.3844492435455322
Iter 400 --- Loss: 2.169111728668213
Iter 500 --- Loss: 1.9566351175308228
Iter 600 --- Loss: 1.8073127269744873
Iter 700 --- Loss: 1.7086583375930786
Iter 800 --- Loss: 1.559480905532837
Iter 900 --- Loss: 1.523234248161316
Iter 1000 --- Loss: 1.4675263166427612
Iter 1100 --- Loss: 1.406903624534607
Iter 1200 --- Loss: 1.4059370756149292
Iter 1300 --- Loss: 1.3432624340057373
Iter 1400 --- Loss: 1.3720996379852295
Iter 1500 --- Loss: 1.3468955755233765
Iter 1600 --- Loss: 1.2828428745269775
Iter 1700 --- Loss: 1.2782714366912842
Iter 1800 --- Loss: 1.288665771484375
Iter 1900 --- Loss: 1.2476208209991455
Iter 2000 --- Loss: 1.2477058172225952
Iter 2100 --- Loss: 1.2114722728729248
Iter 2200 --- Loss: 1.2032109498977661
Iter 2300 --- Loss: 1.160823941230774
Iter 2400 --- Loss: 1.1823413372039795
Iter 2500 --- Loss

RuntimeError: stack expects a non-empty TensorList

In [None]:
print(f"After training val loss: {model_eval(new_model):.4f}")

In [None]:
# probability of 'o' to come after 'Rome'
new_model.eval()

attr_logits, _ = new_model(attr_X)
prob = F.softmax(attr_logits, dim=-1)
print("Probability of o to come after Rome:", prob[0,0,attr_Y.item()].item())

tensor_space = (torch.tensor(encode(" "), dtype=torch.long, device=device)[None, ...])
print("Probability of space to come after Rome:", prob[0,0,tensor_space.item()].item())

tensor_newline = (torch.tensor(encode("\n"), dtype=torch.long, device=device)[None, ...])
print("Probability of newline to come after Rome:", prob[0,0,tensor_newline.item()].item())

### Remove data with "Romeo"

In [45]:
removed_indices = indices[:max_iters*batch_size].numpy().copy()
for i in tqdm(indices.cpu().numpy()[:max_iters*batch_size]):
    text = decode(train_data[i:i+block_size+1])
    if "Romeo" in text:
        removed_indices = np.delete(removed_indices, np.where(removed_indices==i))

100%|███████████████████████████████████| 320000/320000 [03:09<00:00, 1691.96it/s]


In [46]:
print(len(removed_indices))

312193


In [50]:
new_model_args = dict(n_layer=n_layer, n_head=n_head, n_embd=n_embed, block_size=block_size, bias=bias, vocab_size=None, dropout=dropout)
new_model_args["vocab_size"] = meta_vocab_size if meta_vocab_size is not None else 50304
new_gptconf = GPTConfig(**new_model_args)
new_model = GPT(new_gptconf).to(device)

number of parameters: 10.65M


In [51]:
scaler = torch.cuda.amp.GradScaler(enabled=(dtype=="float16"))
new_optimizer = new_model.configure_optimizers(weight_decay, learning_rate, (beta1, beta2), device_type)

num decayed parameter tensors: 26, with 10,740,096 parameters
num non-decayed parameter tensors: 13, with 4,992 parameters
using fused AdamW: True


In [52]:
num_epochs = 1
num_train_iters = (len(train_data)-block_size+batch_size-1) // batch_size

for epoch in range(num_epochs):
    # start eval
    loss = model_eval(new_model)
    print(f"Epoch {epoch} eval loss: {loss:.4f}")
    
    # start training
    new_model.train()
    for iter_i in range(max_iters):
        ix = removed_indices[iter_i*batch_size:min((iter_i+1)*batch_size, len(indices))]
        X = torch.stack([torch.from_numpy((train_data[i:i+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        Y = torch.stack([torch.from_numpy((train_data[i+1:i+1+block_size]).astype(np.int64)) for i in ix]).pin_memory().to(device, non_blocking=True)
        
        with ctx:
            logits, loss = new_model(X,Y)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(new_model.parameters(), grad_clip)
        new_optimizer.step()
        new_optimizer.zero_grad(set_to_none=True)
        if iter_i % log_iters == 0: print(f"Iter {iter_i} --- Loss: {loss.item()}")

Epoch 0 eval loss: 4.2766
Iter 0 --- Loss: 4.263223648071289
Iter 100 --- Loss: 2.6077616214752197
Iter 200 --- Loss: 2.4815001487731934
Iter 300 --- Loss: 2.4124205112457275
Iter 400 --- Loss: 2.244668483734131
Iter 500 --- Loss: 2.0451645851135254
Iter 600 --- Loss: 1.8490885496139526
Iter 700 --- Loss: 1.7282294034957886
Iter 800 --- Loss: 1.589887022972107
Iter 900 --- Loss: 1.5398356914520264
Iter 1000 --- Loss: 1.4945701360702515
Iter 1100 --- Loss: 1.444478988647461
Iter 1200 --- Loss: 1.4442970752716064
Iter 1300 --- Loss: 1.3724384307861328
Iter 1400 --- Loss: 1.3988631963729858
Iter 1500 --- Loss: 1.3652894496917725
Iter 1600 --- Loss: 1.2999608516693115
Iter 1700 --- Loss: 1.2941566705703735
Iter 1800 --- Loss: 1.301592469215393
Iter 1900 --- Loss: 1.2709195613861084
Iter 2000 --- Loss: 1.263702154159546
Iter 2100 --- Loss: 1.2355328798294067
Iter 2200 --- Loss: 1.2279256582260132
Iter 2300 --- Loss: 1.1929025650024414
Iter 2400 --- Loss: 1.2127612829208374
Iter 2500 --- Los

RuntimeError: stack expects a non-empty TensorList

In [53]:
# probability of 'o' to come after 'Rome'
new_model.eval()

attr_logits, _ = new_model(attr_X)
prob = F.softmax(attr_logits, dim=-1)
print("Probability of o to come after Rome:", prob[0,0,attr_Y.item()].item())

tensor_space = (torch.tensor(encode(" "), dtype=torch.long, device=device)[None, ...])
print("Probability of space to come after Rome:", prob[0,0,tensor_space.item()].item())

tensor_newline = (torch.tensor(encode("\n"), dtype=torch.long, device=device)[None, ...])
print("Probability of newline to come after Rome:", prob[0,0,tensor_newline.item()].item())

Probability of o to come after Rome: 1.1718222594936378e-05
Probability of space to come after Rome: 0.31933608651161194
Probability of newline to come after Rome: 0.0013829857343807817


### Investigate the attributed data

In [62]:
for i in topN_pos_influence[:10]:
    print("Training data #", np.where(indices==i)[0][0], ":", i)

Training data # 30414 : 483533
Training data # 37839 : 742877
Training data # 54651 : 42431
Training data # 25854 : 483529
Training data # 23214 : 42425
Training data # 51840 : 742895
Training data # 52988 : 483573
Training data # 34977 : 742883
Training data # 22401 : 742874
Training data # 33611 : 514847


### Investigate model embedding

In [29]:
def get_model_embedding(idx):
    b, t = idx.size()
    pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)

    # forward the GPT model itself
    tok_emb = model.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
    pos_emb = model.transformer.wpe(pos) # position embeddings of shape (t, n_embd)
    x = model.transformer.drop(tok_emb + pos_emb)
    for block in model.transformer.h:
        x = block(x)
    x = model.transformer.ln_f(x)

    print(x.shape)

get_model_embedding(attr_X)

torch.Size([1, 5, 384])


torch.Size([1, 5])