In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

gpt_model = AutoModelForCausalLM.from_pretrained("gpt2-medium", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")

In [3]:
from latent import *
model = LatentLM("gpt2-medium", model=gpt_model, tokenizer=tokenizer)

In [4]:
r = model(["How", "What are you up to"])
r.discrete()

tensor([[0, 0, 0, 0, 0, 1, 1],
        [0, 1, 1, 1, 1, 1, 1]], device='cuda:0')
tensor([[0, 0, 0, 0, 0, 0, 1],
        [0, 0, 1, 2, 3, 4, 5]], device='cuda:0')
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.0013],
        [0.0000, 0.0010, 0.0019, 0.0018, 0.0017, 0.0013, 0.0019]],
       device='cuda:0', grad_fn=<MeanBackward1>)
[[0, 0, 0, 0, 0, 50256, 2437], [0, 50256, 2061, 389, 345, 510, 284]]
mean hidden_states tensor([-0.2388, -0.1979], device='cuda:0', grad_fn=<MeanBackward1>)
torch.Size([2, 50257]) tensor([-86.9224, -99.5600], device='cuda:0', grad_fn=<MeanBackward1>)


['How to', 'What are you up to?']

In [5]:
r.layer(-1)

<gpt2-medium.layer(-1).LatentTensor[2, 7] <-> 'How[TOK]'
                                              'What are you up to[TOK]'

In [16]:
r

<gpt2-xl.LatentTensor[49, 2, 6] <-> 'How[TOK]'
                                    'What are you up to[TOK]'

In [6]:
print(r.complete())

torch.Size([2, 50257]) tensor([-86.9224, -99.5600], device='cuda:0', grad_fn=<MeanBackward1>)
tensor([[0, 0, 0, 0, 0, 1, 1, 1],
        [0, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')
tensor([[0, 0, 0, 0, 0, 0, 1, 2],
        [0, 0, 1, 2, 3, 4, 5, 6]], device='cuda:0')
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0010, 0.0013, 0.0019],
        [0.0000, 0.0010, 0.0019, 0.0018, 0.0017, 0.0013, 0.0019, 0.0014]],
       device='cuda:0', grad_fn=<MeanBackward1>)
[[0, 0, 0, 0, 0, 50256, 2437, 284], [0, 50256, 2061, 389, 345, 510, 284, 30]]
mean hidden_states tensor([-0.2591, -0.2352], device='cuda:0', grad_fn=<MeanBackward1>)
<gpt2-medium.LatentTensor[25, 2, 8] <-> 'How to[TOK]'
                                        'What are you up to?[TOK]'



In [17]:
r.complete()

<gpt2-xl.LatentTensor[49, 2, 7] <-> 'How to[TOK]'
                                    'What are you up to?[TOK]'

In [9]:
r.distribution()

tensor([[ -5.6751,  -8.6013, -13.2165,  ..., -18.4128, -12.4559, -10.6140],
        [ -5.5568, -10.8951, -11.8829,  ..., -19.0959, -19.4955,  -9.3850]],
       grad_fn=<LogSoftmaxBackward0>)

In [10]:
r.layer(-1)

<gpt2-medium.layer(-1).LatentTensor[2, 6] <-> 'How[TOK]'
                                              'What are you up to[TOK]'

In [7]:
color = "red"
y = model(f"The boat is {color}. The color of the boat is", name='hard_color')
y


<gpt2-medium.LatentTensor[25, 1, 12] <-> 'The boat is red. The color of the boat is[TOK]'

In [None]:
colors = ["red", "blue"]
soft_color = model([["The boat is ", colors, "."]], name='soft_color')[-2]
soft_color = adapter(soft_color)

In [8]:
soft_color.shape

[49, 2, 1, 1600]

In [43]:
from latent import *

class Adapter(LatentModule):
    def __init__(self):
        super().__init__()
        
        self.lin1 = torch.nn.Linear(1024, 8)
        self.lin2 = torch.nn.Linear(8, 1024)

    def forward(self, x):
        x = self.lin1(x)
        x = self.lin2(x)
        return x
    
adapter = Adapter().to(model.device)

In [44]:
class DirectPrompt(torch.nn.Module):
    def forward(self, color):
        return model([["The boat is ", color, ". The color of the boat is"]], name='hard_color')
direct = DirectPrompt()

class SoftPrompt(torch.nn.Module):
    def forward(self, color):
        soft_color = model([["The boat is ", color, "."]], name='soft_color')[-2]
        soft_color = adapter(soft_color)
        return model([[soft_color, "The color of the boat is"]], name='soft_color')
soft = SoftPrompt()

In [45]:
direct("red"), soft("red")

(<gpt2-medium.LatentTensor[25, 1, 13] <-> 'The boat is red. The color of the boat is[TOK]',
 <gpt2-medium.LatentTensor[25, 1, 8] <-> '{Adapter(soft_color)}The color of the boat is[TOK]')

In [18]:
from latent import *

def color_match(colors: List[str]):
    x = soft(colors)
    y = direct(colors)
    return x.layer(-1)[-1] == y.layer(-1)[-1]
color_match(["red"])

LatentEqualityObjective:
 - <gpt2-medium.layer(-1).LatentTensor[1, 1] <-> ' is[TOK]'

 - <gpt2-medium.layer(-1).LatentTensor[1, 1] <-> ' is[TOK]'

In [17]:
color = "pink"
direct("pink").discrete()

['The boat is pink. The color of the boat is pink']

In [16]:
colors = []
with open("working_colors.txt") as f:
    for color in f:
        colors.append(color.strip())
print(colors)

['apricot', 'aqua', 'black', 'blue', 'brown', 'cardinal', 'champagne', 'chocolate', 'cinnabar', 'cinnamon', 'cobalt', 'copper', 'coral', 'corn', 'cream', 'crimson', 'dandelion', 'denim', 'emerald', 'eggplant', 'gamboge', 'gold', 'goldenrod', 'green', 'grey', 'indigo', 'jade', 'khaki', 'lavender', 'lemon', 'lime', 'mustard', 'olive', 'orchid', 'peach', 'pink', 'plum', 'puce', 'pumpkin', 'purple', 'red', 'rose', 'ruby', 'salmon', 'shocking-pink', 'silver', 'smalt', 'tan', 'tenné-(tawny)', 'thistle', 'tomato', 'violet', 'wheat', 'white', 'yellow']


In [14]:
direct(["orchid", "olive"]).complete()

<gpt2-medium.LatentTensor[25, 2, 15] <-> 'The boat is orchid. The color of the boat is a[TOK]'
                                         'The boat is olive. The color of the boat is olive[TOK]'

In [15]:
direct(["olive"]).complete()

<gpt2-medium.LatentTensor[25, 1, 14] <-> 'The boat is olive. The color of the boat is olive[TOK]'

In [None]:
test = colors[:30]
batch_size = 2
n = 0
for i in range(0, len(test), batch_size):
    batch = test[i:i+batch_size]
    n += LatentModule.token_match(color_match(batch)).sum().item()
    print(batch, direct(batch).discrete(), soft(batch).discrete())
    print(n)

In [50]:
import neptune
def experiment(loss_fct, lr=1e-4, epochs=10):
    run = neptune.init_run(
        project="lbeurerkellner/lat",
        api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI4ZDk4ZjE0ZS0wM2M1LTRkZmItODcxMC02OGYxOGFmZWM3OGYifQ==",
    )  # your credentials
    
    params = {"learning_rate": lr, "loss_fct": loss_fct, "epochs": epochs}
    run["parameters"] = params

    def reporter(epoch, train_acc, test_acc, loss):
        run["train/loss"].append(loss)
        run["train/accuracy"].append(train_acc)
        run["test/accuracy"].append(test_acc)
    
    train, test = colors[:30], colors[30:]
    train_acc, test_acc = adapter.fit(color_match, train, epochs=epochs, lr=1e-4, loss_fct=loss_fct, test=test, epoch_callback=reporter)
    
    run.stop()

for lf in ["mse", "crossentropy", "cosine", "mse+cosine", "mse+crossentropy", "cosine+crossentropy", "cosine+crossentropy+mse"]:
    for lr in [1e-3, 1e-4, 1e-5]:
        experiment(lf, lr=lr, epochs=1000)

https://app.neptune.ai/lbeurerkellner/lat/e/LAT-1


Epoch 100/100, loss=4.5265 Loss: 4.5265 Train accuracy: 23.33% Test accuracy: 8.00%: : 400it [01:40,  3.97it/s]                                                                   

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 12 operations to synchronize with Neptune. Do not kill this process.





All 12 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/lbeurerkellner/lat/e/LAT-1/metadata
