In [None]:
import transformers
import torch as t
import torchvision
import einops

In [None]:
# Load GPT-J 6B behind a sequence-classification interface. Later cells rely on
# `gpt_model.transformer` (wte, drop, h, ln_f) and `gpt_model.score`.
# NOTE(review): the checkpoint name suggests a plain language-model checkpoint, so
# the `score` head is presumably freshly initialised — confirm against the load warnings.
gpt_model = transformers.AutoModelForSequenceClassification.from_pretrained("EleutherAI/gpt-j-6B")

In [None]:
# Number of sequential sections split_model cuts the model into.
# Presumably one section per GPU, going by the name; no device placement is
# done in the visible cells.
n_gpus = 4

class BlockWrapper(t.nn.Module):
    """Adapt a transformer block so it can be chained inside ``t.nn.Sequential``.

    ``Sequential`` passes exactly one value from each module to the next, while
    the wrapped block may return a sequence whose first element is the
    activations followed by extra outputs. This wrapper forwards only the
    activations.

    Args:
        gpt_block: The module to wrap; stored as ``self.model``.
    """

    def __init__(self, gpt_block):
        super().__init__()
        self.model = gpt_block

    def forward(self, inputs):
        """Run the wrapped block on ``inputs`` and return only its activations.

        Args:
            inputs: The single positional argument passed to the wrapped block.

        Returns:
            The wrapped block's output if it is already a tensor, otherwise the
            first element of the returned sequence.
        """
        outputs = self.model(inputs)
        # A bare tensor must be passed through untouched: star-unpacking it
        # would iterate over its first dimension and silently return only the
        # first slice instead of the full activations.
        if isinstance(outputs, t.Tensor):
            return outputs
        activations, *_ = outputs
        return activations

def split_model(model, n_gpus):
    """Split a GPT-J style sequence classifier into ``n_gpus`` sequential stages.

    Every block in ``model.transformer.h`` is wrapped in ``BlockWrapper`` and the
    wrapped blocks are divided into ``n_gpus`` contiguous, near-equal sections.
    The first stage additionally starts with ``model.transformer.wte`` and
    ``model.transformer.drop``; the last stage additionally ends with
    ``model.transformer.ln_f`` and ``model.score``. Calling the returned stages
    one after another therefore applies wte, drop, every block, ln_f and score
    exactly once, in that order.

    Args:
        model: Object exposing ``transformer.wte``, ``transformer.drop``,
            ``transformer.h`` (iterable of blocks), ``transformer.ln_f`` and
            ``score``.
        n_gpus: Number of stages to produce; must be at least 1.

    Returns:
        A list of ``n_gpus`` ``t.nn.Sequential`` modules, in execution order.

    Raises:
        ValueError: If ``n_gpus`` is smaller than 1.
    """
    if n_gpus < 1:
        raise ValueError("n_gpus must be at least 1, got %r" % (n_gpus,))

    blocks = [BlockWrapper(block) for block in model.transformer.h]
    # Use the model's real depth instead of a hard-coded layer count.
    n_blocks = len(blocks)

    # Section boundaries via integer floor division: the same split points as
    # truncating an evenly spaced float range, without float rounding error.
    bounds = [(i * n_blocks) // n_gpus for i in range(n_gpus + 1)]
    gpt_block_sections = [
        t.nn.Sequential(*blocks[start:end])
        for start, end in zip(bounds[:-1], bounds[1:])
    ]

    if n_gpus == 1:
        # With a single stage the only section is both first and last; build
        # one module so the blocks are not applied twice.
        return [
            t.nn.Sequential(
                model.transformer.wte,
                model.transformer.drop,
                gpt_block_sections[0],
                model.transformer.ln_f,
                model.score,
            )
        ]

    first = t.nn.Sequential(
        model.transformer.wte,
        model.transformer.drop,
        gpt_block_sections[0]
    )

    last = t.nn.Sequential(
        gpt_block_sections[-1],
        model.transformer.ln_f,
        model.score
    )

    models = [first] + gpt_block_sections[1:-1] + [last]
    return models


In [None]:
# Split the loaded model into n_gpus sections and write each section to its own
# file (gpt-j-0.pt, gpt-j-1.pt, ...) relative to the current working directory.
models = split_model(gpt_model, n_gpus)
for i, model in enumerate(models):
    # The module object itself is saved, not a state_dict.
    # NOTE(review): loading these files presumably needs BlockWrapper to be
    # importable in the loading process — confirm.
    t.save(model, 'gpt-j-%d.pt' % i)

In [None]:
# Shape of the first entry of inputs_list.
# NOTE(review): in the visible cells inputs_list is only assigned further down,
# so this fails on a fresh top-to-bottom run.
inputs_list[0].shape

In [9]:
# Feed the input through the four sections one after another, keeping each
# intermediate result (a, b, c) and the final output (d) for inspection.
# NOTE(review): the recorded output of this cell is a NameError for `models`,
# i.e. it was last run in a kernel where the split cell had not been executed.
# NOTE(review): inputs_list is built further down as a Python list of tensors;
# models[0] presumably needs a single tensor such as inputs_list[0] — confirm.
a = models[0](inputs_list)
b = models[1](a)
c = models[2](b)
d = models[3](c)

NameError: name 'models' is not defined

In [None]:
# Shapes of the four per-section outputs.
# NOTE(review): a later cell rebinds a and b to scalar tensors, so the result
# here depends on execution order.
[x.shape for x in (a,b,c,d)]

In [None]:
# Tokenizer loaded from the same checkpoint name as gpt_model.
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")


In [None]:
# Vocabulary size reported by the tokenizer (fake_imdb below defaults its
# vocab_size to 50257).
tokenizer.vocab_size

In [None]:
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

# Prompts used for the equivalence check. A five-prompt list that was
# immediately overwritten, and a debug print that could only fire for prompt
# indices 3 and 4, were unreachable and have been removed.
input_texts = [
    "Should Tamera refactor the code? Answer: ",
    "Should Tamera refactor the code? Answer: ",
]

# One (1, seq_len) int32 tensor per prompt. Prompts are run one at a time, so
# prompts of different lengths never have to share a batch.
inputs_list = [
    t.tensor(token_ids, dtype=t.int32).unsqueeze(0)
    for token_ids in tokenizer(input_texts)['input_ids']
]

# Chaining the sections does not depend on the prompt, so build it once.
our_model = t.nn.Sequential(*models)

# Only forward outputs are compared; skip autograd bookkeeping.
with t.no_grad():
    for i, inputs in enumerate(inputs_list):
        original_output = gpt_model(inputs).logits  # (1, 2)

        # The chained sections give one score row per position; the last
        # position is the one compared with the original model's logits.
        our_output = our_model(inputs)[:, -1]

        assert original_output.equal(our_output), (
            "split model output differs from the original model for prompt %d" % i
        )

In [10]:
# Unpacking a 1-D tensor yields its elements as 0-d tensors (the next cell
# shows a as tensor(1)).
# NOTE(review): this rebinds a and b, which an earlier cell uses for the
# per-section outputs.
a, b = t.tensor([1, 2])

In [11]:
# Display the first unpacked element.
a

tensor(1)

In [None]:
# Enable autoreload so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
# Pull the public names of the utils module into the notebook namespace.
# NOTE(review): the star import hides where names come from. For example,
# fake_imdb_data is called in a later cell but not defined in the visible
# cells, so it presumably comes from here. Consider explicit imports.
from utils import *

# Loading the real IMDB data is currently disabled.
#train, test = imdb_data()

In [None]:
# Check the dtype of a random vector of 512 integers drawn from [0, 50257).
t.randint(0, 50257, (512,)).long().dtype

In [None]:
def fake_imdb(batch_size=32, n_batches=10, max_len=512, vocab_size=50257):
    """Generate random train and test batches of (sentiment, tokens) examples.

    Args:
        batch_size: Number of examples in each batch.
        n_batches: Number of batches in each of the two splits.
        max_len: Length of every token tensor.
        vocab_size: Exclusive upper bound for the random token ids.

    Returns:
        ``(train_batches, test_batches)``. Each split is a list of
        ``n_batches`` batches; each batch is a list of ``batch_size`` tuples
        ``(sentiment, tokens)`` where ``sentiment`` is a Python int in
        ``{0, 1}`` and ``tokens`` is a 1-D int64 tensor of length ``max_len``
        with values in ``[0, vocab_size)``.
    """

    def make_example():
        # The label is drawn before the tokens, so the random stream is
        # consumed in a fixed order per example.
        label = t.randint(0, 2, (1,)).item()
        token_ids = t.randint(0, vocab_size, (max_len,)).long()
        return label, token_ids

    def make_split():
        split = []
        for _ in range(n_batches):
            batch = []
            for _ in range(batch_size):
                batch.append(make_example())
            split.append(batch)
        return split

    # The train split is generated completely before the test split.
    train_batches = make_split()
    test_batches = make_split()
    return train_batches, test_batches

In [12]:
# Build the train/test batches.
# NOTE(review): the visible cells define fake_imdb, not fake_imdb_data; this call
# presumably resolves through `from utils import *` — confirm, or call
# fake_imdb() instead.
train, test = fake_imdb_data()

In [14]:
# Transpose the first training batch from a sequence of (label, input) pairs
# into one tuple of labels and one tuple of inputs.
train_labels, train_inputs = zip(*train[0])

In [18]:
# Stack the per-example inputs into a single tensor; the recorded output is
# torch.Size([32, 512]).
t.stack(train_inputs).shape

torch.Size([32, 512])

In [21]:
# Labels of the batch as a 1-D tensor; the recorded output is torch.Size([32]).
t.tensor(train_labels).shape

torch.Size([32])

In [8]:
# Shape of the input (element 1 of the pair) of the first example in the first
# training batch.
train[0][0][1].shape

torch.Size([512])

In [None]:
# NOTE(review): my_imdb is not defined in any visible cell, and it is indexed one
# level deeper than train[0][0][1] above — confirm where it comes from and
# how it is nested.
my_imdb[0][0][1][1].size()