In [1]:
from transformers import CLIPTextModel, CLIPTokenizer, CLIPFeatureExtractor


# Directory of the locally cached Stable Diffusion v1.4 checkpoint; the
# tokenizer and the text encoder are loaded from its sub-folders.
PRETRAINED_PATH = 'pretrained/models--CompVis--stable-diffusion-v1-4'

tokenizer = CLIPTokenizer.from_pretrained(PRETRAINED_PATH, subfolder='tokenizer')
text_encoder = CLIPTextModel.from_pretrained(PRETRAINED_PATH, subfolder='text_encoder')

# Tokenize a single prompt, padded out to the tokenizer's maximum length and
# returned as PyTorch tensors.
prompt = 'A spine-chillingly terrifying landscape never before seen by mankind.'
tokens = tokenizer(
	prompt,
	padding="max_length",
	max_length=tokenizer.model_max_length,
	return_tensors="pt",
)
print(tokens.input_ids, tokens.attention_mask)

# Only the token ids are fed to the encoder; the attention mask printed above
# is not passed in.
embeddings = text_encoder(tokens.input_ids)

# Display the shapes of the per-token hidden states and of the pooled output.
hidden_shape = embeddings.last_hidden_state.shape
pooled_shape = embeddings.pooler_output.shape
hidden_shape, pooled_shape


  from .autonotebook import tqdm as notebook_tqdm


tensor([[49406,   320, 19646,   268,  6498,  4796, 18526,  5727,  1426,  1348,
          2041,   638, 24155,   269, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407,
         49407, 49407, 49407, 49407, 49407, 49407, 49407]]) tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0]])


(torch.Size([1, 77, 768]), torch.Size([1, 768]))

In [5]:
# Display the loaded text encoder's configuration object (its repr lists
# fields such as hidden_size, max_position_embeddings and vocab_size).
text_encoder.config

CLIPTextConfig {
  "_name_or_path": "pretrained/models--CompVis--stable-diffusion-v1-4",
  "architectures": [
    "CLIPTextModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "dropout": 0.0,
  "eos_token_id": 2,
  "hidden_act": "quick_gelu",
  "hidden_size": 768,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 77,
  "model_type": "clip_text_model",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "torch_dtype": "float32",
  "transformers_version": "4.21.2",
  "vocab_size": 49408
}

In [36]:
# Encode three strings as a batch -- the tokenizer's BOS token text, its EOS
# token text and the plain text 'a' -- to see which ids they map to.
# In the recorded output every sequence starts with 49406 and ends with 49407,
# which differs from the bos_token_id / eos_token_id values (0 / 2) printed by
# `text_encoder.config`.
tokenizer([tokenizer.bos_token, tokenizer.eos_token, 'a'])

{'input_ids': [[49406, 49406, 49407], [49406, 49407, 49407], [49406, 320, 49407]], 'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1]]}

In [18]:
import torch


# Smoke test: put a linear "unembedding" head on top of the CLIP text encoder
# and check that a cross-entropy loss against the input token ids can be
# back-propagated.

# Read the sizes from the loaded encoder's config rather than hard-coding
# 49408 / 768, so the head always matches the checkpoint that was loaded.
VOCAB_SIZE = text_encoder.config.vocab_size

# Freeze the parameters of the embeddings sub-module only; every other encoder
# parameter keeps its current requires_grad setting.
for p in text_encoder.text_model.embeddings.parameters():
	p.requires_grad = False

# Maps each hidden state to one logit per vocabulary entry.
unembed = torch.nn.Linear(text_encoder.config.hidden_size, VOCAB_SIZE)

# No explicit attention mask is passed. The commented-out alternative builds a
# (1, L, L) strictly-lower-triangular matrix.
# NOTE(review): Hugging Face's `attention_mask` argument is documented as a
# (batch, seq) padding mask -- confirm before enabling the triangular variant.
mask = None  # 1 - torch.triu(torch.ones(1, tokenizer.model_max_length, tokenizer.model_max_length))
embeddings = text_encoder(tokens.input_ids, attention_mask=mask)

# (batch, seq, hidden) -> (batch, seq, vocab) logits.
output_ids = unembed(embeddings.last_hidden_state)
# cross_entropy expects the class dimension second: (batch, vocab, seq).
o2 = output_ids.permute(0, 2, 1)

# NOTE(review): targets are not shifted relative to the logits, so position i
# is scored against its own input token (padding positions included) --
# confirm this is intended rather than a next-token objective.
loss = torch.nn.functional.cross_entropy(o2, tokens.input_ids)
loss.backward()

In [16]:
# 5x5 example of the triangular mask: ones strictly below the main diagonal,
# zeros on and above it, with a leading batch dimension of 1.
m = torch.tril(torch.ones(1, 5, 5), diagonal=-1)
m

tensor([[[0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0.],
         [1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 0.]]])

In [1]:

import os

import starry.utils.config
from starry.text.data.sentences import SentenceShift



# Root folder of the datasets, taken from the environment. Fail fast with a
# clear message: an unset variable would otherwise only surface later as an
# opaque TypeError inside os.path.join.
DATA_DIR = os.environ.get('DATA_DIR')
if DATA_DIR is None:
	raise RuntimeError('DATA_DIR environment variable is not set')

# Checkpoint folder handed to the dataset as its tokenizer location.
CLIP_PATH = 'pretrained/models--CompVis--stable-diffusion-v1-4'

# SentenceShift.load returns a one-element sequence here (hence the `data, =`
# unpacking). The '0/1' argument is passed through unchanged; its meaning is
# defined by SentenceShift.load.
sentences_path = os.path.join(DATA_DIR, 'mj-desc.txt')
data, = SentenceShift.load(sentences_path, {'tokenizer_path': CLIP_PATH}, '0/1')

# Shape of the `input_ids` tensor of every loaded entry.
[entry['input_ids'].shape for entry in data.entries]

  from .autonotebook import tqdm as notebook_tqdm


[torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Size([1, 77]),
 torch.Siz

In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset


# Dataset root comes from the environment (None when the variable is unset).
DATA_DIR = os.environ.get('DATA_DIR')

# Build the experiment configuration from the YAML file and load the single
# dataset selected by the '*0/1' split spec.
config = Configuration.create('configs/sentencegen-test.yaml', volatile=True)
[data] = loadDataset(config, data_dir=DATA_DIR, splits='*0/1')

# Pull the first item from the dataset and display it.
it = iter(data)
next(it)

  from .autonotebook import tqdm as notebook_tqdm


{'input_ids': tensor([[49406,   967,  6792,   530,  1573,   638, 15689, 21726,  2067,  1338,
            328,   267,  3696,   617,  4810, 49211,   269,   320,  5352,   539,
          25410,  2202,  5352,   539,   320,  1215,  2308,  3168, 47402,   530,
           4023,   638,  6473,   268, 45770,   749,   710,   267,  1549, 11737,
            267,   765,  1006,   597,   267,  1116,  6052, 14211,  3892,  4927,
          15733,   652, 38787,  2802, 16505,   532, 13316,  2353,   267,  1549,
          11737,   267,   765,  1006,   597,   267,  1116,  6052, 14211,  3892,
           4927, 15733,   652, 38787,  2802, 16505,   267],
         [49406,  2863,   539,   320,  3769,  6982,  1449,  1611,   267, 47323,
           4998,  1449,  8306,   267,   593, 15562, 11006, 12252,  5185,   556,
          14922,  2225,   267,  1220, 24344,   539,  4769,   267,  4809,  1492,
           7626,  1746,   267,   736,   267, 29802,  7118,   267,  1400, 22014,
            267,  3144,  6493,   267, 27520,  3

In [1]:
# model test with loss
import os
import matplotlib.pyplot as plt
import numpy as np
import torch

from starry.utils.config import Configuration
from starry.utils.dataset_factory import loadDataset
from starry.utils.model_factory import loadModel


# Dataset root comes from the environment (None when the variable is unset).
DATA_DIR = os.environ.get('DATA_DIR')

# Configuration and the single dataset selected by the '*0/1' split spec.
config = Configuration.createOrLoad('configs/sentencegen-test.yaml', volatile=True)
[data] = loadDataset(config, data_dir=DATA_DIR, splits='*0/1')

# Instantiate the model described by the config's 'model' section, using the
# 'Loss' postfix variant.
model = loadModel(config['model'], postfix='Loss')

# Run one batch through the model; the recorded output is a
# (loss tensor, metrics dict) pair.
it = iter(data)
batch = next(it)

pred = model(batch)
pred


  from .autonotebook import tqdm as notebook_tqdm


(tensor(10.7691, grad_fn=<NllLoss2DBackward0>), {'acc': 0.0})