In [24]:
from transformer import Transformer
from transformers import AutoModelForSeq2SeqLM
from transformers import RobertaModel, AutoTokenizer, RobertaForCausalLM
from transformers import GenerationConfig
from transformers import RobertaConfig
from transformers import AutoTokenizer

In [25]:
model_config = RobertaConfig.from_pretrained("roberta-base")
model_config.is_decoder = True

In [27]:
roberta = RobertaForCausalLM.from_pretrained('roberta-base', config=model_config)
tokenizer = AutoTokenizer.from_pretrained("roberta-base", padding_side = "left")


In [33]:
text = "The quick brown fox jumps over the lazy dog"
input_ids = tokenizer(text, return_tensors="pt")
input_ids
#print(input_ids)
#roberta(input_ids)


{'input_ids': tensor([[    0,   133,  2119,  6219, 23602, 13855,    81,     5, 22414,  2335,
             2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [8]:
gen_config = GenerationConfig(min_new_tokens = 15, max_new_tokens = 25)
gen_config

GenerationConfig {
  "max_new_tokens": 25,
  "min_new_tokens": 15
}

In [9]:
model = roberta
text = "The three colors are"
model_inputs = tokenizer([text], return_tensors="pt")#,padding='max_length',truncation=True)#.to("cuda")
model_inputs[0]


Encoding(num_tokens=6, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

In [10]:
generated_ids = model.generate(**model_inputs, generation_config=gen_config)#, do_sample = True)
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

'The three colors are'

In [139]:
generated_ids

tensor([[   0,  133,  130, 8089,   32,    2,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0]])

In [12]:
from transformers import RobertaForCausalLM


model = RobertaForCausalLM.from_pretrained("roberta-base", is_decoder=True)
tokenizer = AutoTokenizer.from_pretrained("roberta-base", device_map="auto")
prompt = "Tell me about the french revolution."

inputs = tokenizer(prompt, return_tensors="pt")
generate_ids = model.generate(inputs.input_ids.to(model.device), max_new_tokens=128, do_sample=True)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])

Tell me about the french revolution...


In [140]:
from transformers import OPTForCausalLM, AutoTokenizer

model = OPTForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m", device_map="auto")
prompt = "Tell me about the french revolution."

inputs = tokenizer(prompt, return_tensors="pt")
generate_ids = model.generate(inputs.input_ids.to(model.device), max_new_tokens=128, do_sample=True)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

Tell me about the french revolution.
Tell me about French socialism.
I mean, what is it you like about french politics?


In [14]:
# instantiate sentence fusion model
from transformers import EncoderDecoderModel, AutoTokenizer
sentence_fuser = EncoderDecoderModel.from_pretrained("google/roberta2roberta_L-24_discofuse")
tokenizer = AutoTokenizer.from_pretrained("google/roberta2roberta_L-24_discofuse")

input_ids = tokenizer(
    "This is the first sentence. This is the second sentence.", add_special_tokens=False, return_tensors="pt"
).input_ids

outputs = sentence_fuser.generate(input_ids)

print(tokenizer.decode(outputs[0]))

<s>This is the first sentence. This is the second sentence.</s>


In [23]:
from transformers import EncoderDecoderModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
#model = EncoderDecoderModel.from_encoder_decoder_pretrained("bert-base-cased", "bert-base-cased")
model = RobertaForCausalLM.from_pretrained("roberta-base", is_decoder=True)

In [17]:
from adapters import BnConfig, AutoAdapterModel, PromptTuningConfig
import adapters
from peft import PeftConfig


adapters.init(model)



In [18]:
config = PromptTuningConfig(prompt_length=10)
model.add_adapter("dummy", config=config)

In [19]:
for n,p in model.named_parameters():
    print(n)
    

roberta.embeddings.word_embeddings.weight
roberta.embeddings.position_embeddings.weight
roberta.embeddings.token_type_embeddings.weight
roberta.embeddings.LayerNorm.weight
roberta.embeddings.LayerNorm.bias
roberta.encoder.layer.0.attention.self.query.weight
roberta.encoder.layer.0.attention.self.query.bias
roberta.encoder.layer.0.attention.self.key.weight
roberta.encoder.layer.0.attention.self.key.bias
roberta.encoder.layer.0.attention.self.value.weight
roberta.encoder.layer.0.attention.self.value.bias
roberta.encoder.layer.0.attention.output.dense.weight
roberta.encoder.layer.0.attention.output.dense.bias
roberta.encoder.layer.0.attention.output.LayerNorm.weight
roberta.encoder.layer.0.attention.output.LayerNorm.bias
roberta.encoder.layer.0.intermediate.dense.weight
roberta.encoder.layer.0.intermediate.dense.bias
roberta.encoder.layer.0.output.dense.weight
roberta.encoder.layer.0.output.dense.bias
roberta.encoder.layer.0.output.LayerNorm.weight
roberta.encoder.layer.0.output.LayerNorm

In [22]:
from transformers import TrainingArguments, Seq2SeqTrainingArguments, Seq2SeqTrainer

train_args = Seq2SeqTrainingArguments(output_dir="tbag")
trainer = Seq2SeqTrainer(model=model, args=train_args)
train_args



In [None]:
from dataset.scan_dataset import ScanDataset
def _mk_train_dataset(e_type):
    paths = e_type.get_data_paths()
    max_len = self.config.emb_dim
    device = self.device
    train_dataset = ScanDataset(
        dataset_path= paths["train"],
        in_seq_len=max_len,
        out_seq_len=max_len + 20,
        device=device,
    )
    return train_dataset

def _mk_test_dataset():
    # call alwyas after mk_train_dataset
    paths = self.e_type.get_data_paths()
    max_len = self.config.emb_dim
    device = self.device
    test_dataset = ScanDataset(
        dataset_path= paths["test"],
        vocab=self.train_dataset.vocab,
        in_seq_len=max_len,
        out_seq_len=max_len + 20,
        device=device,
    ) 
    return test_dataset

#   def _mk_dataloaders(self): 

#         train_loader = DataLoader(self.train_dataset,
#                                 batch_size=self.config.batch_size,
#                                 shuffle=True
#                                 )
#         test_loader = DataLoader(self.test_dataset,
#                                 batch_size=self.config.batch_size_eval,
#                                 )
#         return train_loader, test_loader

In [175]:
from transformers import BartTokenizer, BartForCausalLM, BartModel
# https://github.com/huggingface/transformers/blob/v4.48.0/src/transformers/models/bart/modeling_bart.py#L948
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartModel.from_pretrained('facebook/bart-base')

inputs = tokenizer(["Three colors: blue,","turn left after jump twice"], return_tensors="pt", padding="max_length", truncation=True, max_length=30,padding_side='right')
lbls = tokenizer(["Three colors: blue, red, green","I_JUMP I_JUMP I_TURN_LEFT"], return_tensors="pt", padding="max_length", truncation=True, max_length=30,padding_side='right')
#outputs = model.generate(**inputs,max_new_tokens=20)
outputs = model(**inputs)
inputs.keys()


dict_keys(['input_ids', 'attention_mask'])

In [214]:
import torch
upscale = torch.nn.Linear(768, tokenizer.vocab_size)

In [176]:
enc_out = model.encoder(inputs['input_ids'], attention_mask=inputs['attention_mask'])
enc_out["last_hidden_state"].shape

torch.Size([2, 30, 768])

In [177]:
dec_out = model.decoder(inputs['input_ids'], attention_mask=inputs['attention_mask'], encoder_hidden_states=enc_out["last_hidden_state"])
dec_out["last_hidden_state"].shape

torch.Size([2, 30, 768])

In [217]:
outt = upscale(dec_out["last_hidden_state"])
outt.shape

torch.Size([2, 30, 50265])

In [216]:
preds = outt.argmax(-1)
preds.shape

torch.Size([2, 30])

In [218]:
# decode with tokenizer
tokenizer.batch_decode(preds)

[' baffled reservations]} redirect contextsAid Rece cafes ABVajoajoajoajoajohalhalhal cafes cafes handsomehalhalhalhal cafes cafes cafes cafes Spell cafes',
 ' baffledUnlike Chineseessim /* Dex Rece ridge Simulator containingayson GeForceayson containing containing containingayson ridge ridgeayson containing containing Malt Malt McF jur jur jurampsdoor']

In [219]:
from torch.nn import CrossEntropyLoss
criterion = CrossEntropyLoss()#(ignore_index=self.train_dataset.vocab.pad_idx)

In [220]:
lbls['input_ids'].shape

torch.Size([2, 30])

In [222]:
loss = criterion(outt.permute(0,2,1), lbls["input_ids"])
loss

tensor(10.6045, grad_fn=<NllLoss2DBackward0>)