In [1]:
import pandas as pd
import torch
import random
import numpy as np

from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader
from torch.optim import Adam

from transformers import BartTokenizerFast, DataCollatorWithPadding
from transformers import BartModel, BartForConditionalGeneration, Trainer, TrainingArguments, EvalPrediction
from datasets import Dataset, DatasetDict
from datasets import load_from_disk

# from transformers import AdamW
from transformers import get_scheduler

from tqdm.auto import tqdm

import argparse

# Hugging Face checkpoint identifier handed to `from_pretrained` for both the
# model architecture (in `load_model`) and the tokenizer.
CHECKPOINT = 'facebook/bart-base'

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def load_model(weights_path="../models/bart-paradetox"):
    """Build the BART seq2seq model and load fine-tuned weights into it.

    The architecture is instantiated from ``CHECKPOINT`` and its parameters
    are then overwritten with the state dict stored at ``weights_path``.

    Args:
        weights_path: Path to a file readable by ``torch.load`` that holds a
            state dict for ``BartForConditionalGeneration``. Defaults to the
            path this notebook originally hard-coded.

    Returns:
        The ``BartForConditionalGeneration`` instance with the loaded
        weights. No device placement happens here; the caller is expected to
        move the model with ``model.to(device)``.
    """
    model = BartForConditionalGeneration.from_pretrained(CHECKPOINT)
    # map_location="cpu" lets a checkpoint that was saved from a GPU be
    # deserialized on a CPU-only machine; without it torch.load tries to
    # restore tensors onto the device they were saved from.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(state_dict)
    return model

In [5]:
# The tokenizer is built from the same base checkpoint as the model.
tokenizer = BartTokenizerFast.from_pretrained(CHECKPOINT)
# Collator handed to the DataLoader as `collate_fn` in the inference cell.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = load_model()
# Left as the last expression so the cell still displays the module repr.
model.to(device)


    

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=

In [14]:
sequence_to_predict = "Hello, bloody world"

# Encode the sentence as PyTorch tensors, then wrap the encoding in a Dataset
# so it goes through the DataLoader / collator path.
tokenized = tokenizer(
    sequence_to_predict,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
# NOTE(review): despite its name, `train_data` holds the inference input.
train_data = Dataset.from_dict(tokenized)
dataloader = DataLoader(
    train_data,
    batch_size=1,
    shuffle=False,
    collate_fn=data_collator,
)

for batch in dataloader:
    # Move every tensor in the batch onto the device the model was moved to.
    batch = {name: tensor.to(device) for name, tensor in batch.items()}
    outputs = model.generate(**batch)
    # Only the first generated sequence is decoded (batch_size is 1).
    text = tokenizer.decode(
        outputs[0],
        clean_up_tokenization_spaces=False,
        skip_special_tokens=True,
    )
    # Source sentence first, model output second, one per line.
    print(sequence_to_predict, text, sep="\n")

Hello, bloody world
Hello, world.
