In [1]:
! pip install transformers

[0m

In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import BartTokenizer, BartForConditionalGeneration
from transformers import BlenderbotTokenizer, BlenderbotModel
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration

import torch
import numpy as np
from torch.utils.data import Dataset
import csv
import pandas as pd

In [3]:
torch.cuda.empty_cache()
import gc
gc.collect()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
data=[]
with open('/notebooks/csvfile112.csv', mode='r') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        data.append(row)

In [5]:
# model_checkpoint = "t5-small"
# model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model_checkpoint = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(model_checkpoint)
tokenizer = BlenderbotTokenizer.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/696M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/124k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/61.4k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

In [6]:
class AdditionDataset():

    def __init__(self, split):
        self.split = split # train/test
        num_test= 2926
        self.ixes = data[:num_test] if split == 'train' else data[num_test:]

    def __len__(self):
        return len(self.ixes)

    def __getitem__(self, idx):
        # given a problem index idx, first recover the associated a + b
        idx = self.ixes[idx]
        return {"input_ids":tokenizer(idx[0])["input_ids"], "labels":tokenizer(idx[1])["input_ids"]}

In [7]:
d=AdditionDataset("train")
dt=AdditionDataset("test")

In [8]:
d.__getitem__(0)

{'input_ids': [6950,
  395,
  1356,
  315,
  3501,
  298,
  281,
  632,
  358,
  5019,
  4472,
  1254,
  21,
  2],
 'labels': [1445, 6950, 3501, 21, 2]}

In [9]:
batch_size = 128
model_name = model_checkpoint.split("/")[-1]
args = Seq2SeqTrainingArguments(
    f"{model_name}-finetuned-xsum",
    evaluation_strategy = "epoch",
    learning_rate=2e-3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=150,
    predict_with_generate=True,
    push_to_hub=False,
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [10]:
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=AdditionDataset("train"),
    eval_dataset=AdditionDataset("test"),
    data_collator=data_collator,
    tokenizer=tokenizer,
)

trainer.train()

***** Running training *****
  Num examples = 2926
  Num Epochs = 150
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 3450


Epoch,Training Loss,Validation Loss
1,No log,5.157915
2,No log,4.431519
3,No log,3.957599
4,No log,3.875774
5,No log,3.752138
6,No log,3.742131
7,No log,3.89807
8,No log,3.881974
9,No log,3.959706
10,No log,4.135173


***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch size = 128
***** Running Evaluation *****
  Num examples = 1254
  Batch siz

TrainOutput(global_step=3450, training_loss=0.634521701094033, metrics={'train_runtime': 3674.1579, 'train_samples_per_second': 119.456, 'train_steps_per_second': 0.939, 'total_flos': 1.979079701372928e+16, 'train_loss': 0.634521701094033, 'epoch': 150.0})

In [11]:
# test = AdditionDataset("test")
# count = 0
# for index in range(len(test)):
#   device = 'cuda'
#   preds = model.generate(input_ids = torch.tensor(test[index]["input_ids"]).to(device).view(1,-1))
#   print(tokenizer.decode(np.array(preds.cpu()[0]))," ::: ",tokenizer.decode(np.array(test[index]["labels"])))

In [12]:
type(model)

transformers.models.blenderbot.modeling_blenderbot.BlenderbotForConditionalGeneration

In [14]:
UTTERANCE="What number can I call you back at?"
inputs = tokenizer([UTTERANCE], return_tensors="pt").to(device)
reply_ids = model.generate(**inputs)
print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])

Bot:   Please do so as soon as possible so this can be resolved. you can beolved.


In [24]:
File="/notebooks/test/mymodel.pth"
torch.save(model.state_dict(),File)

In [18]:
# ! zip -r mathformer blenderbot-400M-distill-finetuned-xsum

In [25]:
load_model = BlenderbotForConditionalGeneration.from_pretrained(model_checkpoint)
load_model.load_state_dict(torch.load(File))
load_model.eval()

loading configuration file https://huggingface.co/facebook/blenderbot-400M-distill/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/cec3df71c8d94a67ad9280220975d00732c9dbcf7adaa3afe440d6626e5fdf02.6d6f9df9c9b98c6aa3a827af7ec4aae285715ccb3673cadfe98cacb7235b5809
Model config BlenderbotConfig {
  "_name_or_path": "./",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": true,
  "architectures": [
    "BlenderbotForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1280,
  "decoder_attention_heads": 32,
  "decoder_ffn_dim": 5120,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": true,
  "dropout": 0.1,
  "encoder_attention_heads": 32,
  "encoder_ffn_dim": 5120,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 2,
  "encoder_no_repeat_ngram_size

BlenderbotForConditionalGeneration(
  (model): BlenderbotModel(
    (shared): Embedding(8008, 1280, padding_idx=0)
    (encoder): BlenderbotEncoder(
      (embed_tokens): Embedding(8008, 1280, padding_idx=0)
      (embed_positions): BlenderbotLearnedPositionalEmbedding(128, 1280)
      (layers): ModuleList(
        (0): BlenderbotEncoderLayer(
          (self_attn): BlenderbotAttention(
            (k_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (v_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (q_proj): Linear(in_features=1280, out_features=1280, bias=True)
            (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1280, out_features=5120, bias=True)
          (fc2): Linear(in_features=5120, out_features=1280, bias=True)
          (

In [30]:
UTTERANCE="But, Are you a scammer?"
inputs = tokenizer([UTTERANCE], return_tensors="pt")
reply_ids = load_model.generate(**inputs)
print("Bot: ", tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0])

Bot:   But I assure you I am Amazon Customer Service and I could help with your billing address.
