# T5 Tuned - Inference

## Install Dependencies

In [None]:
# Project folder on Google Drive. The path is relative (no leading slash), so
# it is resolved against the notebook's current working directory. Later cells
# add it to sys.path and pass it to common.create_configs.
GDRIVE_BASE = 'drive/MyDrive/MIDS/w266/project/'

# Install the third-party packages needed by this notebook.
# NOTE(review): both installs are unpinned, so a fresh run may resolve to
# different versions than the ones recorded in the output below - consider
# pinning for reproducibility.
!pip install transformers
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m55.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m97.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 KB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.27.4
Looking in indexes: https://pypi.org/simple, https://us

## Connect to Google Drive
We will be loading data from Google Drive and also saving trained models to Google Drive, so let's mount Google Drive.

In [None]:
import sys
from google.colab import drive
# Mount the user's Google Drive at /content/drive.
drive.mount('/content/drive')
# Put the project folder first on the module search path (presumably so the
# project's `common` module imported in a later cell can be found - confirm).
# NOTE(review): GDRIVE_BASE is a relative path, so it only lines up with the
# mount point above when the working directory is /content.
sys.path.insert(0, GDRIVE_BASE)

Mounted at /content/drive


## Imports and Constants

In [24]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution, so edits to imported project modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, OPTForCausalLM
import torch
import transformers
from collections import deque
import common

# Show which version of the project's `common` module was imported.
print(f'common.__version__: {common.__version__}')

# Restrict transformers logging to error-level messages.
transformers.logging.set_verbosity_error()
# Build the configs used by the cells below, which index the result by keys
# such as 't5_s1' and 'opt_s2'.
# NOTE(review): the meaning of the four None arguments is defined inside
# common.create_configs and is not visible in this notebook - confirm.
tuning_configs = common.create_configs(GDRIVE_BASE, None, None, None, None)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
common.__version__: 1.4


# Story Bot

In [None]:
class StoryBot:
  """Class to mimic a bot that continues the story.

  The user and the bot take turns: the user types a line, the bot proposes
  candidate next lines, and the user picks one. The whole story is kept in
  ``self.story``; ``self.context_lines`` holds only the most recent
  ``lines_to_use`` lines and is what the inferencer is called with.
  """

  def __init__(self, inferencer, n_iters=20, lines_to_use=1):
    """
      Creates the interactive story bot.
      inferencer - callable used for generating lines of the story. It is
                   called with the current context lines and must return a
                   non-empty sequence of candidate next lines.
      n_iters - number of iterations to do for story generation.
      lines_to_use - The number of lines from the story to use as context for
                     generating the next line.
    """
    self._n_iters = n_iters
    self.lines_to_use = lines_to_use
    self.inferencer = inferencer
    self.re_init()

  def re_init(self):
    """Clears the story and the context window."""
    self.story = []
    # Bounded queue: holds just the last "lines_to_use" lines of the story;
    # appending to a full queue drops the oldest line.
    self.context_lines = deque([], self.lines_to_use)

  def display_line_choices(self, output_lines):
    """Prints the numbered candidate lines plus the Regenerate/End options."""
    print('Choose the line of your choice:')
    for i, line in enumerate(output_lines):
      print(f'{i}:', line)
    # Number the two extra options from the candidate count rather than from
    # the leaked loop variable, which is undefined for an empty list.
    n_lines = len(output_lines)
    print(f'{n_lines}: Regenerate')
    print(f'{n_lines + 1}: End')

  def get_user_choice(self):
    """
      Generates candidate next lines and returns the user's selection.

      Returns the chosen line, or the sentinel 'regenerate' / 'end' when the
      user picks one of the two extra options. When the inferencer yields a
      single candidate it is returned without prompting. Input that is not a
      number, or a number outside the displayed options, is ignored and the
      user is prompted again.
    """
    output_lines = self.inferencer(self.context_lines)
    if len(output_lines) <= 1:
      return output_lines[0]

    self.display_line_choices(output_lines)
    n_lines = len(output_lines)
    while True:
      user_input = input('Input the number of your choice (or ): ')
      try:
        user_opt = int(user_input)
      except ValueError:
        continue
      # Require a non-negative index: a negative number would otherwise be
      # accepted as a Python negative index and silently pick a line from the
      # end of the list.
      if 0 <= user_opt < n_lines:
        return output_lines[user_opt]
      if user_opt == n_lines:
        return 'regenerate'
      if user_opt == n_lines + 1:
        return 'end'
      # Any other number is not one of the displayed options: ask again
      # instead of falling out of the loop and returning None.

  def print_story(self):
    """Prints the story; even positions are user lines, odd are generated."""
    for i, line in enumerate(self.story):
      if i % 2 == 0:
        print(f'User: {line}')
      else:
        print(f'Generated: {line}')

  def __call__(self):
    """Runs the interactive session and prints the final story at the end."""
    print('*'*50)
    print('Welcome to StoryBot!\n')
    print('This program simulates an MMS kind of interaction with a bot to create a story sequentially.')
    print('When the prompt appears below, start typing as if it were the input on your mobile.')
    print('Enter end to end the story and restart to restart.')
    print('*'*50, '\n')
    i = 0
    while i < self._n_iters:
      if i > 0:
        print('The story so far:')
        self.print_story()
      i = i + 1
      # get the sentence from the user
      sentence_in = input('Enter next line (or end): ').strip()
      # accommodate special prompts
      if sentence_in == 'end':
        break
      if sentence_in == 'restart':
        # Start over with an empty story and a fresh iteration budget.
        i = 0
        self.re_init()
        continue
      self.context_lines.append(sentence_in)
      self.story.append(sentence_in)
      # Keep generating fresh candidates for as long as the user asks for it.
      output = 'regenerate'
      while output == 'regenerate':
        output = self.get_user_choice()
      if output == 'end':
        break
      self.context_lines.append(output)
      self.story.append(output)

    print()
    print('\n======== Final story: =========\n')
    self.print_story()



In [None]:
def run_story_bot(config, device, n_iters=1, lines_to_use=1):
  """
    Builds the model and inferencer for a config and runs an interactive StoryBot.

    config - configuration object; this function reads model_name and
             model_family, plus tuned_model_path and prompt for 't5' configs.
    device - torch device the model is moved to.
    n_iters - number of user/bot exchanges in the session (default 1).
    lines_to_use - number of most recent story lines given to the inferencer
                   as context (default 1).
  """
  tokenizer = AutoTokenizer.from_pretrained(config.model_name)
  if config.model_family == 't5':
    model = T5ForConditionalGeneration.from_pretrained(config.tuned_model_path).to(device)
    inferencer = common.T5Inferencer(device, model, tokenizer, prompt=config.prompt)
  else:
    # NOTE(review): this branch loads the checkpoint named by config.model_name,
    # not config.tuned_model_path as the t5 branch does - confirm this is intended.
    model = OPTForCausalLM.from_pretrained(config.model_name).to(device)
    inferencer = common.OptInferencer(device, model, tokenizer)
  story_bot = StoryBot(inferencer, n_iters=n_iters, lines_to_use=lines_to_use)
  story_bot()


In [None]:
# Run story bot on t5 s1: starts an interactive session (it waits for typed
# input) using the 't5_s1' entry of tuning_configs.
run_story_bot(tuning_configs['t5_s1'], device)

**************************************************
Welcome to StoryBot!

This program simulates an MMS kind of interaction with a bot to create a story sequentially.
When the prompt appears below, start typing as if it were the input on your mobile.
Enter end to end the story and restart to restart.
************************************************** 

Enter next line (or end): The little girl felt very lonely and scared.
Choose the line of your choice:
0: "It's a good thing," said she.
1: "It's a very pleasant place," said the little girl.
2: "It's all right," she said.
3: Regenerate
4: End
Input the number of your choice (or ): 2



User: The little girl felt very lonely and scared.
Generated: "It's all right," she said.


In [None]:
# Run story bot on opt s2: starts an interactive session using the 'opt_s2'
# entry of tuning_configs. For non-t5 configs run_story_bot loads the model
# from config.model_name rather than config.tuned_model_path.
run_story_bot(tuning_configs['opt_s2'], device)

**************************************************
Welcome to StoryBot!

This program simulates an MMS kind of interaction with a bot to create a story sequentially.
When the prompt appears below, start typing as if it were the input on your mobile.
Enter end to end the story and restart to restart.
************************************************** 

Enter next line (or end): The little girl felt very lonely and scared.
Choose the line of your choice:
0: The little girl felt very lonely and scared. She was crying and crying, and her parents were crying with her, but she didn't want to talk to them because she couldn't say anything, so she just sat there and cried for a
1: The little girl felt very lonely and scared. She wanted to go to school, but she didn't know where to find a teacher to help her. The teacher told her that she had to get a diploma to be accepted into a school.
2: The little girl felt very lonely and scared.  She was called to the hospital to be checked out, but she 

# Batch Inferencing

In [84]:
def generate_next_line(config, lines, device, num_sequences=5):
  """
    Prints sampled continuations, with their scores, for each input line.

    config - configuration object; this function reads model_name,
             model_family and prompt, plus tuned_model_path for 't5' configs.
    lines - iterable of input strings; config.prompt is prepended to each one.
    device - torch device used for the model and the input ids.
    num_sequences - number of beams and of returned sequences per input.

    Returns None; the results are printed.
  """
  tokenizer = AutoTokenizer.from_pretrained(config.model_name)

  # Load the model once, outside the loop: it does not depend on the input
  # line, and reloading the checkpoint for every line is very expensive.
  if config.model_family == 't5':
    model = T5ForConditionalGeneration.from_pretrained(config.tuned_model_path).to(device)
    family_kwargs = dict(
        no_repeat_ngram_size=3,
        max_new_tokens=100,
    )
  else:
    model = OPTForCausalLM.from_pretrained(config.model_name).to(device)
    family_kwargs = dict(
        no_repeat_ngram_size=2,
        max_length=50,
        early_stopping=True,
    )

  for test_input_text in lines:
    test_inputs = tokenizer([config.prompt + test_input_text], return_tensors='pt')
    input_ids = test_inputs['input_ids'].to(device)
    # Settings shared by both model families; the family-specific ones were
    # chosen above.
    outputs = model.generate(
        input_ids,
        num_beams=num_sequences,
        num_return_sequences=num_sequences,
        do_sample=True,
        top_k=0,
        return_dict_in_generate=True,
        output_scores=True,
        renormalize_logits=True,
        **family_kwargs,
    )

    # Negate the sequence scores before printing them.
    scores = -1 * outputs.sequences_scores.cpu()
    decoded = [
        tokenizer.decode(
            out_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        ).replace('\n', ' ')
        for out_ids in outputs.sequences
    ]
    print(f'Input: {test_input_text}')
    for score, output in zip(scores, decoded):
      print(f'\t{score}: {output}')

def generate_for(config, device, first_lines):
  """
    Prints a banner naming the model and dataset of the config, then the
    generated continuations for every line in first_lines.
  """
  print('*' * 50)
  model_name, dataset = config.model_name, config.dataset
  print(f'Evaluating {model_name} tuned on {dataset} dataset')
  generate_next_line(config, first_lines, device)



In [85]:
# Opening lines for which each model generates candidate next lines.
first_lines = ['Lara felt very sad and scared.', 'All the dragons of the world lived on one mountain called the dragon mountain.']

# Print scored continuations of the same opening lines for the 't5_s1' and
# 'opt_s2' configs so the two can be compared.
generate_for(tuning_configs['t5_s1'], device, first_lines)
generate_for(tuning_configs['opt_s2'], device, first_lines)


**************************************************
Evaluating google/t5-v1_1-base tuned on s1 dataset
Input: Lara felt very sad and scared.
	3.085955904680304e-05: “I don't know,” said her mother.
	3.301061951788142e-05: "I'll be able to change a few things," he said.
	0.00010224935977021232: "My dear, I'm afraid that you are too fond of me, and you must be afraid.
	3.072101026191376e-05: "It's time for a change," she said, looking up.
	2.518398832762614e-05: Then, seated herself upon the bed, and began to scream, “Then I'll go and get hold of my sword.”
Input: All the dragons of the world lived on one mountain called the dragon mountain.
	8.454718044959009e-05: The dragon was snatched up by a peasant who was run off as swiftly as a wolf.
	1.8762297258945182e-05: But the dragon was so strong that the dragons were terribly frightened and they called to the King and said, “Now you go down here, and I will tell you what I have done.”
	8.076785888988525e-05: When they reached the cave, the