In [1]:
# Switch off jedi-backed tab completion for this IPython session.
%config Completer.use_jedi = False

In [2]:
import sys
# Relative directory to add to the import search path.
modules_path = '../modules/'
# Keep the directory at the front of sys.path so it wins import resolution.
# The guard makes the cell idempotent: re-running it no longer stacks a
# duplicate entry on every execution.
if sys.path[:1] != [modules_path]:
    sys.path.insert(0, modules_path)

In [3]:
from transformers import AutoTokenizer, pipeline
from torch.utils.data import Dataset
from tqdm import tqdm
import json

In [4]:
def postprocess_output(text, end_punct='"', start_punct=None):
    """Trim a raw generated continuation down to a single lower-cased sentence.

    Steps, in order:
      1. Drop everything from the first occurrence of ``end_punct`` onward
         (the whole text is kept when ``end_punct`` does not occur).
      2. If ``start_punct`` is given, repeatedly drop everything up to and
         including its next occurrence, so only the text after the last
         delimiter remains.
      3. Cut right after the earliest ``.``, ``!`` or ``?`` (keeping that
         character); if none is present the remaining text is kept whole.

    Args:
        text: Generated string to clean up.
        end_punct: Delimiter that marks the end of the useful text.
        start_punct: Optional delimiter marking where the useful text begins.
            May be longer than one character. ``None`` or an empty string
            disables this step.

    Returns:
        The trimmed text, stripped of surrounding whitespace and lower-cased.
    """
    # 1. Keep only what precedes the closing delimiter.
    cut = text.find(end_punct)
    if cut >= 0:
        text = text[:cut]
    text = text.strip()

    # 2. Skip past every start delimiter. Advancing by the delimiter's full
    #    length handles multi-character delimiters correctly, and the
    #    truthiness check avoids an endless loop on an empty delimiter
    #    (``str.find('')`` always returns 0).
    if start_punct:
        skip = len(start_punct)
        start = text.find(start_punct)
        while start >= 0:
            text = text[start + skip:].strip()
            start = text.find(start_punct)

    # 3. Stop at the first sentence terminator, whichever kind comes first.
    positions = [pos for pos in (text.find(mark) for mark in '.!?') if pos >= 0]
    end = min(positions) if positions else len(text)

    # ``end + 1`` keeps the terminator itself; slicing past the end is harmless.
    return text[:end + 1].strip().lower()

In [5]:
# Checkpoint used for both the tokenizer and the generation pipeline.
model_name = 'gpt2-xl'

# The tokenizer is told to use '<|endoftext|>' as its padding token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    pad_token='<|endoftext|>',
)

# Text-generation pipeline sharing that tokenizer, placed on device 0.
generator = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device=0,
)

In [6]:
# Input layout: the prompt text, then the source sentence in double quotes, then
# an opening double quote that is left unclosed at the end of the string.
tst_template = '{prompt} "{sentence_1}" "'

In [7]:
# Prompt string placed in front of the quoted input sentence.
# NOTE(review): this does not read like natural language; presumably it is a
# machine-optimized prompt — confirm where it comes from.
prompt = 'Fixed (− contrasts contrasts contrasts'
# Example sentence that fills the first quoted slot of the template.
sentence_1 = "it's small yet they make you feel right at home."
# Fill both placeholders to obtain the text handed to the generator.
formatted_template = tst_template.format(prompt=prompt, sentence_1=sentence_1)

In [10]:
# generator_outputs = generator(formatted_template,
#                                     pad_token_id=50256,
#                                     top_k=10,
#                                     top_p=1.0,
#                                     num_return_sequences=32,
#                                     temperature=1,
#                                     # Only return generated text, without the prompt
#                                     return_full_text=False)

In [44]:
%%timeit
generator_outputs = generator(formatted_template,
                                pad_token_id=50256,
                                # top_k=10,
                                # top_p=1.0,
                               do_sample=True,
                                num_beams=10,
                               
                                num_return_sequences=10,
                                # temperature=1,
                               output_scores=True,
                                # Only return generated text, without the prompt
                                return_full_text=False)

2.22 s ± 4.55 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
generator_outputs = generator(formatted_template,
                                pad_token_id=50256,
                                # top_k=10,
                                # top_p=1.0,
                               do_sample=True,
                                num_beams=10,
                                num_return_sequences=3,
                               output_scores=True,
                                # Only return generated text, without the prompt
                                return_full_text=False)



1.26 s ± 614 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [45]:
%%timeit
generator_outputs = generator(formatted_template,
                                pad_token_id=50256,
                                # top_k=10,
                                # top_p=1.0,
                               do_sample=True,
                                num_beams=10,
                               
                                num_return_sequences=1,
                                # temperature=1,
                               output_scores=True,
                                # Only return generated text, without the prompt
                                return_full_text=False)

1.11 s ± 1.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
generator_outputs = generator(formatted_template,
                                pad_token_id=50256,
                                top_k=10,
                                # top_p=1.0,
                               do_sample=True,
                                # num_beams=10,
                               
                                num_return_sequences=32,
                                # temperature=1,
                               output_scores=True,
                                # Only return generated text, without the prompt
                                return_full_text=False)

1.04 s ± 1.22 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [42]:
# Reduce each generated continuation to its first sentence for display.
# NOTE(review): needs `generator_outputs` to already exist in the kernel; the
# only visible non-%%timeit assignment is commented out — confirm it is defined
# on a fresh run.
[postprocess_output(output['generated_text']) for output in generator_outputs]

["it's big yet they make you feel out of place."]

In [14]:
# Reduce each generated continuation to its first sentence for display.
[postprocess_output(output['generated_text']) for output in generator_outputs]

["it's big yet they make you feel right at home.",
 "it's large yet they make you feel right at home.",
 "it's big yet they make you feel right at home.",
 "it's big yet they make you feel out of place.",
 "it's big yet they make you feel at home.",
 "it's big yet they make you feel out of place.",
 "it's big yet they make you feel right at home.",
 "it's small yet they make you feel right at home.",
 "it's big yet they make you feel right at home.",
 "it's small yet they make you feel right at home."]

In [16]:
# Reduce each generated continuation to its first sentence for display.
[postprocess_output(output['generated_text']) for output in generator_outputs]

["it's big yet they make you feel out of place.",
 "it's big yet they make you feel right at home.",
 "it's big yet they make you feel right at home.",
 "it's big yet they make you feel small.",
 "it's big yet they make you feel out of place.",
 "it's big yet they make you feel right at home.",
 "it's big yet they make you feel out of place.",
 "it's big yet they make you feel out of place.",
 "it's big yet they make you feel small.",
 "it's big yet they make you feel right at home."]

In [18]:
# Reduce each generated continuation to its first sentence for display.
[postprocess_output(output['generated_text']) for output in generator_outputs]

["it's big yet they make you feel out of place.",
 "it's small yet they make you feel right at home.",
 "it's small yet they make you feel right at home.",
 "it's small yet they make you feel right at home.",
 "it's small yet they make you feel out of place.",
 "it's small yet they make you feel right at home.",
 "it's big yet they make you feel right at home.",
 "it's big yet they make you feel right at home.",
 "it's small yet they make you feel right at home.",
 "it's big yet they make you feel out of place."]

In [14]:
# Reduce each generated continuation to its first sentence for display.
[postprocess_output(output['generated_text']) for output in generator_outputs]

['small',
 "it's large yet they make you feel out of place",
 "it's big but you feel like you're not quite in it.",
 "it's large yet they make you feel small.",
 "it's small yet they make you feel right at home.",
 'the people are nice and welcoming.',
 "i know it's small but i still like it.",
 "it's big but they make you feel small.",
 "it's big, they make you feel right at home.",
 'i feel like i can go anywhere.',
 "it's large but it's so much easier to navigate.",
 "it's large and intimidating, yet they make you feel right at home.",
 "you're a good friend, but it's small yet i feel right at home.",
 "it's small and it's not at home.",
 "it's big, but it's home.",
 "it's big yet they make you feel like you could live there forever.",
 "it's large but they make you feel small.",
 "you don't have to feel small at all.",
 "it's small yet they make you feel right at home.",
 "it's very small, and they make you feel like an intruder.",
 "it's large enough, but not so big that you need 