In [1]:
import os
from pathlib import Path

import torch
from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer, pipeline, set_seed

In [2]:
model = "openai-gpt"
cache_dir = Path("./models")

In [3]:
tokenizer = OpenAIGPTTokenizer.from_pretrained(
    pretrained_model_name_or_path=model, cache_dir=cache_dir
)
model = OpenAIGPTLMHeadModel.from_pretrained(
    pretrained_model_name_or_path=model, cache_dir=cache_dir
)

ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.


In [6]:
print(f"Number of model parameters: {model.num_parameters():_}")

Number of model parameters: 116_534_784


In [25]:
inputs = tokenizer("black shoes for", return_tensors="pt")
outputs = model(**inputs)

In [31]:
last_hidden_states = outputs.last_hidden_state
last_hidden_states.shape

torch.Size([1, 3, 768])

In [46]:
model_local_path = Path(
    "./models/models--openai-gpt/snapshots/1e0d4f3028acbffb47fe933cea64619c5ec1a002"
)

In [56]:
generator = pipeline(
    task="text-generation", tokenizer=str(model_local_path), model=str(model_local_path)
)

In [62]:
%%timeit
generator("black running", max_length=5, num_return_sequences=5, truncation=True)

90 ms ± 2.38 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [88]:
generator("<cars> a red ", max_length=10, num_return_sequences=5, truncation=True)

[{'generated_text': '<cars> a red  porsche 306, and'},
 {'generated_text': '<cars> a red  toyota was parked in her'},
 {'generated_text': '<cars> a red  pontiac. the last'},
 {'generated_text': '<cars> a red  honda accord pulled up,'},
 {'generated_text': '<cars> a red  mazda and a white'}]

In [21]:
"idemo idemo"[2:]

'emo idemo'

In [24]:
inputs

NameError: name 'inputs' is not defined

In [27]:
outputs

CausalLMOutput(loss=None, logits=tensor([[[ -7.4770,  -4.0750, -13.4248,  ..., -11.0718,  -7.2856,  -2.0470],
         [ -9.4497,  -5.7451, -18.7257,  ..., -12.9325, -14.6207,  -1.6253],
         [ -8.8216,  -6.6085, -16.2628,  ..., -10.8845,  -6.8129,  -2.1984]]],
       grad_fn=<UnsafeViewBackward0>), hidden_states=None, attentions=None)

## Teller without pipeline class

In [32]:
import torch
from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer


class GPTeller:
    def __init__(self, model_id="openai-gpt", cache_dir="./cache_dir", device=None):
        self.tokenizer = OpenAIGPTTokenizer.from_pretrained(
            model_id, cache_dir=cache_dir
        )
        self.model = OpenAIGPTLMHeadModel.from_pretrained(model_id, cache_dir=cache_dir)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() and device is None else "cpu"
        )
        self.model.to(self.device)

    def generate_text(self, prompt, max_length=50, temperature=1.0, top_k=0, top_p=0.9):
        encoded_input = self.tokenizer.encode(prompt, return_tensors="pt")
        encoded_input = encoded_input.to(self.device)

        # Generate outputs
        outputs = self.model.generate(
            encoded_input,
            max_length=max_length + len(encoded_input[0]),
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            pad_token_id=self.tokenizer.eos_token_id,
            no_repeat_ngram_size=2,
        )

        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text


# Example usage:
gpt_teller = GPTeller(cache_dir="../cache_dir")
prompt = "[car dealership] A black"
generated_text = gpt_teller.generate_text(
    prompt, max_length=20, temperature=0.8, top_k=50, top_p=0.95
)
print(generated_text)

ftfy or spacy is not installed using BERT BasicTokenizer instead of SpaCy & ftfy.


[ car dealership ] a black mercedes benz. 
 " i'm not sure, " he said. " it's a little


In [167]:
# prompt = "[car dealership] A black"
prompt = "[web shop] sugar free"

encoded_input = tokenizer.encode(prompt, return_tensors="pt")
print(encoded_input[0])
len(encoded_input[0])

tensor([ 293, 8148, 3080,  294, 6444, 1964])


6

In [168]:
outputs = model.generate(
    inputs=encoded_input,
    max_length=len(encoded_input[0]) + 3,
    num_return_sequences=3,
    num_beams=5,
    num_beam_groups=5,
    no_repeat_ngram_size=2,
    early_stopping=True,
    diversity_penalty=1.0,
)

In [169]:
outputs

tensor([[  293,  8148,  3080,   294,  6444,  1964, 40477, 11731, 11189],
        [  293,  8148,  3080,   294,  6444,  1964,   267, 40477,   481],
        [  293,  8148,  3080,   294,  6444,  1964,   239, 40477,   244]])

In [170]:
[tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

['[ web shop ] sugar free \n smashwords edition',
 '[ web shop ] sugar free! \n the',
 '[ web shop ] sugar free. \n "']