In [1]:
from transformers import pipeline
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config

tokenizer = GPT2Tokenizer.from_pretrained('gpt2', bos_token='<|startoftext|>', eos_token='<|endoftext|>', pad_token='<|pad|>')
configuration = GPT2Config.from_pretrained('gpt2', output_hidden_states=False)
configuration.max_length = 1024

tokenizer.pad_token = tokenizer.eos_token

gen = pipeline('text-generation',model='./my_model', tokenizer=tokenizer,config=configuration)

result = gen('In')[0]['generated_text']
print(result)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


InThe episode begins with Timothy telling Wesley and Gagarin that the planet is in danger, but then the two are interrupted by the apparent destruction of a weapons platforms sent out to intercept the Federation fleet. Wesley and Gagarin rush to the platforms


In [2]:
import random
configuration.vocab_size = 50259
import re


alphabets= "([A-Za-z])"
prefixes = "(Mr|St|Mrs|Ms|Dr|Lt)[.]"
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = "[.](com|net|org|io|gov)"

def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("<prd>",".")
    sentences = text.split("<stop>")
    sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    return sentences

def next_line(s):
    input_ids = tokenizer.encode(s, return_tensors='pt')
    model = GPT2LMHeadModel.from_pretrained("./my_model", config=configuration)
    model.resize_token_embeddings(len(tokenizer))
    sample_outputs = model.generate(
                                input_ids,
                                do_sample=True,   
                                top_k=50, 
                                max_length = 800,
                                top_p=0.95, 
                                num_return_sequences=1
                        )
    return [(i, tokenizer.decode(sample_output, skip_special_tokens=True)) for i, sample_output in enumerate(sample_outputs)]


In [3]:
def make_acts(seed_s, min_acts= 3, prior_acts=[]):
    o = next_line(seed_s)
    acts = [next_line(s)[0][1] for s in split_into_sentences(o[0][1])]
    if len(acts) < min_acts:
        acts = [make_acts(a, min_acts= 2) for a in acts]
    else:
        return acts
        
        


In [4]:
seed_s = "The Klingons have recently been transmitting messages of glee and whimsy"

acts = make_acts(seed_s)
print("\n----\n".join(acts))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The Klingons have recently been transmitting messages of glee and whimsy to the Federation. Worf suggests that the Federation can intervene, by sending representatives from a number of non-Federation states. Riker advises that he cannot interfere with a Federation civil government, which he sees as dangerous. Riker then states that while the Federation might be good, it is not ideal for Starfleet and he believes that "any interference is welcome." Riker sends for advice from his ship's navigational systems, but they are unable to correct their warp engines. Although the Enterprise approaches the warbirds, and despite the advice that one star, the Enterprise approaches a star that is half obscured by the cloak, and the Enterprise is caught in the star's atmosphere, the ship has traveled over 285,000 kilometers before the warbirds have gone. This is the first time that the ship had traveled over 285,000 kilometers before the Enterprise. Riker is told that if he is able to correct his shi