In [1]:
import torch
import random

from transformers import AutoTokenizer, AutoModelWithLMHead

In [2]:
tokenizer = AutoTokenizer.from_pretrained("openai-gpt")

model = AutoModelWithLMHead.from_pretrained("openai-gpt")


In [3]:
def generate_text(input_text: str, tokens_to_generate: int):
    text_generated = torch.tensor([tokenizer.encode(input_text)], dtype=torch.long)
    with torch.no_grad():
        for _ in range(tokens_to_generate):
            predictions = model(text_generated)
            next_token_logits = predictions[0][:, -1, :]

            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
            
            text_generated = torch.cat((text_generated, next_token_id), dim=1)
    result = tokenizer.decode(text_generated.squeeze().tolist())
    return result

In [4]:
text = "My favourite movie is"

In [5]:
text_encoded = torch.tensor([tokenizer.encode(text)], dtype=torch.long)

In [6]:
model.eval()
with torch.no_grad():
    predictions = model(text_encoded)

In [7]:
print(generate_text(text, 100))

my favourite movie is the one where the guy is in love with the girl. " 
 " i don't think that's a good idea. " 
 " why not? " 
 " because it's not a good idea. " 
 " why not? " 
 " because it's not a good idea. " 
 " why not? " 
 " because it's not a good idea. " 
 " why not? " 
 " because it's not a good idea. " 
 " why not? "


In [8]:
def gen_topK_text(input_text: str, tokens_to_generate: int, k: int):
    text_generated = torch.tensor([tokenizer.encode(input_text)], dtype=torch.long)
    with torch.no_grad():
        for _ in range(tokens_to_generate):
            predictions = model(text_generated)
            
            next_token_logits = predictions[0][:, -1, :]
            next_token_id = torch.LongTensor(random.choices(next_token_logits.topk(k, dim=-1).indices.tolist()[0], torch.nn.functional.softmax(next_token_logits).topk(k, dim=-1).values.tolist()[0])).unsqueeze(-1)            
            text_generated = torch.cat((text_generated, next_token_id), dim=1)
    result = tokenizer.decode(text_generated.squeeze().tolist())
    return result

In [9]:
print(gen_topK_text(text, 100, 10))

  


my favourite movie is about a guy who goes into a house with two girls at the end of the hall. the girls aren't as cute as they used to be, and i don't want to be the first person they see when they come out. " she laughs. " you 'd better get your butt over to that house. " 
 " i can't believe you just said that, " i mumble, trying not to laugh. 
 " come on, you 'll be glad you did, " she says,


In [10]:
print(gen_topK_text(text, 100, 3))


  


my favourite movie is the one with the two of us. " he smiled at her. " i think you're going to like it, too. " 
 " what do you mean? " 
 " it's the one with the two of us. i'm going to make love to you in the middle of the night and we're going to be late for our appointment with dr. demarco. " 
 " you're going to make love to me? " she repeated, her heart racing. " you're going to


In [11]:
def gen_topK_text_penalized(input_text: str, tokens_to_generate: int, k: int, penalty: float):
    text_generated = torch.tensor([tokenizer.encode(input_text)], dtype=torch.long)
    with torch.no_grad():
        for _ in range(tokens_to_generate):
            predictions = model(text_generated)
            
            next_token_logits = predictions[0][:, -1, :]
            
            #applying penalty (>1 to cut repetition) to logits that are already in text_generated
            #we add (penalty-1)% to logits < 0 and substract (penalty-1)% if logits > 0

            if penalty != 1:
              for i in range(len(next_token_logits)):
                if i in text_generated[0]:
                  if next_token_logits[0][i]>=0:
                    next_token_logits[0][i] *= (1-penalty)  
                  else:
                    next_token_logits[0][i] *= (1+penalty)
                            
                            

            next_token_id = torch.LongTensor(random.choices(next_token_logits.topk(k, dim=-1).indices.tolist()[0], torch.nn.functional.softmax(next_token_logits).topk(k, dim=-1).values.tolist()[0])).unsqueeze(-1)   
            
            text_generated = torch.cat((text_generated, next_token_id), dim=1)
    result = tokenizer.decode(text_generated.squeeze().tolist())
    return result


In [12]:
print(gen_topK_text_penalized(text, 200, 2, 10))




my favourite movie is the one where you get a kiss. " 
 " i'm sorry. i didn't know. " 
 " i'm not. i'm just saying, you're a good guy. " 
 " thanks. " i smiled at him, feeling a little bit better. 
 " so, you want to come over and watch the movie? " 
 " yeah, i guess so. " 
 " cool. " 
 i sat down next to him on the sofa, my legs stretched out in front of me, and he pulled me into him, wrapping his arm around my waist. " i've been meaning to ask you something. " 
 " what? " 
 " how did you get to be a vampire? " 
 " i don't know. " 
 " you're a vampire? " 
 " yes. " 
 " how? " 
 " i'm not sure. i just know i was born. " 
 i laughed. " you're kidding, right? "


In [13]:
def gen_topP_text_penalized(input_text: str, tokens_to_generate: int, p: float, penalty: float):
    text_generated = torch.tensor([tokenizer.encode(input_text)], dtype=torch.long)
    with torch.no_grad():
        for _ in range(tokens_to_generate):
            predictions = model(text_generated)
            
            next_token_logits = predictions[0][:, -1, :]
            
            #applying penalty (>1 to cut repetition) to logits that are already in text_generated
            #we add (penalty-1)% to logits < 0 and substract (penalty-1)% if logits > 0

            if penalty != 1:
              for i in range(len(next_token_logits)):
                if i in text_generated[0]:
                  if next_token_logits[0][i]>=0:
                    next_token_logits[0][i] *= (1-penalty)  
                  else:
                    next_token_logits[0][i] *= (1+penalty)
 
            
            cum_sum_prob = 0
            chosen_logits_indices = []
            sorted_softmax_values, indices = torch.sort(torch.nn.functional.softmax(next_token_logits), descending=True)
            
            for i in range(len(sorted_softmax_values[0])):
              if cum_sum_prob <= p:
                cum_sum_prob += sorted_softmax_values[0][i]
                chosen_logits_indices.append(indices[0][i].tolist())
              else:
                break
            
            next_token_id = torch.LongTensor(random.choices(chosen_logits_indices)).unsqueeze(-1)
            
            text_generated = torch.cat((text_generated, next_token_id), dim=1)
    result = tokenizer.decode(text_generated.squeeze().tolist())
    return result


In [14]:
print(gen_topP_text_penalized(text, 200, 0.7, 5))



my favourite movie is haunted, " miranda informed me as she gave me a sly grin. 
 i reached for the soda she 'd handed me. " yes, " i replied. 
 " it's not supposed to be on until six, " she replied. 
 " you should have been able to turn it on. i 'd hate to ruin it, " i told her. 
 miranda turned to look at me. " yeah, but i had no idea it would make me fall asleep, " she teased. 
 " sorry. " 
 miranda rolled her eyes and walked back over to the chair she 'd vacated. i stood there a moment and thought about calling rush and leaving. it would be rude not to call him. he 'd probably tell me to fuck off and leave him be. 
 i turned to leave when a loud banging at the door caught my attention. i headed over to see who it was and let rush's muffled voice drift out. i stood in the doorway and watched him for a moment before


In [15]:
def gen_topP_penalized_w_temp(input_text: str, tokens_to_generate: int, p: float, penalty: float, temperature: int =1):
    text_generated = torch.tensor([tokenizer.encode(input_text)], dtype=torch.long)
    with torch.no_grad():
        for _ in range(tokens_to_generate):
            predictions = model(text_generated)
            
            next_token_logits = predictions[0][:, -1, :]
            
            #applying penalty (>1 to cut repetition) to logits that are already in text_generated
            #we add (penalty-1)% to logits < 0 and substract (penalty-1)% if logits > 0

            if penalty != 1:
              for i in range(len(next_token_logits)):
                if i in text_generated[0]:
                  if next_token_logits[0][i]>=0:
                    next_token_logits[0][i] *= (1-penalty)  
                  else:
                    next_token_logits[0][i] *= (1+penalty)
 
            
            cum_sum_prob = 0
            chosen_logits_indices = []
            sorted_softmax_values, indices = torch.sort(torch.nn.functional.softmax(next_token_logits), descending=True)
            
            if temperature != 0:
                sorted_softmax_values = sorted_softmax_values/temperature
            
            for i in range(len(sorted_softmax_values[0])):
              if cum_sum_prob <= p:
                cum_sum_prob += sorted_softmax_values[0][i]
                chosen_logits_indices.append(indices[0][i].tolist())
              else:
                break
            
            next_token_id = torch.LongTensor(random.choices(chosen_logits_indices)).unsqueeze(-1)
            
            text_generated = torch.cat((text_generated, next_token_id), dim=1)
    result = tokenizer.decode(text_generated.squeeze().tolist())
    return result

In [16]:
print(gen_topP_penalized_w_temp(text, 200, 0.4, 5, 1.1))



my favourite movie is " some life - sucking evil bitch ". 
 it's a pity i don't have a gun. 
 * * * 
 i can't help but feel bad for leaving that day. i was only ten years old. i didn't know what to do. i had no friends. no friends. i didn't even know if i wanted to live. i had no idea if i could live without my dad. 
 my dad was gone. 
 my mother had died in a car accident. 
 i didn't know if i wanted to live. 
 my dad wasn't a great dad. he was a jerk. 
 he wasn't a good man. 
 i 'd had a lot of time to think about what my life would be like if i hadn't gone to college. 
 i 'd been living with my mom for a year. 
 i was so lonely. 
 and i had a big brother. 
 he was my best friend. 
 i didn't want to live without him
