In [1]:
# imports
from transformers import pipeline
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [81]:
# Build the fill-mask pipeline; most BERT-family checkpoints work here.
# Use the first CUDA GPU when one is available and fall back to the CPU
# (device=-1) otherwise, so the notebook also runs on machines without a GPU.
unmasker = pipeline(
    'fill-mask',
    model='xlm-roberta-large',
    device=0 if torch.cuda.is_available() else -1,
)
# Read the mask token from the loaded tokenizer so it always matches the model
# ("<mask>" for xlm-roberta-large) and never has to be edited by hand when the
# checkpoint above is swapped.
mask_token = unmasker.tokenizer.mask_token

In [82]:
def random_seed(seed_length, words_path=r"C:\Users\user\Desktop\text nn\words.txt"):
    """Build a random, space-separated sequence of words drawn from a word list.

    This produces the initial, meaningless text that the model rewrites later.

    Args:
        seed_length: number of words to draw (with replacement).
        words_path: path to a text file holding one word per line, e.g.

                word 1
                word 2
                ...

            Defaults to the original hard-coded location; pass your own path
            to use a different word list.

    Returns:
        The drawn words joined by single spaces ("" when seed_length is 0).

    Raises:
        ValueError: if the word file contains no words.
    """
    with open(words_path) as word_file:
        # splitlines() plus the emptiness filter drops the blank entry that a
        # trailing newline (or an empty line) would otherwise add to the list
        words = [line.strip() for line in word_file.read().splitlines() if line.strip()]

    if not words:
        raise ValueError(f"no words found in {words_path!r}")

    # randint's upper bound is exclusive, so using len(words) makes every word
    # (including the last one) reachable and adapts to any list length
    indices = np.random.randint(0, len(words), seed_length)

    # join with single spaces; no trailing space to trim afterwards
    return " ".join(words[int(index)] for index in indices)

In [83]:
def run_mask(text, topic):
    """Make one left-to-right pass over ``text``, re-predicting every word.

    Each word in turn is swapped for the mask token, the topic is put in
    front of the (partly masked) text, and the model's top prediction takes
    the masked word's place before the next word is processed.

    Example of what the model sees for one position:

               topic                    masked text
      ____________|___________    ____________|___________
     |                       |   |                       |
     apples are green because    they have <mask> in them.

    Args:
        text: the text being rewritten.
        topic: the prefix the text should end up completing.

    Returns:
        The rewritten words joined back together with single spaces.
    """
    words = text.split()

    for slot, _ in enumerate(words):
        # hide the current word behind the mask token
        words[slot] = mask_token
        # topic first, then the whole text, closed with a full stop
        prompt = "{} {}.".format(topic, " ".join(words))
        # the first prediction is taken as the replacement word
        predictions = unmasker(prompt)
        words[slot] = predictions[0]["token_str"]

    return " ".join(words)

In [84]:
def run_text(text, topic, max_iterations=None):
    """Repeatedly rewrite ``text`` towards ``topic`` until it stops changing.

    Each iteration runs one full masking pass (``run_mask``) over the current
    text and prints the result so progress can be followed.

    The loop ends when a pass produces a text that has already been seen.
    That covers normal convergence (the pass changed nothing) and also the
    case where the model oscillates between a few texts, which would
    otherwise loop forever.

    Args:
        text: the text to rewrite.
        topic: the prefix the text should end up completing.
        max_iterations: optional upper bound on the number of passes;
            ``None`` (the default) means no limit.

    Returns:
        The final text. In a notebook, a bare call on the last line of a cell
        will therefore also echo this string as the cell's output.
    """
    # every text produced so far; seeing one again means further passes
    # cannot produce anything new
    seen_texts = {text}
    # only used for the progress print-out and the optional limit
    iterations = 0

    while max_iterations is None or iterations < max_iterations:
        iterations += 1

        # run the mask over the current text
        text = run_mask(text, topic)

        # this is just used to monitor the model's progress
        print(f"{iterations}: {topic}   --->   {text}")

        # unchanged text or a revisited earlier state: the model is done
        if text in seen_texts:
            break
        seen_texts.add(text)

    return text

In [162]:
# start from 15 random words and let the model steer them towards the topic
run_text(text=random_seed(seed_length=15), topic="lemons are")

1: lemons are   --->   the best not the best to make a new , you are a bit old
2: lemons are   --->   the best and the way to start something new when you get a little tired
3: lemons are   --->   the best and easy way to try something new when you are a little older
4: lemons are   --->   the quick and easy way to try something new when you get a little tired
5: lemons are   --->   a quick and easy way to try something new when you are a little tired
6: lemons are   --->   a quick and easy way to try something new when you are a little tired
