In [1]:
import tensorflow as tf 
from tensorflow import keras
import tkinter as tk
import pickle
import re
import textwrap
from datetime import datetime
import numpy as np

## Loading in our model and tokenizer:

In [2]:
#Load the saved Keras model from the working directory (the file name suggests the word-level BidenBot model).
model=keras.models.load_model('BidenBot_word_lvl.h5')

#Pickling has some security issues so we will retrain our tokenizer on the same dataset:
def get_tokenizer():
    """Fit a word-level Keras tokenizer on the Biden corpus and publish it globally.

    Reads 'text_Biden.txt' from the working directory, fits a
    ``keras.preprocessing.text.Tokenizer`` (``char_level=False``) on the whole
    file as a single text, and sets three module-level globals:

    * ``tokenizer``    -- the fitted tokenizer
    * ``max_id``       -- number of entries in ``tokenizer.word_index``
                          (distinct words, since the tokenizer is word-level)
    * ``dataset_size`` -- ``tokenizer.document_count``, i.e. the number of texts
                          passed to ``fit_on_texts`` (one here), NOT a count of
                          words or characters

    Raises:
        OSError: if 'text_Biden.txt' cannot be opened.
    """
    global tokenizer
    global max_id
    global dataset_size

    # Read the whole corpus in one call rather than appending line by line.
    # NOTE(review): no encoding is given, so the platform default is used --
    # confirm it matches the encoding used when the model was trained.
    with open('text_Biden.txt','r') as file:
        string_Biden=file.read() #We will store the Biden dataset here

    tokenizer = keras.preprocessing.text.Tokenizer(char_level=False)
    tokenizer.fit_on_texts([string_Biden])

    max_id = len(tokenizer.word_index) # number of distinct words
    dataset_size = tokenizer.document_count # number of texts the tokenizer was fitted on

#Fit the tokenizer now so that the globals tokenizer, max_id and dataset_size exist for the cells below.
get_tokenizer()

## Data Preprocessing:
Similar to what we did for training our RNN model, we will need to use the tokenizer to convert the text into sequences of word IDs, which are then one-hot encoded into a tensor.

In [3]:
def preprocess(texts):
    """One-hot encode a batch of texts for the model.

    Each text is turned into a sequence of word IDs by the global
    ``tokenizer``. Every ID is shifted down by one before being one-hot
    encoded with depth ``max_id``.

    Args:
        texts: list of strings to encode.

    Returns:
        The tensor produced by ``tf.one_hot`` for the shifted ID array.
    """
    # NOTE(review): the -1 presumably maps the tokenizer's 1-based IDs onto
    # 0..max_id-1; next_word() adds the 1 back after sampling.
    word_ids = tokenizer.texts_to_sequences(texts)
    shifted_ids = np.array(word_ids) - 1
    return tf.one_hot(shifted_ids, depth=max_id)

## Creating the output:
We will call the complete_text function with a text variable, which acts as the start of our string.
For the number of words requested (n_words), we will generate that many words by predicting the next word n_words times. To keep the output from being identical on every run, we take the log of our predicted probabilities, divide it by a temperature, and pass the result to tf.random.categorical, which uses those values as logits to randomly sample word indices (in this case we only draw one sample per step). I will show you an example of how temperature affects our output:

In [4]:
#Draw 40 samples from a two-class distribution (class 0 has probability .9) at a low temperature.
#Dividing the log-probabilities by a small temperature widens the gap between the two logits,
#so the more likely class 0 is drawn almost every time.
temperature=.1
probability=.9
tf.random.categorical([[np.log(probability)/temperature, np.log(1-probability)/temperature]], num_samples=40).numpy()

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [5]:
#Draw 40 samples from the two-class distribution (class 0 has probability .9) at a high temperature.
#Dividing the log-probabilities by a large temperature pulls the two logits closer together,
#so the less likely class 1 is drawn noticeably more often.
temperature=5
probability=.9
tf.random.categorical([[np.log(probability)/temperature, np.log(1-probability)/temperature]], num_samples=40).numpy()

array([[0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1]])

So as you can see, we are able to directly make our model generate more random output just by increasing the temperature values. Let's apply this below:

In [6]:
def complete_text(text, n_words=100, temperature=1):
    """Extend ``text`` with ``n_words`` sampled words.

    On every step the text generated so far is handed to ``next_word`` and
    the word it returns is appended, followed by a single space.

    Args:
        text: seed string that the generated words are appended to.
        n_words: how many words to generate.
        temperature: sampling temperature forwarded to ``next_word``.

    Returns:
        The seed text followed by the generated words; it ends with a space
        whenever at least one word was generated.
    """
    generated = text
    for _ in range(n_words):
        generated = generated + (next_word(generated, temperature) + ' ')
    return generated
def next_word(text, temperature=1):
    """Sample one word to follow ``text``.

    The text is encoded with ``preprocess``, run through the global ``model``,
    and one word ID is drawn with ``tf.random.categorical`` from the log of
    the model's output divided by ``temperature``.

    Args:
        text: the text generated so far.
        temperature: divisor applied to the log-probabilities before sampling.

    Returns:
        The sampled word as a string, decoded by the global ``tokenizer``.
    """
    encoded = preprocess([text])
    # Keep only the final position of the first batch entry; the -1: slice
    # keeps that axis so the result stays 2-D for tf.random.categorical.
    # NOTE(review): assumes the model output is (batch, time, vocab) -- confirm.
    last_step_proba = model(encoded)[0, -1:, :]
    scaled_logits = tf.math.log(last_step_proba) / temperature
    # +1 undoes the -1 shift applied to the word IDs in preprocess().
    sampled_id = tf.random.categorical(scaled_logits, num_samples=1) + 1
    decoded = tokenizer.sequences_to_texts(sampled_id.numpy())
    return decoded[0]

We will now create the functions needed to run our tkinter code. output() will call complete_text(), capitalize the first letter that comes after the end of each sentence with replace_lower_case(), and add new lines with add_n_lines() so that the output does not run off the tk window. click_generate() will run when we press the button on our tk window.

In [7]:
def click_generate(entry):
    """Button callback: generate a speech from ``entry`` and show it in ``label2``.

    Args:
        entry: the seed text to pass to ``output``.
    """
    generated_speech = output(entry)
    label2['text'] = generated_speech

def replace_lower_case(speech):
    """Capitalize the character that follows each sentence-ending mark.

    A sentence end is a '.', '?' or '!' followed by zero or more spaces. The
    character immediately after that run is passed through
    ``str.capitalize()``; characters that are not letters come back unchanged.
    A mark at the very end of the text has nothing after it and is skipped.
    The first character of ``speech`` is not touched.

    Args:
        speech: the text to process.

    Returns:
        A new string with the first character after every sentence end
        capitalized.
    """
    # Work on a list of characters: every position found in the original
    # text stays valid even when capitalizing one character yields more than
    # one (e.g. a German sharp s), and the text is rebuilt only once.
    chars = list(speech)
    for sentence_end in re.finditer(r'[.?!] *', speech):
        next_index = sentence_end.end()  # index of the char following the mark and spaces
        if next_index < len(chars):
            chars[next_index] = chars[next_index].capitalize()
    return ''.join(chars)

def add_n_lines(speech, width=44):
    """Re-wrap ``speech`` so that no line is longer than ``width`` characters.

    Common leading whitespace is removed with ``textwrap.dedent``; each
    existing line is then wrapped on its own with ``textwrap.wrap`` and the
    pieces are joined with newlines. Lines that are empty or contain only
    whitespace produce no output line, so blank lines in the input are
    dropped.

    Args:
        speech: the text to wrap.
        width: maximum line length; the default of 44 keeps the text inside
            the tk output label.

    Returns:
        The wrapped text as a single newline-separated string.
    """
    dedented = textwrap.dedent(speech)
    wrapped_lines = []
    for line in dedented.splitlines():
        wrapped_lines.extend(textwrap.wrap(line, width=width))
    return '\n'.join(wrapped_lines)

def output(entry):
    """Validate the seed text and build the formatted speech for the window.

    Args:
        entry: seed text typed by the user; it must be non-empty and end with
            a space.

    Returns:
        An error message when the seed text is rejected; otherwise the
        generated speech, capitalized after sentence ends, wrapped to the
        window width and followed by the BidenBot signature. The finished
        speech is also printed.
    """
    if len(entry) == 0:
        return 'Must include text'  # no input given
    if entry[-1] != ' ':
        return 'Must end with a space'

    # The temperature can be tuned: values between .01 and 1.0 give relatively
    # good output. Higher values are more 'random'; lower values let the model
    # pick the word arrangement it believes is correct, which often leads to
    # repetition.
    generated = complete_text(entry, n_words=84, temperature=0.27)
    capitalized = replace_lower_case(generated)
    wrapped = add_n_lines(str(capitalized))

    signed = wrapped + '\n \n \t \t\t - BidenBot'  # add a signature
    print(signed)
    return signed

Running this may take several minutes, depending on what hardware you are using (especially if you switched to char level encoding)!

# Run:
Let's now create our window. I have included images for the background, trying to keep it from being political despite the political nature of the training data. Sometimes, if the image doesn't load, you have to close the window and run the code again.

In [8]:

#Main window; the canvas only fixes the window size (540 wide, 750 tall).
root=tk.Tk()

canvas=tk.Canvas(root, height=750, width=540)
canvas.pack()

#Background image stretched over the whole window. If loading or placing it fails,
#the error is printed and the window is built without a background.
try:
    background_image=tk.PhotoImage(file='Biden.png')
    background_label=tk.Label(root, image=background_image)
    background_label.place(relwidth=1, relheight=1)

except Exception as x:
    print(x)

#Top bar: prompt label, text entry and the Generate button, laid out side by side.
frame=tk.Frame(root, bg='#1a1aff', bd=4)
frame.place(relx=0.5, rely=0.02, relwidth=.9, relheight=0.05, anchor='n')

#NOTE(review): font is given as a bare integer here and on the entry and button below;
#presumably meant as a size -- verify how Tk interprets it.
label1=tk.Label(frame, text='Start output with:', bg='#e6ffff', font=30)
label1.place(relx=0, rely=0, relwidth=0.3, relheight=1)

entry=tk.Entry(frame, bg='#e6ffff', font=50)
entry.place(relx=0.3, rely=0, relwidth=.5, relheight=1)

#The lambda reads the entry's current text at click time and hands it to click_generate().
#NOTE(review): fg and bg are both 'blue' -- the button text may be unreadable where the platform honors bg; confirm.
button=tk.Button(frame, text='Generate', bg='blue', fg='blue', font=78, command=lambda: click_generate(entry.get()))
button.place(relx=.8, rely=0, relwidth=.2, relheight=1)

#Output area: label2 is the label that click_generate() writes the generated speech into.
bottom_frame=tk.Frame(root, bg='#1a1aff', bd=4)
bottom_frame.place(relx=0.5, rely=.2, relwidth=.8, relheight=.4, anchor='n')

label2=tk.Label(bottom_frame, bg='#e6ffff', font=('Microsoft Sans Serif', 20), anchor='nw', justify='left')
label2.place(relwidth=1, relheight=1)

#Blocks until the window is closed.
root.mainloop()

Hello pittsburgh it’s true honk i have to
you going to pay your mortgage pay your rent
worried about the poison in the air you
breathe the water you drink worried about
your civil rights even your basic right to
dignity which is under attack with this
administration kamala harris has had your
back and now we have to have her back she’s
going to stand with me in this campaign and
all of us are going to stand up for her on
january 20
 
 	 		 - BidenBot
Donald trump is going to meet the challenge
of climate crisis donald trump calls it a
hoax i call it about our health and the
ability to create millions of good paying
jobs right here in western pennsylvania we
can combat climate change with american
ingenuity and manufacturing creating
millions of new high paying union jobs let
me be clear i will not ban fracking in
pennsylvania i’ll protect those jobs period
no matter what donald trump says but i tell
you what i will
 
 	 		 - BidenBot
Donald trump is going to be the first
president in 9