<a href="https://colab.research.google.com/github/mvenouziou/Capstone-Project-Text-Generation/blob/main/Text_Generator_Anvil_Web_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Text Generation Models

This notebook loads trained models for character-level text generation, then serves their predictions to an Anvil web app through the Anvil Uplink (the models themselves run in this notebook).

Load imports

In [32]:
#### PACKAGE IMPORTS ####
# install Anvil's web app link
!pip install -q anvil-uplink
# NOTE(review): the uplink key below is a credential committed in plain text.
# Consider rotating it and reading it from an environment variable or a
# getpass() prompt instead of hardcoding it in the notebook.
import anvil.server
anvil.server.connect('53NFXI7IX7IE233XQTVJDXUM-PUGRV2WON2LETWBG')

# ML design
import tensorflow as tf
from tensorflow import keras
!pip install -q tensorflow-text
import tensorflow_text as text

# data handling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# other
import re
import os
import json
import pickle

# load google drive:
from google.colab import drive
gdrive_dir = '/content/gdrive/'
drive.mount(gdrive_dir)

[K     |████████████████████████████████| 61kB 3.4MB/s 
[K     |████████████████████████████████| 61kB 3.6MB/s 
[?25h  Building wheel for ws4py (setup.py) ... [?25l[?25hdone
Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Default environment (dev)" as SERVER
[K     |████████████████████████████████| 3.4MB 6.4MB/s 
[?25hMounted at /content/gdrive/


Set file directories and hyperparameters

In [33]:
# Hyperparameters
batch_size = 32

# Names of the saved models to load; each one lives in its own sub-folder
model_names = ['buy_local', 'Shakespeare', 'Robert_Frost']

# Folder (relative to the Drive mount point) containing the model sub-folders
models_folder = 'MyDrive/Colab_Notebooks/models/'

# Per-model registries, keyed by model name
tokenizer = {}
prediction_model = {}
filepath = {}

# Build the save/load directory for every model
for model in model_names:
    filepath[model] = ''.join([gdrive_dir, models_folder, model, '/'])

Load saved models

In [34]:
# Populate the `tokenizer` and `prediction_model` registries, one entry per
# name in `model_names`. Both artifacts are read from the
# 'prediction_model/' sub-folder of the model's directory.
for model in model_names:

    # set directory
    directory = filepath[model]

    # load tokenizer
    # open() raises FileNotFoundError when the pickle is missing, e.g. if the
    # Drive folder layout differs from `models_folder`.
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # from a trusted source.
    with open(directory + 'prediction_model/tokenizer.pickle', 'rb') as handle:
        tokenizer[model] = pickle.load(handle)

    # load prediction model
    prediction_model[model] = \
        tf.keras.models.load_model(directory + 'prediction_model/')

FileNotFoundError: ignored

##### Functions generating text from models

In [None]:
# Function: returns the model's weighted predictions for the next token
# (as logits / log-odds ratios)
def get_logits(model, token_sequence, initial_state=None):
    """
    Run the model on a token sequence and return the output for the last
    sequence position.

    Parameters:
    model - our prediction model set with batch size = 1; must contain a
            layer named 'GRU_1'
    token_sequence - nested sequence of token ids, converted with tf.constant
    initial_state - value handed to GRU_1.reset_states before the forward pass
                    (None presumably restores the layer's default state -
                    confirm against the Keras docs)

    Returns:
    numpy array holding model(tokens)[:, -1, :]
    """

    # carry forward the previous state of the GRU layer
    model.get_layer('GRU_1').reset_states(initial_state)

    # forward pass over the whole sequence
    token_tensor = tf.constant(token_sequence)
    all_step_logits = model(token_tensor)

    # only the prediction following the final token is needed
    last_step_logits = all_step_logits[:, -1, :]

    return last_step_logits.numpy()


# Function: draws one token id from a logits prediction
def sample_token(logits, precision_reduction=0):
    """
    Sample a single token from the categorical distribution given by `logits`.

    Parameters:
    logits - array of logits; its `.shape` is used to size the noise and
             element [0, 0] of the drawn sample is returned
    precision_reduction - weight of the random perturbation; with 0 the
                          logits are used unchanged

    Returns:
    the sampled token id (result of `.numpy()` on the sample's [0, 0] entry)
    """

    # fuzz factor: per-logit noise drawn from a normal with mean 1, stddev 0.2
    noise = tf.random.normal(shape=logits.shape, mean=1, stddev=.2)

    # perturb the logits multiplicatively before sampling
    scale = 1 + precision_reduction * noise
    perturbed_logits = logits * scale

    drawn = tf.random.categorical(logits=perturbed_logits, num_samples=1)

    # convert to integer
    return drawn[0, 0].numpy()

""" old version
def sample_token(logits):   

    # choose a value from logits distribution
    sample = tf.random.categorical(
                        logits=logits, 
                        num_samples=1, 
                        )
    
    # convert to integer
    next_token = sample[0,0].numpy()

    return next_token
"""

In [None]:
# Final Prediction Function #######

# Use the model to generate a token sequence
def make_prediction(init_string, num_generation_steps, precision_reduction=0,
                    model_name='Shakespeare', print_result=False):
    """
    Generate text by repeatedly sampling the next token from a loaded model.

    Parameters:
    init_string - seed text; it is tokenized and forms the start of the result
    num_generation_steps - number of tokens to sample and append
    precision_reduction - forwarded to sample_token (0 samples from the
                          unmodified logits)
    model_name - key into the `prediction_model` / `tokenizer` registries
    print_result - when True, also print the generated text

    Returns:
    the seed text followed by the generated characters, as one string

    Raises:
    KeyError - `model_name` is not a loaded model
    ValueError - `init_string` produces no tokens (empty, or only characters
                 the tokenizer does not know), so there is nothing to feed
                 the model
    """

    if model_name not in prediction_model:
        raise KeyError(
            f"unknown model_name {model_name!r}; "
            f"loaded models: {sorted(prediction_model)}")

    our_model = prediction_model[model_name]
    our_tokenizer = tokenizer[model_name]

    GRU_layer = our_model.get_layer('GRU_1')

    token_sequence = our_tokenizer.texts_to_sequences([init_string])

    # the model needs at least one token to predict from
    if len(token_sequence) == 0 or len(token_sequence[0]) == 0:
        raise ValueError(
            'init_string must contain at least one character known to the '
            'tokenizer')

    # first step: feed the whole seed with a fresh GRU state
    initial_state = None
    input_sequence = token_sequence

    for _ in range(num_generation_steps):
        logits = get_logits(our_model, input_sequence, initial_state=initial_state)
        sampled_token = sample_token(logits, precision_reduction)
        token_sequence[0].append(sampled_token)

        # use only last letter because previous model state is carried forward
        input_sequence = [[sampled_token]]
        initial_state = GRU_layer.states[0].numpy()

    # [::2] keeps every other character of the decoded string. This presumably
    # drops the single-space separators sequences_to_texts puts between
    # tokens, which assumes each token decodes to exactly one character -
    # confirm against the tokenizer settings.
    predicted_text = our_tokenizer.sequences_to_texts(token_sequence)[0][::2]

    if print_result:
        print(predicted_text)

    return predicted_text

In [None]:
# Scratch check: how re.split breaks a string at '?', '.' and ','
# (without a capturing group the punctuation marks themselves are dropped)
temp_String = 'gjj, kgk. f'

split_text = re.compile('[?.,]').split(temp_String)

print(split_text)

Sample text generation

In [None]:
# Demo: generate and print a sample passage with the Shakespeare model

init_string = 'EMMY:'  # starting point for prediction
num_generation_steps = 300  # max number characters to produce

make_prediction(
    init_string,
    num_generation_steps,
    precision_reduction=5,
    model_name='Shakespeare',
    print_result=True,
)

""" old version
make_prediction(init_string, num_generation_steps=num_generation_steps, 
                print_result=True)
"""

## Run Program for Anvil Uplink Server

In [48]:
# Scratch check of the punctuation-based line formatting: split the text at
# punctuation (the capturing group keeps the marks), start every text segment
# on a new line, attach each punctuation mark to the segment before it, and
# finish with an ellipsis.
prediction = 'fdkjdfj, fsdjk.'

split_lines_prediction = re.split('([?.,])', prediction)

print(split_lines_prediction)

output = ''
# with a capturing group re.split alternates: text at even indices,
# punctuation at odd indices
for i, line in enumerate(split_lines_prediction):
    if i % 2 == 1:
        output = ''.join([output, line])
    else:
        output = '\n'.join([output, line])

# append the ellipsis only; joining with `line` here would repeat the last
# segment after it
output = output + '... '

print(output)

['fdkjdfj', ',', ' fsdjk', '.', '']
fdkjdfj, fsdjk.... 


In [36]:
@anvil.server.callable
def generate_text(starting_text, precision_reduction=0, author='assorted'):
    """
    Anvil-callable entry point: generate text and format it one clause per line.

    Parameters:
    starting_text - seed text passed to make_prediction as init_string
    precision_reduction - forwarded to make_prediction
    author - name of the model to use, forwarded as model_name.
             NOTE(review): the default 'assorted' is not among the model_names
             defined in this notebook - confirm such a model is loaded.

    Returns:
    the generated text, where every text segment between punctuation marks
    (? . , ; ! :) starts on a new line (so the result begins with a newline)
    and the whole string ends with '... '
    """

    num_generation_steps = 250  # max number characters to produce

    prediction = make_prediction(init_string=starting_text,
                                 num_generation_steps=num_generation_steps,
                                 precision_reduction=precision_reduction,
                                 model_name=author,
                                 print_result=True)

    # the capturing group keeps the punctuation marks in the result:
    # text segments land at even indices, punctuation marks at odd indices
    split_lines_prediction = re.split('([?.,;!:])', prediction)

    pieces = []
    for i, line in enumerate(split_lines_prediction):
        if i % 2 == 1:
            # punctuation stays attached to the text before it
            pieces.append(line)
        else:
            # each text segment starts on a new line
            pieces.append('\n' + line)

    # Append only the ellipsis. Joining with the loop variable here would
    # repeat the final segment after the ellipsis whenever the text does not
    # end in punctuation.
    return ''.join(pieces) + '... '

# Keep the uplink connection alive so the Anvil app can call generate_text.
# NOTE(review): presumably this call blocks until the cell is interrupted, so
# cells below it would not run during "Run All" - confirm against the Anvil
# Uplink docs.
anvil.server.wait_forever()

" old version\n@anvil.server.callable\ndef generate_text(starting_text, author):\n\n  num_generation_steps = 350  # max number characters to produce\n  \n  prediction = make_prediction(starting_text, \n                               num_generation_steps=num_generation_steps, \n                               model_name=author,\n                               print_result=False)\n  \n  \n  return prediction + '... '\n\n\nanvil.server.wait_forever()\n"

In [37]:
# Local smoke test of the Anvil callable.
# NOTE(review): author='assorted' is not one of the model_names defined
# earlier in this notebook - confirm a model with this name is loaded.
generate_text(starting_text='test, this is . how', precision_reduction=0, author='assorted')

NameError: ignored