# Text generation neural network

This notebook generates new pet names by first training a neural network on pet names from Seattle pet license data. Training runs very quickly because the work is distributed across multiple computers with GPUs in Saturn Cloud.

## Training the model

In [13]:
import pandas as pd
import re
import uuid
import datetime
import pickle
import json
import torch
import math
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [14]:
# additional libraries for doing the Saturn Cloud parallel work
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
from dask_pytorch_ddp import data, dispatch, results
from dask_saturn import SaturnCluster
from dask.distributed import Client
from distributed.worker import logger

In [15]:
# This chunk creates the X and y tensors to train a model on and the model structure itself. The 3-dimensional X tensor represents (data point, place in text sequence, character [1-hot encoded]). The 2-dimensional y tensor represents (data point, place in text sequence) and holds the index of the character to predict at each place.

# Our list of characters, where * represents blank and + represents stop
# A character's position in this list is the integer used to encode it.
characters = list("*+abcdefghijklmnopqrstuvwxyz-. ")

# Number of characters in each input window; sequences are built str_len + 1 long
# and then split into inputs (all but the last) and targets (all but the first).
str_len = 8
# Number of passes over the training data in train().
num_epochs = 100
# Rows per training batch; also the default batch dimension used by Model.init_state.
batch_size = 16384
# A training metric is submitted whenever the batch index is a multiple of this value.
print_every = 16384
# Hidden size of the LSTM.
lstm_size = 128
# Number of stacked LSTM layers.
lstm_layers = 4

# Downloads the licence data over the network each time this cell runs.
pet_names_raw = pd.read_csv("https://raw.githubusercontent.com/saturncloud/saturn-cloud-examples/master/text-generation-nn/seattle_pet_licenses.csv")

# Raw values of the name column; non-string entries are filtered out further down.
pet_names = pet_names_raw["Animal's Name"].tolist()

def get_substrings(in_str):
    """Return every non-empty prefix of the lower-cased name plus stop marker.

    The name is lower-cased and "+" (the stop character) is appended; the
    result lists the prefixes from shortest to longest, so the last entry is
    the full name followed by "+".
    """
    terminated = f"{in_str.lower()}+"
    prefixes = []
    for end in range(len(terminated)):
        prefixes.append(terminated[: end + 1])
    return prefixes

# Keep only names made of letters, spaces, periods and hyphens. fullmatch is
# used so a name with a trailing newline (which "$" alone would accept) cannot
# slip through and later fail the character lookup.
pattern = re.compile(r"^[ \.\-a-zA-Z]*$")
pet_names_filtered = [
    name
    for name in pet_names
    if isinstance(name, str) and not name.isspace() and pattern.fullmatch(name)
]

# Every prefix of every name (lower-cased, ending in the stop marker for the
# full name) becomes its own training example.
pet_names_expanded = [
    prefix for name in pet_names_filtered for prefix in get_substrings(name)
]
pet_names_characters = [list(name) for name in pet_names_expanded]

# Keep the last str_len + 1 characters of each example and left-pad shorter
# ones with the blank character "*" so every example has the same length.
padded_len = str_len + 1
pet_names_padded = [
    ["*"] * (padded_len - len(name_chars)) + name_chars
    for name_chars in (name[-padded_len:] for name in pet_names_characters)
]

# Encode each character as its position in `characters`; a dict avoids a
# linear list search for every character.
char_to_index = {char: index for index, char in enumerate(characters)}
pet_names_numeric = [[char_to_index[char] for char in name] for name in pet_names_padded]

# the final x and y data
# y holds, for every position, the index of the character that follows it;
# x holds the preceding characters, one-hot encoded as floats.
y = torch.tensor([name[1:] for name in pet_names_numeric])
x = torch.tensor([name[:-1] for name in pet_names_numeric])
x = torch.nn.functional.one_hot(x, num_classes=len(characters)).float()

# the lstm model
class Model(nn.Module):
    """Character-level LSTM followed by a linear layer that scores each character.

    Args:
        vocab_size: Number of distinct characters (size of the one-hot input
            and of the output logits). Defaults to ``len(characters)``.
        hidden_size: Hidden size of the LSTM. Defaults to ``lstm_size``.
        num_layers: Number of stacked LSTM layers. Defaults to ``lstm_layers``.
        dropout: Dropout applied between LSTM layers.

    Calling ``Model()`` with no arguments builds the same network as before,
    using the notebook-level settings.
    """

    def __init__(self, vocab_size=None, hidden_size=None, num_layers=None, dropout=0.1):
        super().__init__()
        # Resolve defaults at call time so a bare Model() picks up the current
        # notebook-level settings.
        if vocab_size is None:
            vocab_size = len(characters)
        if hidden_size is None:
            hidden_size = lstm_size
        if num_layers is None:
            num_layers = lstm_layers
        self.lstm = nn.LSTM(
            input_size=vocab_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, prev_state):
        """Run the LSTM then the linear layer.

        Args:
            x: One-hot input, batch first: (batch, sequence, vocab_size).
            prev_state: ``(h, c)`` tuple as returned by ``init_state`` or by a
                previous call.

        Returns:
            ``(logits, state)`` where logits has one score per character for
            every sequence position and state is the new ``(h, c)`` tuple.
        """
        output, state = self.lstm(x, prev_state)
        logits = self.fc(output)
        return logits, state

    def init_state(self, custom_batch_size=None):
        """Return zero-filled ``(h, c)`` tensors for the given batch size.

        The layer count and hidden size are read from this model's own LSTM,
        so the state always matches the network that was actually built.
        When ``custom_batch_size`` is None the notebook-level ``batch_size``
        is used.
        """
        if custom_batch_size is None:
            custom_batch_size = batch_size
        shape = (self.lstm.num_layers, custom_batch_size, self.lstm.hidden_size)
        return (torch.zeros(shape), torch.zeros(shape))

In [16]:
# This starts the parallel cluster in Saturn
# Random hex key for this run, handed to the results handler.
key = uuid.uuid4().hex
# Results handler: train() sends metrics and model snapshots through
# rh.submit_result, and rh.process_results collects them afterwards.
rh = results.DaskResultsHandler(key)
# Connect a Dask client to the Saturn cluster.
cluster = SaturnCluster()
client = Client(cluster)
# Do not continue until 3 workers are available.
client.wait_for_workers(3)

INFO:dask-saturn:Cluster is ready
INFO:dask-saturn:Registering default plugins
INFO:dask-saturn:{'tcp://10.0.0.118:41631': {'status': 'repeat'}, 'tcp://10.0.0.67:36669': {'status': 'repeat'}, 'tcp://10.0.27.147:41551': {'status': 'repeat'}}


In [17]:
# model training function
# when this is run it saves the model output after each epoch (overwriting the previous one)
# If multiple computers are training the model, they'll each save to the same place
def train():
    """Train the LSTM on this worker's GPU as one member of a DDP group.

    Meant to be launched on the cluster workers through ``dispatch.run``. It
    relies on notebook-level names: the data tensors ``x`` and ``y``, the
    ``Model`` class, ``num_epochs``, ``batch_size``, ``print_every`` and the
    results handler ``rh``.

    Side effects:
        * Logs progress for every batch through the worker logger.
        * Submits a JSON metrics record through ``rh`` whenever the batch
          index is a multiple of ``print_every``.
        * Submits the pickled state dict as ``model.pkl`` after every epoch
          (every worker submits under the same name).
    """
    worker_rank = int(dist.get_rank())
    logger.info(f"Worker - {worker_rank} - starting up")

    # send the model to the GPU of the computer the code is running on
    device = torch.device(0)
    orig_model = Model().to(device)
    # DDP synchronizes gradients across the participating processes. The
    # wrapped module is already on the device, so no further .to() is needed.
    model = DDP(orig_model, device_ids=[0])
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    logger.info(f"Worker - {worker_rank} - setup complete")

    num_samples = x.size()[0]
    # Only full batches are used: the carried LSTM state has a fixed batch
    # dimension of batch_size, so the trailing remainder is skipped.
    last_batch_start = num_samples - (num_samples % batch_size)

    for epoch in range(num_epochs):
        logger.info(f"Worker - {worker_rank} - Epoch {epoch} - beginning")
        state_h, state_c = orig_model.init_state()
        state_h = state_h.to(device)
        state_c = state_c.to(device)
        # NOTE(review): each worker draws its own permutation over the whole
        # data set; no per-worker sharding is visible here.
        permutation = torch.randperm(num_samples)
        for i in range(0, last_batch_start, batch_size):
            batch_index = i // batch_size
            logger.info(f"Worker - {worker_rank} - Epoch {epoch} - Batch {batch_index} - beginning")
            indices = permutation[i:i + batch_size]
            batch_x = x[indices].to(device)
            batch_y = y[indices].to(device)
            optimizer.zero_grad()
            # NOTE(review): the LSTM state from the previous (shuffled) batch
            # is fed into this one, as in the original code.
            batch_y_pred, (state_h, state_c) = model(batch_x, (state_h, state_c))
            # CrossEntropyLoss expects the class dimension second, so swap the
            # sequence and character dimensions of the prediction.
            loss = criterion(batch_y_pred.transpose(1, 2), batch_y)
            # Detach so the next batch does not backpropagate into this one.
            state_h = state_h.detach()
            state_c = state_c.detach()
            loss.backward()
            optimizer.step()
            # Read the loss once; it is used for both the metrics and the log.
            loss_value = loss.item()
            # store metrics while the model is training
            if batch_index % print_every == 0:
                rh.submit_result(
                    f"worker/{worker_rank}/data-{datetime.datetime.now().isoformat()}.json",
                    json.dumps({'loss': loss_value,
                                'epoch': epoch,
                                'pct': (i / num_samples),
                                'iter': (i / batch_size),
                                'total': num_samples / batch_size,
                                'worker': worker_rank})
                )
            logger.info(f"Worker - {worker_rank} - Epoch {epoch} - Batch {batch_index} - Loss {loss_value}")
        # save the model at the end of each epoch; this is the state dict of
        # the DDP wrapper, which the loading cell restores into DataParallel
        rh.submit_result("model.pkl", pickle.dumps(model.state_dict()))

In [18]:
# start the parallel job, and use process_results to save the output
# Restart the workers before dispatching (presumably to begin from a clean state - confirm).
client.restart()
# Launch train() on the cluster; `futures` is whatever dispatch.run returns for that launch.
futures = dispatch.run(client, train)
# Write what train() submitted through rh into /home/jovyan/training/; the
# loading cell later reads model.pkl from that directory.
# raise_errors=False is passed - presumably worker errors are not re-raised here; confirm against the dask_pytorch_ddp docs.
rh.process_results("/home/jovyan/training/", futures, raise_errors=False)

KeyboardInterrupt: 

In [19]:
# Disconnect this notebook's Dask client now that training is finished.
client.close()

## Generating names with a trained model

In [7]:
def generate_name(lstm_model, init_state, characters, str_len, max_len=30):
    """Sample one name from the model, character by character.

    Args:
        lstm_model: Callable taking ``(one_hot_input, (state_h, state_c))`` and
            returning ``(logits, (state_h, state_c))``, with logits indexed as
            (batch, position, character).
        init_state: Callable returning the initial ``(state_h, state_c)`` for
            a given batch size; it is called with 1.
        characters: The alphabet list. Index 0 must be the blank "*" (never
            sampled) and "+" is the stop character.
        str_len: Number of trailing characters fed to the model at each step.
        max_len: Maximum number of characters to generate before giving up on
            a stop character.

    Returns:
        The generated name with the first letter of each word capitalized.
    """
    num_classes = len(characters)
    in_progress_name = []
    next_letter = ""
    state_h, state_c = init_state(1)
    while next_letter != "+" and len(in_progress_name) < max_len:
        # prep the data to run in the model again: last str_len characters,
        # left-padded with blanks
        window = in_progress_name[-str_len:]
        window = ["*"] * (str_len - len(window)) + window
        window_numeric = [characters.index(char) for char in window]
        window_tensor = torch.nn.functional.one_hot(
            torch.tensor(window_numeric), num_classes=num_classes
        ).float()
        window_tensor = torch.unsqueeze(window_tensor, 0)
        # run the model to score each possible next character
        with torch.no_grad():
            logits, (state_h, state_c) = lstm_model(window_tensor, (state_h, state_c))
        # Softmax over the non-blank logits only. This equals a softmax over
        # all characters followed by dropping the blank and renormalizing, but
        # is done in one step and in float64 so the probabilities sum to 1
        # within np.random.choice's tolerance.
        candidate_logits = logits[0, -1, 1:].detach().double()
        next_letter_probabilities = (
            torch.nn.functional.softmax(candidate_logits, dim=0).cpu().numpy()
        )
        # determine what the actual letter is (+ 1 skips the blank at index 0)
        next_letter = characters[
            np.random.choice(num_classes - 1, p=next_letter_probabilities) + 1
        ]
        if next_letter != "+":
            # if the next character isn't stop add the latest generated character to the name and continue
            in_progress_name.append(next_letter)
    # turn the list of characters into a single string
    raw_name = "".join(in_progress_name)
    # capitalize the first letter of each word
    return raw_name.title()

In [11]:
# load the model and the trained parameters
# NOTE: unpickling can execute arbitrary code; only load files written by this
# notebook's own training run.
with open("/home/jovyan/training/model.pkl", "rb") as model_file:
    model_state = pickle.load(model_file)
model = Model()
# The saved state dict came from the DDP-wrapped model in train(); the model is
# wrapped in DataParallel here before the state dict is loaded into it.
model_parallel = torch.nn.DataParallel(model).cuda()
model_parallel.load_state_dict(model_state)

<All keys matched successfully>

In [12]:
# Generate 50 names then filter out existing ones
# A set is built once so each membership check does not scan the whole list
# of licensed names.
existing_names = set(pet_names)
generated_names = [generate_name(model_parallel, model.init_state, characters, str_len) for _ in range(50)]
generated_names = [name for name in generated_names if name not in existing_names]
print(generated_names)

['Beeger Len Laccencterny', 'Ctontor', 'Krin', 'Smurkle', 'Monsu', 'Jascenfa', 'Tipake', 'Yooru', 'Prade', 'Roea', 'Munashhetearlavy Radmasb', 'Jakis', 'Scogenea', 'Tassin', 'Boanne', 'Koqrey', 'Gusker', 'Ganlie', 'Chacgly', 'Kolonc', 'Cacdy', 'Laley', 'Hajisa', 'Kar Don', 'Mingly Wicsy Salri Goddimtons', 'Feora', 'Kalry', 'Zoa', 'Burac Ah-Srassy Nucchhel', 'Grue', 'Bezbynant Gortedde', 'Kaziy', 'Shouu', 'Raffekla', 'Mikim', 'Jodpe', 'Muitn', 'Markor', 'Echee', 'Iski', 'Yick', 'Kissia', 'Bryrpy', 'Jatar Hecsrurnes', 'Maidie', 'Rramd', 'Cine', 'Judde', 'Joszar']
