In [1]:
import torch
from torch import nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Get Text Data

In [2]:
with open('./datasets/borges.txt','r',encoding='utf8') as f:
    text = f.read()

In [3]:
print(text[:1000])

El Aleph
[Cuento. Texto completo]

O God, | could be bounded in a nutshell
and count myself a King of infinite space

Hamiet, ll, 2

But they will teach us that Eternity is the
Standing still of the Present Time, a
Nunc-stans (ast the Schools call it);

which neither they, nor any else
understand, no more than they would a
Hic-stans for an Infinite greatnesse of
Place.

Leviathan, IV, 46

La candente mafana de febrero en que Beatriz Viterbo muriéd, después de una
imperiosa agonia que no se rebaj6 un solo instante ni al sentimentalismo ni al
miedo, noté que las carteleras de fierro de la Plaza Constituci6n habian
renovado no sé qué aviso de cigarrillos rubios; el hecho me doliéd, pues
comprendi que el incesante y vasto universo ya se apartaba de ella y que ese
cambio era el primero de una serie infinita. Cambiara el universo pero yo no,
pensé con melancélica vanidad; alguna vez, lo sé, mi vana devocidén la habia
exasperado; muerta yo podia consagrarme a su memoria, sin esperanza, pero
t

In [4]:
len(text)

89845

## Encode Entire Text

In [5]:
all_characters = set(text)
len(all_characters)

91

In [6]:
decoder = dict(enumerate(all_characters))

In [7]:
encoder = {char: ind for ind,char in decoder.items()}

In [8]:
encoded_text = np.array([encoder[char] for char in text])

## One Hot Encoding

As previously discussed, we need to one-hot encode our data inorder for it to work with the network structure. Make sure to review numpy if any of these operations confuse you!

In [9]:
def one_hot_encoder(encoded_text, num_uni_chars):
    '''
    encoded_text : batch of encoded text
    
    num_uni_chars = number of unique characters (len(set(text)))
    '''
    
    # METHOD FROM:
    # https://stackoverflow.com/questions/29831489/convert-encoded_textay-of-indices-to-1-hot-encoded-numpy-encoded_textay
      
    # Create a placeholder for zeros.
    one_hot = np.zeros((encoded_text.size, num_uni_chars))
    
    # Convert data type for later use with pytorch (errors if we dont!)
    one_hot = one_hot.astype(np.float32)

    # Using fancy indexing fill in the 1s at the correct index locations
    one_hot[np.arange(one_hot.shape[0]), encoded_text.flatten()] = 1.0
    

    # Reshape it so it matches the batch sahe
    one_hot = one_hot.reshape((*encoded_text.shape, num_uni_chars))
    
    return one_hot

In [10]:
one_hot_encoder(np.array([1,2,0]),3)

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

--------------
---------------
# Creating Training Batches

We need to create a function that will generate batches of characters along with the next character in the sequence as a label.

-----------------
------------

In [28]:
def generate_batches(encoded_text, samp_per_batch=10, seq_len=50):

    char_per_batch = samp_per_batch * seq_len
    num_batches_avail = int(len(encoded_text)/char_per_batch)
    encoded_text = encoded_text[:num_batches_avail * char_per_batch]
    encoded_text = encoded_text.reshape((samp_per_batch, -1))

    def batching(encoded_text, seq_len):
        # Go through each row in array.
        for n in range(0, encoded_text.shape[1], seq_len):

            # Grab feature characters
            x = encoded_text[:, n:n+seq_len]

            # y is the target shifted over by 1
            y = np.zeros_like(x)

            #
            try:
                y[:, :-1] = x[:, 1:]
                y[:, -1]  = encoded_text[:, n+seq_len]

            # FOR POTENTIAL INDEXING ERROR AT THE END
            except:
                y[:, :-1] = x[:, 1:]
                y[:, -1] = encoded_text[:, 0]

            yield x, y

    return batching(encoded_text, seq_len)

gen = generate_batches(encoded_text)
x, y = next(iter(gen))
x.shape


(10, 50)

# Model architecture

In [29]:
class CharModel(nn.Module):
    
    def __init__(self, all_chars, num_hidden=256, num_layers=4,drop_prob=0.5,use_gpu=False):
        
        
        # SET UP ATTRIBUTES
        super().__init__()
        self.drop_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.use_gpu = use_gpu
        
        #CHARACTER SET, ENCODER, and DECODER
        self.all_chars = all_chars
        self.decoder = dict(enumerate(all_chars))
        self.encoder = {char: ind for ind,char in decoder.items()}
        
        
        self.lstm = nn.LSTM(len(self.all_chars), num_hidden, num_layers, dropout=drop_prob, batch_first=True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc_linear = nn.Linear(num_hidden, len(self.all_chars))
      
    
    def forward(self, x, hidden):
                  
        
        lstm_output, hidden = self.lstm(x, hidden)
        
        
        drop_output = self.dropout(lstm_output)
        
        drop_output = drop_output.contiguous().view(-1, self.num_hidden)
        
        
        final_out = self.fc_linear(drop_output)
        
        
        return final_out, hidden
    
    
    def hidden_state(self, batch_size):
        '''
        Used as separate method to account for both GPU and CPU users.
        '''
        
        if self.use_gpu:
            
            hidden = (torch.zeros(self.num_layers,batch_size,self.num_hidden).to("mps"),
                     torch.zeros(self.num_layers,batch_size,self.num_hidden).to("mps"))
        else:
            hidden = (torch.zeros(self.num_layers,batch_size,self.num_hidden),
                     torch.zeros(self.num_layers,batch_size,self.num_hidden))
        
        return hidden
        

## Training Data and Validation Data

In [30]:
train_percent = 0.1
train_ind = int(len(encoded_text) * (train_percent))
train_data = encoded_text[:train_ind]
val_data = encoded_text[train_ind:]

In [33]:
torch.cuda.is_available()

False

In [34]:
## HyperParams
epochs = 20
batch_size = 1
seq_len = 100
tracker = 10
num_lstm_layers = 3
dropout_p = .03
hidden_dim = 256
num_char = max(encoded_text)+1

model = CharModel(
    all_chars=all_characters,
    num_hidden=hidden_dim,
    num_layers=num_lstm_layers,
    drop_prob=dropout_p,
    use_gpu=False,
)


data_iter = generate_batches(train_data,batch_size,seq_len)
print(f" size of generator {sum(1 for _ in data_iter)}")
train_data.shape

 size of generator 89


(8984,)

In [None]:
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
criterion = nn.CrossEntropyLoss()

model.train()

# Check to see if using GPU

if model.use_gpu:
    torch.device("mps")
    model.to("mps")

for i in range(epochs):
    
    hidden = model.hidden_state(batch_size)
    
    for x,y in generate_batches(train_data,batch_size,seq_len):
        
        tracker += 1
        x = one_hot_encoder(x,num_char)
        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)
        if model.use_gpu:
            inputs = inputs.to("mps")
            targets = targets.to("mps")
            
        # If we don't reset we would backpropagate through all training history
        hidden = tuple([state.data for state in hidden])
        
        optimizer.zero_grad()
        lstm_output, hidden = model.forward(inputs,hidden)
        loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())
        loss.backward()
        
        # LET"S CLIP JUST IN CASE
        nn.utils.clip_grad_norm_(model.parameters(),max_norm=5)
        optimizer.step()

        
        ###################################
        ### CHECK ON VALIDATION SET ######
        #################################
        
        if tracker % 25 == 0:
            
            val_hidden = model.hidden_state(batch_size)
            val_losses = []
            model.eval()
            
            for x,y in generate_batches(val_data,batch_size,seq_len):
                
                x = one_hot_encoder(x,num_char)
                inputs = torch.from_numpy(x)
                targets = torch.from_numpy(y)
                if model.use_gpu:
                    inputs = inputs.to("mps")
                    targets = targets.to("mps")
                    
                val_hidden = tuple([state.data for state in val_hidden])
                lstm_output, val_hidden = model.forward(inputs,val_hidden)
                val_loss = criterion(lstm_output,targets.view(batch_size*seq_len).long())
                val_losses.append(val_loss.item())
            
            # Reset to training model after val for loop
            model.train()
            
            print(f"Epoch: {i} Step: {tracker} Val Loss: {val_loss.item()}")

Epoch: 0 Step: 25 Val Loss: 3.119316816329956
Epoch: 0 Step: 50 Val Loss: 3.0813751220703125
Epoch: 0 Step: 75 Val Loss: 3.027113676071167
Epoch: 1 Step: 100 Val Loss: 3.060596227645874
Epoch: 1 Step: 125 Val Loss: 3.05379056930542
Epoch: 1 Step: 150 Val Loss: 3.052138328552246
Epoch: 1 Step: 175 Val Loss: 3.0348711013793945
Epoch: 2 Step: 200 Val Loss: 3.0866332054138184
Epoch: 2 Step: 225 Val Loss: 3.07075834274292
Epoch: 2 Step: 250 Val Loss: 3.0325026512145996
Epoch: 2 Step: 275 Val Loss: 3.054020881652832
Epoch: 3 Step: 300 Val Loss: 3.054258108139038
Epoch: 3 Step: 325 Val Loss: 3.061342239379883
Epoch: 3 Step: 350 Val Loss: 3.0154459476470947
Epoch: 4 Step: 375 Val Loss: 3.0664584636688232
Epoch: 4 Step: 400 Val Loss: 2.9404304027557373
Epoch: 4 Step: 425 Val Loss: 2.8671436309814453
Epoch: 4 Step: 450 Val Loss: 2.9249086380004883
Epoch: 5 Step: 475 Val Loss: 2.780125617980957
Epoch: 5 Step: 500 Val Loss: 2.8080756664276123
Epoch: 5 Step: 525 Val Loss: 2.7295358180999756
Epoch: 

-------
------

## Saving the Model

https://pytorch.org/tutorials/beginner/saving_loading_models.html

In [43]:
# Be careful to overwrite our original name file!
model_name = 'borges_first_pass.net'

In [44]:
torch.save(model.state_dict(),model_name)

## Load Model

In [45]:
# MUST MATCH THE EXACT SAME SETTINGS AS MODEL USED DURING TRAINING!

model = CharModel(
    all_chars=all_characters,
    num_hidden=hidden_dim,
    num_layers=num_lstm_layers,
    drop_prob=dropout_p,
    use_gpu=False,
)

In [46]:
model.load_state_dict(torch.load(model_name))
model.eval()

CharModel(
  (lstm): LSTM(91, 256, num_layers=3, batch_first=True, dropout=0.03)
  (dropout): Dropout(p=0.03, inplace=False)
  (fc_linear): Linear(in_features=256, out_features=91, bias=True)
)

# Generating Predictions

--------

In [47]:
def predict_next_char(model, char, hidden=None, k=1):
        
        encoded_text = model.encoder[char]
        encoded_text = np.array([[encoded_text]])
        encoded_text = one_hot_encoder(encoded_text, len(model.all_chars))
        inputs = torch.from_numpy(encoded_text)
        
        if(model.use_gpu):
            inputs = inputs.to("mps")
        

        hidden = tuple([state.data for state in hidden])
        lstm_out, hidden = model(inputs, hidden)
        probs = F.softmax(lstm_out, dim=1).data
        
        if(model.use_gpu):
            probs = probs.to("mps")
        

        probs, index_positions = probs.topk(k)
        index_positions = index_positions.numpy().squeeze()
        
        probs = probs.numpy().flatten()
        probs = probs/probs.sum()
        
        # randomly choose a character based on probabilities
        char = np.random.choice(index_positions, p=probs)
       
        # return the encoded value of the predicted char and the hidden state
        return model.decoder[char], hidden

In [48]:
def generate_text(model, size, seed='The', k=1):
        
    if(model.use_gpu):
        model.to("mps")
    else:
        model.cpu()
    
    model.eval()

    output_chars = [c for c in seed]

    hidden = model.hidden_state(1)
    for char in seed:
        char, hidden = predict_next_char(model, char, hidden, k=k)
    

    output_chars.append(char)

    for i in range(size):
        
        # predict based off very last letter in output_chars
        char, hidden = predict_next_char(model, output_chars[-1], hidden, k=k)
        
        # add predicted character
        output_chars.append(char)
    
    # return string of predicted text
    return ''.join(output_chars)

In [49]:
print(generate_text(model, 1000, seed='The ', k=3))

The un los de en de entesteria distera de lara leren es des de el de lo les el elas de le dera en de de en des de lo el le de les lo enteria la el la le el dian an de da lerasa en el de lostos lo los de de lerio de la elterasa des le da el entaria el ena dienta le la es las le ente les de des la elte en la le la lo elas de es de de el derio dista les enta los es lesa le entas de es de en le eltaro de le dia lariana le lo elte lo de lerena les enta le le le ena de de les es de dera de la le de de en de lerento de da elte la des le de da de dertoro en ena ena estas lesas el dertes da dertosta da le le el es lere de las la de de ela lo le les en le laresa le de le la la de el los derta ena de le la el esto des des de el de en de de dicen le en de de el esto le lo des la de le dicio le lo les entara en en de dis las de de la des de es de el das dica de le ente de ela le de le da eltes el los de les las de de la le la la el le le de eltara des
de les das elta lorante le esa lo de ena esta e