In [1]:
import time
import random 
import unicodedata
import string 
import re

import matplotlib.pyplot as plt 
import torch

torch.backends.cudnn.deterministic = True

In [3]:
# General Settings 
RANDOM_SEED = 123
torch.manual_seed(RANDOM_SEED) 

DEVICE =  "mps" #torch.device('cuda' if torch.cuda.is_available() else 'cpu')
TEXT_PORTION_SIZE = 200 
NUM_ITER = 5_000 
LEARNING_RATE = 0.005
EMBEDDING_DIM = 100 
HIDDEN_DIM = 128 

print(f"Device: {DEVICE}")

Device: mps


In [4]:
# Dataset 
with open("data/anna.txt", "r") as f: 
    textfile = f.read() 

# Strip extra spaces 
textfile = re.sub(" +", " ", textfile)

TEXT_LENGTH = len(textfile) 
print(f"Number of chars in text: {TEXT_LENGTH}")

Number of chars in text: 1984768


In [5]:
TEXT_LENGTH - TEXT_PORTION_SIZE

1984568

In [6]:
# Divide the text into similar portions 

def random_portion(textfile): 
    # choose a rand int btween 0 and tot. number of chars in df - text_portion_size
    start_index = random.randint(0, TEXT_LENGTH - TEXT_PORTION_SIZE)
    # from start_idx add the text_portion_size + 1 to get the end_indx
    end_index = start_index + TEXT_PORTION_SIZE + 1
    return textfile[start_index:end_index]

print(random_portion(textfile))

e Masha a little
fungus, split in half across its rosy cap by the dry grass from under
which it thrust itself. Varenka got up while Masha picked the fungus,
breaking it into two white halves. "This bri


In [7]:
# Convert Chars to Tensors
def char_to_tensor(text) -> torch.tensor: 
    _list = [string.printable.index(c) for c in text]
    tensor = torch.tensor(_list).long()
    return tensor

print(string.printable)
print(char_to_tensor("01abc"))

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

tensor([ 0,  1, 10, 11, 12])


In [8]:
def draw_random_sample(textfile): 
    text_long = char_to_tensor(random_portion(textfile))
    inputs = text_long[:-1]
    targets = text_long[1:]
    return inputs, targets

print(draw_random_sample(textfile))

(tensor([13, 78, 94, 32, 24, 30, 21, 13, 94, 18, 29, 94, 17, 10, 31, 14, 94, 28,
        30, 18, 29, 14, 13, 94, 34, 24, 30, 27, 96, 29, 10, 28, 29, 14, 28, 94,
        11, 14, 29, 29, 14, 27, 94, 29, 24, 94, 11, 14, 94, 29, 27, 18, 14, 13,
        94, 18, 23, 94, 29, 17, 14, 94, 24, 21, 13, 94, 12, 27, 18, 22, 18, 23,
        10, 21, 94, 29, 27, 18, 11, 30, 23, 10, 21, 82, 63, 96, 96, 63, 44, 68,
        22, 94, 23, 24, 29, 94, 16, 24, 18, 23, 16, 94, 29, 24, 94, 11, 14, 94,
        29, 27, 18, 14, 13, 75, 94, 44, 94, 28, 17, 10, 23, 68, 29, 94, 22, 30,
        27, 13, 14, 27, 94, 10, 23, 34, 11, 24, 13, 34, 73, 94, 10, 23, 13, 94,
        44, 68, 31, 14, 94, 23, 24, 94, 23, 14, 14, 13, 94, 24, 15, 96, 18, 29,
        75, 94, 58, 14, 21, 21, 73, 94, 44, 94, 29, 14, 21, 21, 94, 34, 24, 30,
        94, 32, 17, 10, 29, 73, 63, 94, 17, 14, 94, 32, 14, 23, 29, 94, 24, 23,
        73, 94]), tensor([78, 94, 32, 24, 30, 21, 13, 94, 18, 29, 94, 17, 10, 31, 14, 94, 28, 30,
        18, 29, 14, 1

In [9]:
# Model 

class RNN(torch.nn.Module): 
    def __init__(
        self, 
        input_size: int, 
        embed_size: int, 
        hidden_size: int, 
        output_size: int
        ): 
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.embed = torch.nn.Embedding(
                                    num_embeddings=input_size, 
                                    embedding_dim=embed_size) 
        self.rnn = torch.nn.LSTMCell(input_size=embed_size, 
                                     hidden_size=hidden_size) 
        self.fc = torch.nn.Linear(hidden_size, output_size) 
        
    def forward(self, character, hidden_state, cell_state):
        # expects character as size (batch_size, 1) 
        
        # (batch_size, embedding_dim) = (1, embedding_dim=100) 
        embedded = self.embed(character) # (1 * 100)
        (hidden_state, cell_state) = self.rnn(embedded, (hidden_state, cell_state))
        output = self.fc(hidden_state)
        return output, hidden_state, cell_state
                                              
    def init_zero_state(self):
        # dimension = (1 x 128)
        init_hidden = torch.zeros(1, self.hidden_size).to(DEVICE) 
        init_cell = torch.zeros(1, self.hidden_size).to(DEVICE)
        return (init_hidden, init_cell)
                                                                                    
                                              
        

In [10]:
torch.manual_seed(RANDOM_SEED) 
model = RNN(input_size=len(string.printable), # 100
            embed_size=EMBEDDING_DIM,         # 100
            hidden_size=HIDDEN_DIM,           # 128
            output_size=len(string.printable) # 100
           ) 
model = model.to(DEVICE) 
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) 

In [24]:
# Training 

def evaluate(model, prime_str='A', predict_len=100, temperature=0.8):
    ## based on https://github.com/spro/practical-pytorch/
    ## blob/master/char-rnn-generation/char-rnn-generation.ipynb

    (hidden, cell_state) = model.init_zero_state()
    prime_input = char_to_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        inp = prime_input[p].unsqueeze(0)
        _, hidden, cell_state = model(inp.to(DEVICE), hidden, cell_state)
    inp = prime_input[-1].unsqueeze(0)
    
    for p in range(predict_len):

        outputs, hidden, cell_state = model(inp.to(DEVICE), hidden, cell_state)
        
        # Sample from the network as a multinomial distribution
        output_dist = outputs.data.view(-1).div(temperature).exp() # e^{logits / T}
        top_i = torch.normal(output_dist, 1)[0]

        # Add predicted character to string and use as next input
        predicted_char = string.printable[top_i]
        predicted += predicted_char
        inp = char_to_tensor(predicted_char)

    return predicted

In [25]:
start_time = time.time()

loss_list = []

for iteration in range(NUM_ITER):

    hidden, cell_state = model.init_zero_state()
    optimizer.zero_grad()
    
    loss = 0.
    inputs, targets = draw_random_sample(textfile)
    inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
    
    for c in range(TEXT_PORTION_SIZE):
        outputs, hidden, cell_state = model(inputs[c].unsqueeze(0), hidden, cell_state)
        loss += torch.nn.functional.cross_entropy(outputs, targets[c].view(1))

    loss /= TEXT_PORTION_SIZE
    loss.backward()
    
    ### UPDATE MODEL PARAMETERS
    optimizer.step()

    ### LOGGING
    with torch.no_grad():
        if iteration % 200 == 0:
            print(f'Time elapsed: {(time.time() - start_time)/60:.2f} min')
            print(f'Iteration {iteration} | Loss {loss.item():.2f}\n\n')
            print(evaluate(model, 'Th', 200), '\n')
            print(50*'=')
            
            loss_list.append(loss.item())
            plt.clf()
            plt.plot(range(len(loss_list)), loss_list)
            plt.ylabel('Loss')
            plt.xlabel('Iteration x 1000')
            plt.savefig('loss1.pdf')
            
plt.clf()
plt.ylabel('Loss')
plt.xlabel('Iteration x 1000')
plt.plot(range(len(loss_list)), loss_list)
plt.show()

Time elapsed: 0.01 min
Iteration 0 | Loss 3.09




  nonzero_finite_vals = torch.masked_select(


tensor(0.4867, device='mps:0')


TypeError: only integer tensors of a single element can be converted to an index

In [16]:
import os 

os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"