In [1]:
#Character-Level LSTM in PyTorch
#In this notebook, I'll construct a character-level LSTM with PyTorch. The network will train character by character on some text, then generate new text character by character. As an example, I will train on Anna Karenina. This model will be able to generate new text based on the text from the book!
#This network is based off of Andrej Karpathy's post on RNNs and implementation in Torch. Below is the general architecture of the character-wise RNN.

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
#Open text file dan baca data sebagai text
with open('data/anna.txt', 'r') as f:
    text = f.read()

In [3]:
#Let's check out the first 100 characters, make sure everything is peachy. According to the American Book Review, this is the 6th best first line of a book ever.

text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [4]:
#encode text dan petakan masing masing karakter menjadi interger dan vise verca

#kita membuat 2 kamus
#1. int2char, dimana interger dipetakan menjadi character
#2. char2int, dimana character dipetakan menjadi integer yg unik

chars = tuple(set(text))
intToChar = dict(enumerate(chars))
charToInt = {ch : ii for ii, ch in intToChar.items()}

#encode the text
encoded = np.array([charToInt[ch] for ch in text])

In [5]:
encoded[:100]

array([19, 69, 14, 78, 39, 55, 44, 17, 13, 53, 53, 53, 11, 14, 78, 78, 21,
       17, 67, 14, 68, 36, 22, 36, 55,  9, 17, 14, 44, 55, 17, 14, 22, 22,
       17, 14, 22, 36, 20, 55, 80, 17, 55, 70, 55, 44, 21, 17, 45, 32, 69,
       14, 78, 78, 21, 17, 67, 14, 68, 36, 22, 21, 17, 36,  9, 17, 45, 32,
       69, 14, 78, 78, 21, 17, 36, 32, 17, 36, 39,  9, 17, 31, 66, 32, 53,
       66, 14, 21, 57, 53, 53, 40, 70, 55, 44, 21, 39, 69, 36, 32])

In [6]:
#Pre-processing the data
#As you can see in our char-RNN image above, our LSTM expects an input that is one-hot encoded meaning that each character is converted into an integer (via our created dictionary) and then converted into a column vector where only it's corresponding integer index will have the value of 1 and the rest of the vector will be filled with 0's. Since we're one-hot encoding the data, let's make a function to do that!

def oneHotEncode(arr, nLabels):
    
    #inisialisasi encoded array
    oneHot = np.zeros((np.multiply(*arr.shape), nLabels), dtype = np.float32)
    
    #isi element dengan ones
    oneHot[np.arange(oneHot.shape[0]), arr.flatten()] = 1.
    
    #terakhir, bentuk ke array yg asli
    oneHot = oneHot.reshape((*arr.shape, nLabels))
    
    return oneHot

In [7]:
#Check fungsi berkerja dengan semestinya
testSeq = np.array([[3, 5, 1]])
oneHot  = oneHotEncode(testSeq, 8)

print(oneHot)

[[[0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0. 0.]]]


In [8]:
def getBatches(arr, batchSize, seqLength):
    ''' Membuat generator yang mengembalikan ukuran batch
     argument :
     arr : Array yang ingin di buat per batch
     batchSize : jumlah ukuran batch
     seqLenght : panjang encode char
     '''
    
    batchSizeTotal = batchSize * seqLength
    #total jumat batch yang dapat di buat
    nBatches = len(arr)//batchSizeTotal
    
    #menyimpan karakter yang hanya dapat membuat batch nya penuh
    arr = arr[:nBatches * batchSizeTotal]
    #bentuk ulang ke bari sbatchSize
    arr = arr.reshape((batchSize, -1))
    
    #iterasi lewat array, 1 sequen per waktu
    
    for n in range(0, arr.shape[1], seqLength):
        # feature
        x = arr[:, n : n + seqLength]
        #target bergeser 1
        y = np.zeros_like(x)
        try :
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n + seqLength]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [9]:
#Test implementasi 

batches = getBatches(encoded, 8, 50)
x, y = next(batches)

In [10]:
# printing out the first 10 items in a sequence
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[19 69 14 78 39 55 44 17 13 53]
 [ 9 31 32 17 39 69 14 39 17 14]
 [55 32 42 17 31 44 17 14 17 67]
 [ 9 17 39 69 55 17 82 69 36 55]
 [17  9 14 66 17 69 55 44 17 39]
 [82 45  9  9 36 31 32 17 14 32]
 [17 26 32 32 14 17 69 14 42 17]
 [71 41 22 31 32  9 20 21 57 17]]

y
 [[69 14 78 39 55 44 17 13 53 53]
 [31 32 17 39 69 14 39 17 14 39]
 [32 42 17 31 44 17 14 17 67 31]
 [17 39 69 55 17 82 69 36 55 67]
 [ 9 14 66 17 69 55 44 17 39 55]
 [45  9  9 36 31 32 17 14 32 42]
 [26 32 32 14 17 69 14 42 17  9]
 [41 22 31 32  9 20 21 57 17 51]]


In [11]:
#Cek GPU tersedia
trainOnGPU = torch.cuda.is_available()
if(trainOnGPU):
    print('Training on GPU')
else:
    print('No GPU available, training on CPU; consider making n_epochs very small!')

Training on GPU


In [12]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, nHidden = 256, nLayers = 2,
                dropProb = 0.5, lr = 0.001):
        
        super().__init__()
        self.dropProb = dropProb
        self.nLayers = nLayers
        self.nHidden = nHidden
        self.lr  = lr
        
        #Membuat kamus character
        self.chars = tokens
        self.intToChar = dict(enumerate(self.chars))
        self.charToInt = {
            ch : ii for ii, ch in self.intToChar.items() }
        
        #define the LTSM
        self.lstm = nn.LSTM(len(self.chars), nHidden, nLayers,
                           dropout = dropProb, batch_first = True)
        
        #define a dropout Layer
        self.dropout = nn.Dropout(dropProb)
        
        #define akhir, koneksi output layer
        self.fc = nn.Linear(nHidden, len(self.chars))
        
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
        
        ## ambil output dan hidden yang baru dari LSTM
        rOutput, hidden = self.lstm(x, hidden)
        
        #Lewati dropout layer
        out = self.dropout(rOutput)
        
        #Stack output LSTM dengan view
        #gunakan contiguous untuk membentuk output
        out = out.contiguous().view(-1, self.nHidden)
        
        #taro x dengan melewati layer yang sudah terkoneksi
        out = self.fc(out)
        
        #return hasil akhir dan hidden state
        return out, hidden
    
    def initHidden(self, batchSize):
        
        weight = next(self.parameters()).data
        
        if(trainOnGPU):
             hidden = (weight.new(self.nLayers, batchSize, self.nHidden).zero_().cuda(),
                  weight.new(self.nLayers, batchSize, self.nHidden).zero_().cuda())
        else:
            hidden = (weight.new(self.nLayers, batchSize, self.nHidden).zero_(),
                      weight.new(self.nLayers, batchSize, self.nHidden).zero_())
            
        return hidden
        

In [13]:
def train(net, data, epochs = 10, batchSize = 10, seqLength = 50, lr = 0.001, clip = 5, valFrac = 0.1, printEvery = 10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    
    '''
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    # Membuat data training dan validasi data
    valIdx = int(len(data)*(1-valFrac))
    data, valData = data[:valIdx], data[valIdx:]
    
    if(trainOnGPU):
        net.cuda()
    
    counter = 0
    nChars = len(net.chars)
    for e in range(epochs):
        # inisialisai hidden state
        h = net.initHidden(batchSize)
        
        for x, y in getBatches(data, batchSize, seqLength):
            counter += 1
            
            # Encode data dan buat mereka menjadi torch Tensor
            x = oneHotEncode(x, nChars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if(trainOnGPU):
                inputs, targets = inputs.cuda(), targets.cuda()

            # Buat variabel baru untuk hidden state, bagaimanapun
            # kita backprop semua training history
            h = tuple([each.data for each in h])

            # akumulasi gradien = 0
            net.zero_grad()
            
            # get output model
            output, h = net(inputs, h)
            
            # kalkulasi loss dan lakukan backprop
            loss = criterion(output, targets.view(batchSize * seqLength).type(torch.cuda.LongTensor))
            loss.backward()
            
            # `clip_grad_norm` membantu mencegah ledakan gradien di RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            # loss stats
            if counter % printEvery == 0:
                # Get validation loss
                valH = net.initHidden(batchSize)
                valLosses = []
                net.eval()
                for x, y in getBatches(valData, batchSize, seqLength):
                    # One-hot encode our data and make them Torch tensors
                    x = oneHotEncode(x, nChars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    valH = tuple([each.data for each in valH])
                    
                    inputs, targets = x, y
                    if(trainOnGPU):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, valH = net(inputs, valH)
                    valLoss = criterion(output, targets.view(batchSize * seqLength).type(torch.cuda.LongTensor))
                
                    valLosses.append(valLoss.item())
                
                net.train() # reset to train mode after iterationg through validation data
                
                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(valLosses)))

In [14]:
#Inisialisasi Model
# define and print the net
nHidden = 512
nLayers = 2

net = CharRNN(chars, nHidden, nLayers)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [15]:
batchSize = 128
seqLength = 100
nEpochs = 20 # start smaller if you are just testing initial behavior

# train the model
train(net, encoded, epochs = nEpochs, batchSize = batchSize, seqLength = seqLength, lr = 0.001, printEvery = 10)

Epoch: 1/20... Step: 10... Loss: 3.2472... Val Loss: 3.1858
Epoch: 1/20... Step: 20... Loss: 3.1374... Val Loss: 3.1285
Epoch: 1/20... Step: 30... Loss: 3.1380... Val Loss: 3.1207
Epoch: 1/20... Step: 40... Loss: 3.1092... Val Loss: 3.1190
Epoch: 1/20... Step: 50... Loss: 3.1385... Val Loss: 3.1161
Epoch: 1/20... Step: 60... Loss: 3.1151... Val Loss: 3.1130
Epoch: 1/20... Step: 70... Loss: 3.1016... Val Loss: 3.1088
Epoch: 1/20... Step: 80... Loss: 3.1116... Val Loss: 3.0961
Epoch: 1/20... Step: 90... Loss: 3.0859... Val Loss: 3.0665
Epoch: 1/20... Step: 100... Loss: 3.0185... Val Loss: 2.9988
Epoch: 1/20... Step: 110... Loss: 2.9259... Val Loss: 2.8949
Epoch: 1/20... Step: 120... Loss: 2.7962... Val Loss: 2.7973
Epoch: 1/20... Step: 130... Loss: 2.7037... Val Loss: 2.6951
Epoch: 2/20... Step: 140... Loss: 2.6380... Val Loss: 2.5797
Epoch: 2/20... Step: 150... Loss: 2.5602... Val Loss: 2.5148
Epoch: 2/20... Step: 160... Loss: 2.5093... Val Loss: 2.4685
Epoch: 2/20... Step: 170... Loss:

Epoch: 10/20... Step: 1350... Loss: 1.3714... Val Loss: 1.4100
Epoch: 10/20... Step: 1360... Loss: 1.3762... Val Loss: 1.4114
Epoch: 10/20... Step: 1370... Loss: 1.3716... Val Loss: 1.4054
Epoch: 10/20... Step: 1380... Loss: 1.4042... Val Loss: 1.3994
Epoch: 10/20... Step: 1390... Loss: 1.4129... Val Loss: 1.4068
Epoch: 11/20... Step: 1400... Loss: 1.4201... Val Loss: 1.4022
Epoch: 11/20... Step: 1410... Loss: 1.4288... Val Loss: 1.4030
Epoch: 11/20... Step: 1420... Loss: 1.4162... Val Loss: 1.3964
Epoch: 11/20... Step: 1430... Loss: 1.3875... Val Loss: 1.4018
Epoch: 11/20... Step: 1440... Loss: 1.4089... Val Loss: 1.3974
Epoch: 11/20... Step: 1450... Loss: 1.3348... Val Loss: 1.3991
Epoch: 11/20... Step: 1460... Loss: 1.3598... Val Loss: 1.3938
Epoch: 11/20... Step: 1470... Loss: 1.3539... Val Loss: 1.3917
Epoch: 11/20... Step: 1480... Loss: 1.3715... Val Loss: 1.3885
Epoch: 11/20... Step: 1490... Loss: 1.3652... Val Loss: 1.3893
Epoch: 11/20... Step: 1500... Loss: 1.3543... Val Loss:

Epoch: 20/20... Step: 2660... Loss: 1.2312... Val Loss: 1.2821
Epoch: 20/20... Step: 2670... Loss: 1.2332... Val Loss: 1.2774
Epoch: 20/20... Step: 2680... Loss: 1.2180... Val Loss: 1.2907
Epoch: 20/20... Step: 2690... Loss: 1.2199... Val Loss: 1.2836
Epoch: 20/20... Step: 2700... Loss: 1.2266... Val Loss: 1.2815
Epoch: 20/20... Step: 2710... Loss: 1.1898... Val Loss: 1.2790
Epoch: 20/20... Step: 2720... Loss: 1.1916... Val Loss: 1.2774
Epoch: 20/20... Step: 2730... Loss: 1.1792... Val Loss: 1.2799
Epoch: 20/20... Step: 2740... Loss: 1.1895... Val Loss: 1.2828
Epoch: 20/20... Step: 2750... Loss: 1.1985... Val Loss: 1.2931
Epoch: 20/20... Step: 2760... Loss: 1.1869... Val Loss: 1.2817
Epoch: 20/20... Step: 2770... Loss: 1.2284... Val Loss: 1.2826
Epoch: 20/20... Step: 2780... Loss: 1.2435... Val Loss: 1.2763


In [17]:
# change the name, for saving multiple files
model_name = 'rnn_20_epoch.net'

checkpoint = {'nHidden': net.nHidden,
              'nLayers': net.nLayers,
              'stateDict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [28]:

def predict(net, char, h = None, topK = None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.charToInt[char]]])
        x = oneHotEncode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(trainOnGPU):
            inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        if(trainOnGPU):
            p = p.cpu() # move to cpu
        
        # get top characters
        if topK is None:
            topCh = np.arange(len(net.chars))
        else:
            p, topCh = p.topk(topK)
            topCh = topCh.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(topCh, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.intToChar[char], h

In [29]:
def sample(net, size, prime='The', topK = None):
        
    if(trainOnGPU):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.initHidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, topK = topK)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, topK = topK)
        chars.append(char) 

    return ''.join(chars)

In [30]:
print(sample(net, 1000, prime='Anna', topK=5))

Anna Arkadyevna
shoot her at the conversation, told her husband. "What are the man and
the comprehition of my watches one. You can't be all that something of
herself instances! Yes, you know that. I am going in for things in the
coarment too the princess, it may be as if you were in any day, in a long
whine or this starticial time," said Vronsky that her
face and the children are so looking at her stuck of the party all
that was at the sourd, simply a famoring of the completely observantess and
conversation with him and takons of a minute to his son, and what had sudeenly
come to him with some son in the station of the doctor, and all on
his head, and he felt that he was suffering and sawing and so in this standing
to the carriage. The position when their head the carriage seemed to hild all
such a condition, he frowned, and she had not the panifue consisting of the man she
was aware of sick, he had a second starmond of what he was all one
of his standinds and satisfied about it is to 

In [33]:
# Here we have loaded in a model that trained over 20 epochs `rnn_20_epoch.net`
with open('rnn_20_epoch.net', 'rb') as f:
    checkpoint = torch.load(f)
    
loaded = CharRNN(checkpoint['tokens'], nHidden = checkpoint['nHidden'], nLayers=checkpoint['nLayers'])
loaded.load_state_dict(checkpoint['stateDict'])

In [34]:
# Sample using a loaded model
print(sample(loaded, 2000, topK = 5, prime="And Levin said"))

And Levin said:
"I she say all themelves, I see..."

She could not be said, where intinations were stood at home in the same
time. The minutes senking the soft and tried to him that in the frommonest
show, who had answered thround her.

"Well, we won't see him," said Levin.

"I've been a fire to ask her, and so it seemed!... There's no one," he said,
like taking about the disagreeable answer, that her face atsoned
her strictly. She had sat drew in a concistoving a chair, had been
seefing his wife's seating open. She went up to the prayers, he was
so intense to the door and help in the first tomeshooking and a single of severe tree at
once and a secrative, and having had been always a crimace stood, he felt
immoralle to the most time of her. She would have been to be something,
and the books's wife had been to go on, and took to him all there, and went
over a sort of treet, and he had saying her husband to the standing of his
condition, who was still to be able to bake it, he had not al