In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [3]:
#tokenization
#text file opened and read in data as `text`
with open('C:/Users/subha/OneDrive/Desktop/anna.txt', 'r') as f:
    text = f.read()

In [4]:
# encode the text and map each character to an integer and vice versa

# we create two dictonaries:
# 1. int2char, which maps integers to characters
# 2. char2int, which maps characters to unique integers

chars = tuple(set(text))

int2char = dict(enumerate(chars))

char2int = {ch: ii for ii, ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

In [5]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

In [6]:
encoded[:100]

array([ 2, 44, 66, 51, 55, 15, 40, 58,  5, 74, 74, 74,  1, 66, 51, 51, 11,
       58, 16, 66, 29, 42, 59, 42, 15, 76, 58, 66, 40, 15, 58, 66, 59, 59,
       58, 66, 59, 42, 38, 15, 47, 58, 15, 79, 15, 40, 11, 58, 18, 39, 44,
       66, 51, 51, 11, 58, 16, 66, 29, 42, 59, 11, 58, 42, 76, 58, 18, 39,
       44, 66, 51, 51, 11, 58, 42, 39, 58, 42, 55, 76, 58, 24, 35, 39, 74,
       35, 66, 11, 14, 74, 74, 61, 79, 15, 40, 11, 55, 44, 42, 39])

In [7]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot


In [8]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    
    batch_size = n_seqs * n_steps
    n_batches = len(arr)//batch_size
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        
        # The features
        x = arr[:, n:n+n_steps]
        
        # The targets, shifted by one
        y = np.zeros_like(x)
        
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+n_steps]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [9]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [10]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[ 2 44 66 51 55 15 40 58  5 74]
 [58 66 29 58 39 24 55 58 33 24]
 [79 42 39 14 74 74 41 52 15 76]
 [39 58 17 18 40 42 39 33 58 44]
 [58 42 55 58 42 76 80 58 76 42]
 [58 78 55 58 35 66 76 74 24 39]
 [44 15 39 58 54 24 29 15 58 16]
 [47 58 32 18 55 58 39 24 35 58]
 [55 58 42 76 39 81 55 14 58 72]
 [58 76 66 42 17 58 55 24 58 44]]

y
 [[44 66 51 55 15 40 58  5 74 74]
 [66 29 58 39 24 55 58 33 24 42]
 [42 39 14 74 74 41 52 15 76 80]
 [58 17 18 40 42 39 33 58 44 42]
 [42 55 58 42 76 80 58 76 42 40]
 [78 55 58 35 66 76 74 24 39 59]
 [15 39 58 54 24 29 15 58 16 24]
 [58 32 18 55 58 39 24 35 58 76]
 [58 42 76 39 81 55 14 58 72 44]
 [76 66 42 17 58 55 24 58 44 15]]


In [25]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_steps=100, n_hidden=256, n_layers=2,
                               drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        # Creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        ## Define the LSTM
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)
        
        ## Define a dropout layer
        self.dropout = nn.Dropout(drop_prob)
        
        ## Define the final, fully-connected output layer
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
        # Initialize the weights
        self.init_weights()
        
    def forward(self, x, hc):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hc`. '''
        
        ## Get x, and the new hidden state (h, c) from the lstm
        x, (h, c) = self.lstm(x, hc)
        
        ## Ppass x through the dropout layer
        x = self.dropout(x)
        #x.is_contiguous()==True
        # Stack up LSTM outputs using view
        x = x.contiguous().view(x.size()[0]*x.size()[1], self.n_hidden)
        
        ## Put x through the fully-connected layer
        x = self.fc(x)
        
        # Return x and the hidden state (h, c)
        return x, (h, c)
    
    def predict(self, char, h=None, cuda=False, top_k=None):
        ''' Given a character, predict the next character.
        
            Returns the predicted character and the hidden state.
        '''
        if cuda:
            self.cuda()
        else:
            self.cpu()
        
        if h is None:
            h = self.init_hidden(1)
        
        x = np.array([[self.char2int[char]]])
        x = one_hot_encode(x, len(self.chars))
        
        inputs = torch.from_numpy(x)
        
        if cuda:
            inputs = inputs.cuda()
        
        h = tuple([each.data for each in h])
        out, h = self.forward(inputs, h)

        p = F.softmax(out, dim=1).data
        
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(self.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()
        
        char = np.random.choice(top_ch, p=p/p.sum())
            
        return self.int2char[char], h
    
    def init_weights(self):
        ''' Initialize weights for fully connected layer '''
        initrange = 0.1
        
        # Set bias tensor to all zeros
        self.fc.bias.data.fill_(0)
        # FC weights as random uniform
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, n_seqs):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x n_seqs x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

In [26]:
def train(net, data, epochs=10, n_seqs=10, n_steps=50, lr=0.001, clip=5, val_frac=0.1, cuda=False, print_every=10):
    ''' Training a network 
    
        Arguments
        ---------
        
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        cuda: Train with CUDA on a GPU
        print_every: Number of steps for printing training and validation loss
    
    '''
    
    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    
    criterion = nn.CrossEntropyLoss()
    
    # create training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if cuda:
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    
    for e in range(epochs):
        
        h = net.init_hidden(n_seqs)
        
        for x, y in get_batches(data, n_seqs, n_steps):
            
            counter += 1
            
            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history
            h = tuple([each.data for each in h])

            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            
            loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))

            loss.backward()
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % print_every == 0:
                
                # Get validation loss
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                
                for x, y in get_batches(val_data, n_seqs, n_steps):
                    
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [27]:
if 'net' in locals():
    del net

In [28]:
# Initialize and print the network
net = CharRNN(chars, n_hidden=512, n_layers=2)

print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [29]:
n_seqs, n_steps = 128, 100

train(net, encoded, epochs=25, n_seqs=n_seqs, n_steps=n_steps, lr=0.001, cuda=True, print_every=10)

Epoch: 1/25... Step: 10... Loss: 3.3370... Val Loss: 3.3316
Epoch: 1/25... Step: 20... Loss: 3.1898... Val Loss: 3.2156
Epoch: 1/25... Step: 30... Loss: 3.1037... Val Loss: 3.0916
Epoch: 1/25... Step: 40... Loss: 2.9192... Val Loss: 2.9271
Epoch: 1/25... Step: 50... Loss: 2.7824... Val Loss: 2.7507
Epoch: 1/25... Step: 60... Loss: 2.6235... Val Loss: 2.6460
Epoch: 1/25... Step: 70... Loss: 2.5546... Val Loss: 2.5676
Epoch: 1/25... Step: 80... Loss: 2.4853... Val Loss: 2.5111
Epoch: 1/25... Step: 90... Loss: 2.4574... Val Loss: 2.4631
Epoch: 1/25... Step: 100... Loss: 2.3917... Val Loss: 2.4250
Epoch: 1/25... Step: 110... Loss: 2.3516... Val Loss: 2.3943
Epoch: 1/25... Step: 120... Loss: 2.2955... Val Loss: 2.3667
Epoch: 1/25... Step: 130... Loss: 2.3094... Val Loss: 2.3394
Epoch: 2/25... Step: 140... Loss: 2.2801... Val Loss: 2.3133
Epoch: 2/25... Step: 150... Loss: 2.2478... Val Loss: 2.3146
Epoch: 2/25... Step: 160... Loss: 2.2364... Val Loss: 2.2684
Epoch: 2/25... Step: 170... Loss:

Epoch: 10/25... Step: 1350... Loss: 1.3515... Val Loss: 1.5393
Epoch: 10/25... Step: 1360... Loss: 1.3561... Val Loss: 1.5256
Epoch: 10/25... Step: 1370... Loss: 1.3311... Val Loss: 1.5226
Epoch: 10/25... Step: 1380... Loss: 1.3822... Val Loss: 1.5262
Epoch: 10/25... Step: 1390... Loss: 1.3948... Val Loss: 1.5202
Epoch: 11/25... Step: 1400... Loss: 1.3886... Val Loss: 1.5190
Epoch: 11/25... Step: 1410... Loss: 1.3994... Val Loss: 1.5145
Epoch: 11/25... Step: 1420... Loss: 1.3873... Val Loss: 1.5082
Epoch: 11/25... Step: 1430... Loss: 1.3602... Val Loss: 1.5131
Epoch: 11/25... Step: 1440... Loss: 1.3768... Val Loss: 1.5082
Epoch: 11/25... Step: 1450... Loss: 1.3090... Val Loss: 1.5069
Epoch: 11/25... Step: 1460... Loss: 1.3389... Val Loss: 1.5066
Epoch: 11/25... Step: 1470... Loss: 1.3191... Val Loss: 1.5105
Epoch: 11/25... Step: 1480... Loss: 1.3547... Val Loss: 1.5066
Epoch: 11/25... Step: 1490... Loss: 1.3316... Val Loss: 1.5027
Epoch: 11/25... Step: 1500... Loss: 1.3245... Val Loss:

Epoch: 20/25... Step: 2660... Loss: 1.2001... Val Loss: 1.4126
Epoch: 20/25... Step: 2670... Loss: 1.2124... Val Loss: 1.4116
Epoch: 20/25... Step: 2680... Loss: 1.2012... Val Loss: 1.4131
Epoch: 20/25... Step: 2690... Loss: 1.1944... Val Loss: 1.4157
Epoch: 20/25... Step: 2700... Loss: 1.2080... Val Loss: 1.4156
Epoch: 20/25... Step: 2710... Loss: 1.1750... Val Loss: 1.4123
Epoch: 20/25... Step: 2720... Loss: 1.1748... Val Loss: 1.4112
Epoch: 20/25... Step: 2730... Loss: 1.1735... Val Loss: 1.4056
Epoch: 20/25... Step: 2740... Loss: 1.1677... Val Loss: 1.4082
Epoch: 20/25... Step: 2750... Loss: 1.1804... Val Loss: 1.4091
Epoch: 20/25... Step: 2760... Loss: 1.1630... Val Loss: 1.4063
Epoch: 20/25... Step: 2770... Loss: 1.2109... Val Loss: 1.4031
Epoch: 20/25... Step: 2780... Loss: 1.2316... Val Loss: 1.4089
Epoch: 21/25... Step: 2790... Loss: 1.2116... Val Loss: 1.4105
Epoch: 21/25... Step: 2800... Loss: 1.2248... Val Loss: 1.4031
Epoch: 21/25... Step: 2810... Loss: 1.2257... Val Loss:

In [30]:
# change the name, for saving multiple files
model_name = 'rnn_25_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [31]:
def sample(net, size, prime='The', top_k=None, cuda=False):
        
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    
    h = net.init_hidden(1)
    
    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [32]:
print(sample(net, 2000, prime='Anna', top_k=5, cuda=True))

Anna had been
thereful, the promase seemed to ask her heart that had been
asked him about it. At the picture of her simple tries they were
there the bits of the counters of a minute from the story of
the carriage in the server to her heaved in a found of a sport merchance,
her lover, a condingers held him the prayers of her, and almost in the
solutions of the stands, the music worse to the carriage, he would hear
them, and she came into the carryarm.

"Why, you'll be thinking about this face, I should have been talking of him to
her.




Chapter 22


"Yes, I've not been a cruel of the soul of the pordiculation of
this whole fright of the children and he wanted to talk about it." He
took the same as a smile of consciousness of an impossibility in
the country house as he was the carriage of the desire in the priest of
the children, and so sort of having shutt the corridor.

"I say anything," she said, and said her eyes asked it was not his
soft hair, whatever in the following on his big 

In [33]:
# Here we have loaded in a model that trained over 1 epoch `rnn_1_epoch.net`
with open('rnn_25_epoch.net', 'rb') as f:
    checkpoint = torch.load(f)
    
loaded = CharRNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [34]:
print(sample(loaded, 2000, cuda=True, top_k=5, prime="And Levin said"))

And Levin said he
could not help seeing her son, as a point of all, staying his stairs,
and trapsicaling in the hand, and she found the fact.



Chapter 7


The captain was of a smaller old coach treating them, but her
shoulder and words and the characteristic of the strong, taken tase
at the strupt to her, than that without all. But that showed this child to her
heart so impossible to think of the sorries, the sight of the partion, and
who could not be too, he was any more assisted on that this simple the
meadows of that happiness of which there was now his below in a child.
Stepan Arkadyevitch had tears with went along with a smule of
heart well as he tried to be at the four too him that she
sent, to the day to this party the crimining on his sight of the
consciousness. Here suffering which was some too which had been strained on his
husband and wound them with her story of the same sense of her.

"I should be it was able to go into the chief some of men and that to
dispute, and were

In [35]:
print(sample(loaded, 200, cuda=True, top_k=5, prime="for which"))

for which he would
be in a countess and without the clerk took a conscious of the meetings, and
through the study of the wede the coats and heart of sense of
herself, was a little sorry and with all the simples


In [39]:
print(sample(loaded, 50, cuda=True, top_k=5, prime="I"))

I've a particular hise
at that countess, and were at
