In [119]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [120]:
with open('data/anna.txt','r') as f:
    text = f.read()
print(len(text))

1985223


In [121]:
print(type(text))
chars = tuple(set(text))
int2chars = dict(enumerate(chars))
chars2int = {ch:i for i, ch in int2chars.items()}
print(chars2int)

<class 'str'>
{'9': 0, 'O': 1, "'": 2, 'k': 3, 'F': 4, 'z': 5, ')': 6, 'l': 7, 'h': 8, 'i': 9, '0': 10, 'S': 11, 'j': 12, 'B': 13, 'e': 14, '*': 15, 'a': 16, '$': 17, '%': 18, 'C': 19, 'U': 20, 'M': 21, '3': 22, '/': 23, 'b': 24, 'u': 25, 'g': 26, ' ': 27, 'r': 28, 't': 29, 'X': 30, 'v': 31, '.': 32, 'P': 33, 'I': 34, 'Z': 35, 'Y': 36, 'J': 37, '`': 38, ':': 39, '&': 40, 'q': 41, 'd': 42, '?': 43, '1': 44, 'R': 45, '5': 46, 'x': 47, '6': 48, 'o': 49, '!': 50, 'K': 51, 'E': 52, 'A': 53, 'H': 54, 'w': 55, '7': 56, 'c': 57, 'L': 58, '"': 59, 'f': 60, 'T': 61, '@': 62, 'm': 63, 'G': 64, ';': 65, '_': 66, 'y': 67, '-': 68, '4': 69, 'V': 70, 'W': 71, '(': 72, '2': 73, 's': 74, 'Q': 75, 'p': 76, '\n': 77, 'D': 78, '8': 79, 'N': 80, 'n': 81, ',': 82}


In [122]:
encoded_text = np.array([chars2int[ch] for ch in text])
encoded_text[:10]
print(type(encoded_text[:10]))

<class 'numpy.ndarray'>


In [123]:
def one_hot_encode(arr, num_classes):
    one_hot = np.zeros((np.multiply(*arr.shape), num_classes))
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1
    one_hot = one_hot.reshape(*arr.shape,num_classes)
    return one_hot

test = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(one_hot_encode(test,10))

[[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]]


In [124]:
class sequenceDataset(Dataset):
    def __init__(self, text, sequence_length, num_classes):
        text_length = len(text)
        self.num_classes = num_classes
        self.n_seq = text_length // sequence_length
        self.text = text[:self.n_seq * sequence_length]
        self.sequence_length = sequence_length
    
    def __len__(self):
        return self.n_seq
    
    def __getitem__(self, idx):
        X = self.text[idx*self.sequence_length:(idx+1)*self.sequence_length]
        y = np.zeros_like(X)
        try:
            y[:-1] = X[1:]
            y[-1] = self.text[(idx+1)*self.sequence_length+1]
        except IndexError:
            y[:-1] = X[1:]
            y[-1] = self.text[0]

        return X, y   

In [125]:
class charLstm(nn.Module):
    def __init__(self, hidden_size, n_layer, num_classes, drop_prob = 0.5, learning_rate = 1e-3):
        super(charLstm, self).__init__()
        self.hidden_size = hidden_size
        self.n_layer = n_layer
        self.input_size = num_classes
        self.num_classes = num_classes
        self.lr = learning_rate
        
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.n_layer,batch_first=True)
        self.drop = nn.Dropout(p=drop_prob)
        self.fc = nn.Linear(self.hidden_size, self.num_classes)
        
    def forward(self, x, hidden=None):
        output, hidden = self.lstm(x, hidden)
        
        output = self.drop(output)
        output = output.contiguous().view(-1, self.hidden_size)
        output = self.fc(output)
        return output, hidden
    
    def init_weights(self):
        for name, param in self.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                nn.init.constant_(param.data, 0)
    
    def init_hidden(self, batch_size):
        weights = next(self.parameters()).data
        
        if torch.cuda.is_available():
            hidden = (
                weights.new(self.n_layer,batch_size, self.hidden_size).zero_().cuda(),
                weights.new(self.n_layer,batch_size, self.hidden_size).zero_().cuda()
            )
        else:
            hidden = (
                weights.new(self.n_layer,batch_size, self.hidden_size).zero_(),
                weights.new(self.n_layer,batch_size, self.hidden_size).zero_()
            )
        return hidden

In [126]:
batch_size = 128
val_frac = 0.2
val_idx = int(len(text) * (1-val_frac))
num_classes = len(chars)
sequence_length = 100
train_ds = sequenceDataset(encoded_text[:val_idx], sequence_length, num_classes)
val_ds = sequenceDataset(encoded_text[val_idx:], sequence_length, num_classes)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True,drop_last=True)
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=True,drop_last=True)

hidden_size = 512
n_layer = 2
model = charLstm(hidden_size=hidden_size, n_layer=n_layer, num_classes=num_classes, drop_prob=0.5, learning_rate=1e-3)
model.init_weights()

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=1e-3)

In [127]:
def train_model(model, train_dl, val_dl,epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    clip = 5
    print_every = 200
    
    model.to(device)
    model.train()
    
    for epoch in range(epochs):
        hidden = model.init_hidden(batch_size=batch_size)
        for idx, (X, y) in enumerate(train_dl):
            X, y = one_hot_encode(X, num_classes), one_hot_encode(y, num_classes)
            X, y = torch.from_numpy(X).float(), torch.from_numpy(y).float()
            X, y = X.to(device), y.to(device)
            hidden = (hidden[0].detach(), hidden[1].detach())
            model.zero_grad()
            pred, hidden = model(X, hidden)
            loss = criterion(pred, y.contiguous().view(batch_size*sequence_length,-1))
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            opt.step()
            
            if idx % print_every == 0:
                model.eval()
                val_hidden = model.init_hidden(batch_size=batch_size)
                val_losses = []
                with torch.no_grad():
                    for idx, (X, y) in enumerate(val_dl):
                        X, y = one_hot_encode(X, num_classes).astype(np.float32), one_hot_encode(y, num_classes).astype(np.float32)
                        X, y = torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)
                        val_hidden = (val_hidden[0].detach(), val_hidden[1].detach())
                        val_pred, val_hidden = model(X, val_hidden)
                        val_loss = criterion(val_pred, y.contiguous().view(batch_size*sequence_length,-1))
                        val_losses.append(val_loss.item())
            model.train()
            
            print('Epoch: {}/{}.. '.format(epoch+1, epochs),
                  'idx : {}'.format(idx),
                  'loss : {:.4f}'.format(loss.item()),
                  'val_loss : {:.4f}'.format(np.mean(val_losses)))
                    

In [128]:
print(model)

charLstm(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True)
  (drop): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [129]:

train_model(model,train_dl,val_dl,epochs=20)

Epoch: 1/20..  idx : 30 loss : 4.4188 val_loss : 4.4092
Epoch: 1/20..  idx : 1 loss : 4.4093 val_loss : 4.4092
Epoch: 1/20..  idx : 2 loss : 4.3921 val_loss : 4.4092
Epoch: 1/20..  idx : 3 loss : 4.3453 val_loss : 4.4092
Epoch: 1/20..  idx : 4 loss : 4.1613 val_loss : 4.4092
Epoch: 1/20..  idx : 5 loss : 3.3803 val_loss : 4.4092
Epoch: 1/20..  idx : 6 loss : 3.3236 val_loss : 4.4092
Epoch: 1/20..  idx : 7 loss : 3.2051 val_loss : 4.4092
Epoch: 1/20..  idx : 8 loss : 3.2370 val_loss : 4.4092
Epoch: 1/20..  idx : 9 loss : 3.2737 val_loss : 4.4092
Epoch: 1/20..  idx : 10 loss : 3.2150 val_loss : 4.4092
Epoch: 1/20..  idx : 11 loss : 3.1739 val_loss : 4.4092
Epoch: 1/20..  idx : 12 loss : 3.1780 val_loss : 4.4092
Epoch: 1/20..  idx : 13 loss : 3.1567 val_loss : 4.4092
Epoch: 1/20..  idx : 14 loss : 3.1850 val_loss : 4.4092
Epoch: 1/20..  idx : 15 loss : 3.1549 val_loss : 4.4092
Epoch: 1/20..  idx : 16 loss : 3.1631 val_loss : 4.4092
Epoch: 1/20..  idx : 17 loss : 3.1526 val_loss : 4.4092
E

In [133]:
model_name = 'lstm_20_epochs'

check_point = {
    'n_layers' : model.n_layer,
    'hidden_size': model.hidden_size,
    'state_dict': model.state_dict()
}
with open(model_name,'wb') as f:
    torch.save(check_point,f)

In [115]:
model_name = 'rnn_x_epoch.net'
with open(model_name,'rb') as f:
    check_point = torch.load(f)
    
model = charLstm(check_point['n_hidden'], check_point['n_layers'], num_classes, drop_prob=0.5, learning_rate=1e-3)
model.load_state_dict(check_point['state_dict'])


  check_point = torch.load(f)


<All keys matched successfully>

In [130]:
def predict(model, char, hidden, top_k):
    X = np.array([chars2int[char]]).reshape(1,-1).astype(np.int16)
    X = one_hot_encode(X, num_classes).astype(np.float32)
    X = torch.from_numpy(X)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X = X.to(device)
    
    hidden = (hidden[0].detach(), hidden[1].detach())
    out, hidden = model(X, hidden)
    
    p = F.softmax(out, dim=1).data
    
    top_p, top_i = p.topk(top_k)
    top_p, top_i = top_p.cpu().numpy().squeeze(), top_i.cpu().numpy().squeeze()
    i = np.random.choice(top_i, p= top_p/top_p.sum())
    return int2chars[i], hidden

In [131]:
def sample(model, size, prime='Anna', top_k=3):
    chars = [ ch for ch in prime]
    hidden = model.init_hidden(batch_size=1)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    
    for ch in chars:
        
        char, hidden = predict(model, ch, hidden, top_k)
    
    chars.append(char)
    
    for i in range(size):
        char, hidden = predict(model, chars[-1], hidden, top_k)
        chars.append(char)
    
    return "".join(chars)

In [132]:
print(sample(model, size=2000, prime='And Levin said', top_k=5))

And Levin said that her
showels and the sone, as though to say. He saw
that the subject of seronice, the persons of his wishe the string of harrighous.
He drove up from
her thoughts. She was terrible in a sort for the
sense, and his best success to a met to me."

"I've going to sup.
I didn't know, a children would be dornt," said Levin, sighing.

"I'll come raid. In her eyes, we shan the man who
has spection of the side of the weakness to the prince,s she had all with seeking
the same
time, and wordered her head to see him in the sound
of the change in the same subject. This
case of a stranger time; but in sens a pear and difficult of all is that she was not to decide the criminary of the huge she had not to
consider his white conversation and attached work and call, and then, with a pleasuare head at and see him
walked off into a pitty of an end or
a cerear thing is to stand would see anything, to do it. There was
a look of support, and there was nothing allow her and her, his husbing