In [158]:
import torch
import torch.nn as nn
import numpy as np
import re
import matplotlib.pyplot as plt
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [1]:
# Path of the plain-text corpus; the cells below read it to build the
# character vocabulary and the list of training sentences.
input_fname = 'voyna-i-mir-tom-1.txt'

In [159]:
%matplotlib widget
def loss_plot(fig, ax, train_loss, test_loss, loss_name):
    """Redraw the train/test history of one metric on ``ax``.

    Args:
        fig: figure that owns ``ax``; its canvas is redrawn at the end.
        ax: axes to draw on. Its previous content is discarded.
        train_loss: per-epoch training values (numbers or 0-dim tensors).
        test_loss: per-epoch test values (numbers or 0-dim tensors).
        loss_name: label for the y axis.
    """
    # The function is called after every epoch with the full history, so the
    # axes must be cleared first; otherwise each call stacks another pair of
    # lines on top of the previous ones.
    ax.clear()
    # float() accepts plain numbers as well as 0-dim tensors (CPU or CUDA).
    ax.plot([float(value) for value in train_loss], color='black')
    ax.plot([float(value) for value in test_loss], color='red')
    ax.set_xlabel('Эпоха')
    ax.set_ylabel(loss_name)
    ax.legend(('Тренировочная выборка', 'Тестовая выборка'))
    fig.canvas.draw()

In [13]:
# Control characters act as sequence markers.
START_CHAR = '\b'    # prepended to every sentence
END_CHAR = '\t'      # appended to every sentence
PADDING_CHAR = '\a'  # pads sequences to a common length

# Alphabet: the markers, the newline and every lower-cased character that
# occurs in the (stripped) lines of the corpus.
chars = {START_CHAR, '\n', END_CHAR}
with open(input_fname) as f:
    for line in f:
        chars.update(line.strip().lower())

# Characters are numbered from 1 in sorted order; index 0 is reserved for padding.
char_indices = dict(zip(sorted(chars), range(1, len(chars) + 1)))
char_indices[PADDING_CHAR] = 0
indices_to_chars = {index: char for char, index in char_indices.items()}
num_chars = len(chars) + 1

In [14]:
def get_one(i, sz):
    """Return a zero-filled float array of size ``sz`` with a 1 at position ``i``."""
    one_hot = np.zeros(sz)
    one_hot[i] = 1
    return one_hot

# One-hot vector for every real character; the padding character is encoded
# as an all-zero vector.
char_vectors = {
    c: get_one(v, num_chars)
    for c, v in char_indices.items()
    if c != PADDING_CHAR
}
char_vectors[PADDING_CHAR] = np.zeros(num_chars)

In [69]:
# The corpus is "War and Peace", so the preprocessing is slightly custom:
# lines are split not only on '.', '!' and '?' but also on '-', '[' and ']'.
# A raw string is used so that the backslashes reach the regex engine without
# triggering Python's invalid-escape-sequence warning.
SENTENCE_SPLIT_RE = re.compile(r'[.!?\-\]\[]')
# Fragments that are not longer than this (measured before trimming) are dropped.
MIN_SENTENCE_LEN = 30

sentences = []
with open(input_fname, 'r') as f:
    for line in f:
        for sentence in SENTENCE_SPLIT_RE.split(line.strip().lower()):
            if len(sentence) > MIN_SENTENCE_LEN:
                sentences.append(sentence.strip())

In [133]:
def get_matrices(sentences):
    """Encode sentences as one-hot inputs and next-character index targets.

    Every sentence is wrapped as START_CHAR + sentence + END_CHAR and padded
    with PADDING_CHAR. Position ``t`` of the input holds the one-hot vector of
    character ``t``; position ``t`` of the target holds the index of character
    ``t + 1``.

    Args:
        sentences: non-empty sequence of strings whose characters are all
            present in ``char_vectors`` / ``char_indices``.

    Returns:
        ``(X, y)`` where ``X`` is a bool array of shape
        ``(len(sentences), max_len + 1, num_chars)`` and ``y`` is an int64
        array of shape ``(len(sentences), max_len + 1)``; ``max_len`` is the
        length of the longest sentence.

    Raises:
        ValueError: if ``sentences`` is empty.
    """
    if len(sentences) == 0:
        raise ValueError("get_matrices() needs at least one sentence")
    max_sentence_len = max(len(x) for x in sentences)
    # START + sentence + END has len + 2 characters, i.e. len + 1
    # (input, target) pairs. Using only max_sentence_len steps would cut off
    # the END_CHAR target of the longest sentence(s) in the batch.
    seq_len = max_sentence_len + 1
    # np.bool / np.long were removed in NumPy 1.24; use the builtin bool and
    # an explicit 64-bit integer type instead.
    X = np.zeros((len(sentences), seq_len, num_chars), dtype=bool)
    y = np.zeros((len(sentences), seq_len), dtype=np.int64)
    for i, sentence in enumerate(sentences):
        char_seq = (START_CHAR + sentence + END_CHAR).ljust(seq_len + 1, PADDING_CHAR)
        for t in range(seq_len):
            X[i, t, :] = char_vectors[char_seq[t]]
            # The loss expects class indices rather than one-hot targets;
            # padding maps to index 0.
            y[i, t] = char_indices[char_seq[t + 1]]
    return X, y

In [155]:
# Hold out 5% of the sentences for testing. replace=False keeps the sampled
# indices distinct; the default (sampling with replacement) produced duplicate
# test sentences and a test set smaller than intended.
test_indices = np.random.choice(len(sentences), int(len(sentences) * 0.05), replace=False)
test_index_set = set(test_indices.tolist())
sentences_train = [s for i, s in enumerate(sentences) if i not in test_index_set]
sentences_test = [sentences[x] for x in test_indices]
# Sort by length so that every training batch groups sentences of similar length.
sentences_train = sorted(sentences_train, key=len)
# The whole test set is encoded once, as a single batch kept on the target device.
X_test, y_test = get_matrices(sentences_test)
X_test = torch.from_numpy(X_test).to(device).float()
y_test = torch.from_numpy(y_test).to(device).long()

In [144]:
batch_size = 16
def generate_batch():
    """Yield encoded training batches forever, cycling over ``sentences_train``.

    Each pass walks ``sentences_train`` in order in slices of ``batch_size``
    (the last slice may be shorter) and yields ``get_matrices(slice)``.

    Raises:
        ValueError: if ``sentences_train`` is empty (the generator would
            otherwise spin forever without yielding anything).
    """
    if len(sentences_train) == 0:
        raise ValueError("generate_batch() needs a non-empty sentences_train")
    while True:
        # Step over the training list itself; the number of batches must not
        # be derived from the full `sentences` list, which also contains the
        # held-out test sentences.
        for start in range(0, len(sentences_train), batch_size):
            yield get_matrices(sentences_train[start:start + batch_size])

In [145]:
class ReqModel(nn.Module):
    """Single-layer character-level LSTM that predicts the next character.

    Args:
        hidden_size: number of LSTM units (default 128).
        dropout: dropout probability applied before the output layer
            (default 0.2).

    ``forward`` takes a float tensor of shape ``(batch, seq, num_chars)`` and
    returns raw logits of shape ``(batch * seq, num_chars)``; no softmax is
    applied.
    """
    def __init__(self, hidden_size=128, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(num_chars, hidden_size, batch_first = True)
        self.tanh = nn.Tanh()
        self.drop = nn.Dropout(dropout)
        self.dense = nn.Linear(hidden_size, num_chars)
        # Not applied in forward(); kept so the attribute stays available.
        # An explicit dim avoids the implicit-dimension deprecation warning.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X):
        X, _ = self.lstm(X)
        X = self.tanh(X)
        # Merge the batch and time axes: one row of features per character position.
        X = torch.reshape(X, (-1, self.hidden_size))
        X = self.drop(X)
        X = self.dense(X)
        return X

In [168]:
# Fresh single-layer model on the selected device, Adam with its default
# settings, and cross-entropy computed from the raw logits.
model = ReqModel()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters())
cross_entropy = nn.CrossEntropyLoss()

In [None]:
#Провожу эксперимент по обучению без обрезки градиентов
%matplotlib notebook
fig = plt.figure(figsize=(8,10))
ax = fig.subplots(2,1)
fig.tight_layout(pad=3.0)
ax1, ax2 = ax

epochs = 101
train_loss = []
train_acc = []
test_loss_no_clip = []
test_acc_no_clip = []
for e in range(epochs):
    train_loss_epoch = 0
    train_acc_epoch = 0
    generator = generate_batch()
    for i in range(int(len(sentences) / batch_size)):
        X, y = next(generator)
        X = torch.from_numpy(X).to(device).float()
        y = torch.from_numpy(y).to(device).long()
        logits = model(X)
        loss = cross_entropy(logits, y.reshape(-1))
        train_loss_epoch += loss.item()
        train_acc_epoch += torch.mean((y.reshape(-1) == torch.argmax(logits, -1)).float())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_epoch /= int(len(sentences) / batch_size)
    train_acc_epoch /= int(len(sentences) / batch_size)
    train_loss.append(train_loss_epoch)
    train_acc.append(train_acc_epoch)
    print(f'Эпоха {e}')
    print(f'Тренировочный лосс: {train_loss[-1]}  точность:{train_acc[-1]}')
    with torch.no_grad():
        logits_test = model(X_test)
        loss = cross_entropy(logits_test, y_test.reshape(-1))
        test_loss_no_clip.append(loss.item())
        test_acc_no_clip.append(torch.mean((y_test.reshape(-1) == torch.argmax(logits_test, -1)).float()))
    print(f'Тестовый лосс: {test_loss[-1]}  точность:{test_acc[-1]}')
    loss_plot(fig,ax1,train_loss, test_loss_no_clip, "Кросс-энтропия")
    loss_plot(fig,ax2,train_acc, test_acc_no_clip, "Точность")

In [298]:
import os
class CharSampler():
    """Generates text with a trained character model and appends it to a file.

    Args:
        char_vectors: mapping from character to its input vector.
        model: module that maps a ``(1, seq, num_chars)`` float tensor to
            per-position logits of shape ``(seq, num_chars)``.
        filename: output file; an existing file with this name is removed on
            construction because ``on_epoch_end`` appends to it.
    """
    def __init__(self, char_vectors, model, filename):
        self.char_vectors = char_vectors
        self.model = model
        if os.path.exists(filename):
            os.remove(filename)
        self.filename = filename

    def sample(self, preds, temperature):
        """Draw one class index from temperature-scaled logits.

        Args:
            preds: tensor of logits for a single position (any shape; it is
                flattened).
            temperature: divisor applied to the logits before the softmax;
                smaller values make the choice more deterministic.

        Returns:
            Index of the sampled class.
        """
        preds = preds.cpu().numpy().astype('float64').reshape(-1)
        preds = preds / temperature
        # Subtract the maximum before exponentiating: the softmax is unchanged
        # mathematically, but np.exp can no longer overflow to inf (which
        # turned the probabilities into NaN for large logits or small T).
        exp_preds = np.exp(preds - np.max(preds))
        preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    def sample_one(self, T, max_len=300):
        """Generate one sequence, starting from START_CHAR.

        Generation stops when END_CHAR is sampled or the result reaches
        ``max_len`` characters (START_CHAR included).

        Args:
            T: sampling temperature passed to ``sample``.
            max_len: upper bound on the length of the returned string.

        Returns:
            The generated string; its first character is START_CHAR.
        """
        # Sample with dropout disabled, then restore whatever mode the model
        # was in so that an ongoing training loop is not affected.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                result = START_CHAR
                while len(result) < max_len:
                    # Re-encode the whole prefix; the model is stateless between calls.
                    X_sampled = np.zeros((1, len(result), num_chars))
                    for t, c in enumerate(list(result)):
                        X_sampled[0,t, :] = self.char_vectors[c]
                    X_sampled = torch.from_numpy(X_sampled).to(device).float()
                    y_sampled = self.model(X_sampled)
                    # Only the logits of the last position predict the next character.
                    yv = y_sampled[len(result)-1:]
                    selected_char = indices_to_chars[self.sample(yv, T)]
                    if selected_char==END_CHAR:
                        break
                    result = result + selected_char
                return result
        finally:
            self.model.train(was_training)

    def on_epoch_end(self, epoch):
        """Every 20th epoch, append 5 samples per temperature to the output file."""
        if epoch % 20 == 0:
            print("Started sampling")
            with open(self.filename, 'a', encoding="utf-8") as outf:
                outf.write(f'\nЭпоха {epoch}\n')
                for T in [0.3, 0.5, 0.7, 0.9, 1.1]:
                    print(f'\tsampling, T = {T: .1f}')
                    for _ in range(5):
                        res = self.sample_one(T)
                        # res[1:] drops the leading START_CHAR.
                        outf.write(f'\nT = {T: .1f}\n {res[1:]}\n')
                            

In [None]:
#на этой модели я не заметил разницы с нормой градиентов
model = ReqModel().to(device)
optimizer = torch.optim.Adam(model.parameters())
cross_entropy = torch.nn.CrossEntropyLoss()


%matplotlib notebook
fig = plt.figure(figsize=(8,10))
ax = fig.subplots(2,1)
fig.tight_layout(pad=3.0)
ax1, ax2 = ax

epochs = 101
train_loss = []
train_acc = []
test_loss_simple = []
test_acc_simple = []
sampler = CharSampler(char_vectors, model, 'simple.txt')
for e in range(epochs):
    train_loss_epoch = 0
    train_acc_epoch = 0
    generator = generate_batch()
    for i in range(int(len(sentences) / batch_size)):
        X, y = next(generator)
        X = torch.from_numpy(X).to(device).float()
        y = torch.from_numpy(y).to(device).long()
        logits = model(X)
        loss = cross_entropy(logits, y.reshape(-1))
        train_loss_epoch += loss.item()
        train_acc_epoch += torch.mean((y.reshape(-1) == torch.argmax(logits, -1)).float())
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        optimizer.zero_grad()
    train_loss_epoch /= int(len(sentences) / batch_size)
    train_acc_epoch /= int(len(sentences) / batch_size)
    train_loss.append(train_loss_epoch)
    train_acc.append(train_acc_epoch)
    print(f'Эпоха {e}')
    print(f'Тренировочный лосс: {train_loss[-1]}  точность:{train_acc[-1]}')
    with torch.no_grad():
        logits_test = model(X_test)
        loss = cross_entropy(logits_test, y_test.reshape(-1))
        test_loss_simple.append(loss.item())
        test_acc_simple.append(torch.mean((y_test.reshape(-1) == torch.argmax(logits_test, -1)).float()))
    print(f'Тестовый лосс: {test_loss_simple[-1]}  точность:{test_acc_simple[-1]}')
    loss_plot(fig,ax1,train_loss, test_loss_simple, "Кросс-энтропия")
    loss_plot(fig,ax2,train_acc, test_acc_simple, "Точность")
    sampler.on_epoch_end(e)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Эпоха 0
Тренировочный лосс: 3.1866448262158564  точность:0.16681286692619324
Тестовый лосс: 4.2296624183654785  точность:0.04495524242520332
Started sampling
	sampling, T =  0.3
	sampling, T =  0.5
	sampling, T =  0.7
	sampling, T =  0.9
	sampling, T =  1.1
Эпоха 1
Тренировочный лосс: 2.6507953290378348  точность:0.24295692145824432
Тестовый лосс: 4.434423923492432  точность:0.04969309642910957
Эпоха 2
Тренировочный лосс: 2.492383592829985  точность:0.27468207478523254
Тестовый лосс: 4.011533737182617  точность:0.05593350529670715
Эпоха 3
Тренировочный лосс: 2.3974051037956685  точность:0.29907044768333435
Тестовый лосс: 2.977808952331543  точность:0.05969948694109917
Эпоха 4
Тренировочный лосс: 2.3226902731727153  точность:0.31694796681404114
Тестовый лосс: 2.1637611389160156  точность:0.6357736587524414
Эпоха 5
Тренировочный лосс: 2.2544123879600972  точность:0.33509117364883423
Тестовый лосс: 0.7658377885818481  точность:0.859098494052887
Эпоха 6
Тренировочный лосс: 2.19369553594028

Эпоха 57
Тренировочный лосс: 1.5675070681291468  точность:0.5240395069122314
Тестовый лосс: 0.3800201714038849  точность:0.889264702796936
Эпоха 58
Тренировочный лосс: 1.5639466813031364  точность:0.5255091190338135
Тестовый лосс: 0.38558775186538696  точность:0.8889961838722229
Эпоха 59
Тренировочный лосс: 1.5631523157568539  точность:0.5248302221298218
Тестовый лосс: 0.38102731108665466  точность:0.8888107538223267
Эпоха 60
Тренировочный лосс: 1.5592730642767514  точность:0.5262420177459717
Тестовый лосс: 0.3883562386035919  точность:0.8890025615692139
Started sampling
	sampling, T =  0.3
	sampling, T =  0.5
	sampling, T =  0.7
	sampling, T =  0.9
	sampling, T =  1.1
Эпоха 61
Тренировочный лосс: 1.5578877247081084  точность:0.5261783003807068
Тестовый лосс: 0.3815257251262665  точность:0.889271080493927
Эпоха 62
Тренировочный лосс: 1.5546986150741577  точность:0.5277594327926636
Тестовый лосс: 0.39694318175315857  точность:0.8878836035728455
Эпоха 63
Тренировочный лосс: 1.55315841394

In [306]:
class LSTM3Model(nn.Module):
    """Three stacked LSTM layers with skip connections.

    The second and third layers receive the raw input concatenated with the
    output of the previous layer, and the output layer sees the outputs of all
    three layers concatenated.

    Args:
        hidden_size: number of units in each LSTM layer (default 128).
        dropout: dropout probability applied to every layer output
            (default 0.2).

    ``forward`` takes a float tensor of shape ``(batch, seq, num_chars)`` and
    returns raw logits of shape ``(batch * seq, num_chars)``; no softmax is
    applied.
    """
    def __init__(self, hidden_size=128, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTM(num_chars, hidden_size, batch_first = True)
        self.lstm2 = nn.LSTM(num_chars + hidden_size, hidden_size, batch_first = True)
        self.lstm3 = nn.LSTM(num_chars + hidden_size, hidden_size, batch_first = True)
        self.tanh = nn.Tanh()
        self.drop = nn.Dropout(dropout)
        self.dense = nn.Linear(hidden_size * 3, num_chars)

    def forward(self, X):
        out1, _ = self.lstm1(X)
        out1 = self.tanh(out1)
        out1 = self.drop(out1)
        # Skip connection: deeper layers also see the raw input.
        out2, _ = self.lstm2(torch.cat([X, out1], 2))
        out2 = self.tanh(out2)
        out2 = self.drop(out2)
        out3, _ = self.lstm3(torch.cat([X, out2], 2))
        out3 = self.tanh(out3)
        out3 = self.drop(out3)
        out = torch.cat([out1, out2, out3], 2)
        # Merge the batch and time axes: one row of features per character position.
        out = torch.reshape(out, (-1, self.hidden_size * 3))
        out = self.dense(out)
        return out

In [307]:
#на этой модели я не заметил разницы с нормой градиентов
model = LSTM3Model().to(device)
optimizer = torch.optim.Adam(model.parameters())
cross_entropy = torch.nn.CrossEntropyLoss()


%matplotlib notebook
fig = plt.figure(figsize=(8,10))
ax = fig.subplots(2,1)
fig.tight_layout(pad=3.0)
ax1, ax2 = ax

epochs = 101
train_loss = []
train_acc = []
test_loss = []
test_acc = []
sampler = CharSampler(char_vectors, model, 'stacked_with_skip.txt')
for e in range(epochs):
    train_loss_epoch = 0
    train_acc_epoch = 0
    generator = generate_batch()
    for i in range(int(len(sentences) / batch_size)):
        X, y = next(generator)
        X = torch.from_numpy(X).to(device).float()
        y = torch.from_numpy(y).to(device).long()
        logits = model(X)
        loss = cross_entropy(logits, y.reshape(-1))
        train_loss_epoch += loss.item()
        train_acc_epoch += torch.mean((y.reshape(-1) == torch.argmax(logits, -1)).float())
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        optimizer.zero_grad()
    train_loss_epoch /= int(len(sentences) / batch_size)
    train_acc_epoch /= int(len(sentences) / batch_size)
    train_loss.append(train_loss_epoch)
    train_acc.append(train_acc_epoch)
    print(f'Эпоха {e}')
    print(f'Тренировочный лосс: {train_loss[-1]}  точность:{train_acc[-1]}')
    with torch.no_grad():
        logits_test = model(X_test)
        loss = cross_entropy(logits_test, y_test.reshape(-1))
        test_loss.append(loss.item())
        test_acc.append(torch.mean((y_test.reshape(-1) == torch.argmax(logits_test, -1)).float()))
    print(f'Тестовый лосс: {test_loss[-1]}  точность:{test_acc[-1]}')
    loss_plot(fig,ax1,train_loss, test_loss, "Кросс-энтропия")
    loss_plot(fig,ax2,train_acc, test_acc, "Точность")
    sampler.on_epoch_end(e)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Эпоха 0
Тренировочный лосс: 2.839977298624375  точность:0.21576911211013794
Тестовый лосс: 4.111471176147461  точность:0.0532161109149456
Started sampling
	sampling, T =  0.3
	sampling, T =  0.5
	sampling, T =  0.7
	sampling, T =  0.9
	sampling, T =  1.1
Эпоха 1
Тренировочный лосс: 2.3581260193095486  точность:0.30353787541389465
Тестовый лосс: 0.7555564641952515  точность:0.8599552512168884
Эпоха 2
Тренировочный лосс: 2.1878455669739667  точность:0.34920865297317505
Тестовый лосс: 0.45273202657699585  точность:0.868548572063446
Эпоха 3
Тренировочный лосс: 2.0634936921736773  точность:0.3832738399505615
Тестовый лосс: 0.4283387064933777  точность:0.8742391467094421
Эпоха 4
Тренировочный лосс: 1.9675043798895444  точность:0.4104003608226776
Тестовый лосс: 0.4097271263599396  точность:0.8789705634117126
Эпоха 5
Тренировочный лосс: 1.8886413013233858  точность:0.4326542615890503
Тестовый лосс: 0.3930443227291107  точность:0.8826982378959656
Эпоха 6
Тренировочный лосс: 1.8243494255402508  

Эпоха 57
Тренировочный лосс: 1.2488006006970125  точность:0.6103459596633911
Тестовый лосс: 0.3497077524662018  точность:0.8987787961959839
Эпоха 58
Тренировочный лосс: 1.2457782333037433  точность:0.6108003258705139
Тестовый лосс: 0.34723082184791565  точность:0.8984654545783997
Эпоха 59
Тренировочный лосс: 1.2424870607432197  точность:0.6118929982185364
Тестовый лосс: 0.3492526412010193  точность:0.8979284167289734
Эпоха 60
Тренировочный лосс: 1.2382978489819696  точность:0.6138525605201721
Тестовый лосс: 0.3470187485218048  точность:0.8983056545257568
Started sampling
	sampling, T =  0.3
	sampling, T =  0.5
	sampling, T =  0.7
	sampling, T =  0.9
	sampling, T =  1.1
Эпоха 61
Тренировочный лосс: 1.2352123776604147  точность:0.6141741275787354
Тестовый лосс: 0.3472181260585785  точность:0.8985486030578613
Эпоха 62
Тренировочный лосс: 1.2332307192858527  точность:0.6139135360717773
Тестовый лосс: 0.34649237990379333  точность:0.898638129234314
Эпоха 63
Тренировочный лосс: 1.23024022060

In [None]:
%matplotlib notebook
fig = plt.figure(figsize=(8,10))
ax1 = fig.plot()
fig.tight_layout(pad=3.0)

train_line = ax1.plot(test_acc_simple, color = 'black')
test_line = ax1.plot(test_acc, color = 'red')
ax1.set_xlabel('Эпоха')
ax1.set_ylabel('Точность')
ax1.legend(('1 слой', '3слоя'))
fig.canvas.draw()
