In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
from os import system
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
import json
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
print(device, torch.cuda.get_device_name(device))

SOS_token = 0
EOS_token = 1

hidden_size = 512
vocab_size = 28
teacher_forcing_ratio = 0.8
LR = 0.05

reference = 'variable'
output = 'varable'

#compute BLEU-4 score
def compute_bleu(output, reference):
    cc = SmoothingFunction()
    if len(reference) == 3:
        weights = (0.33,0.33,0.33)
    else:
        weights = (0.25,0.25,0.25,0.25)
    return sentence_bleu([reference], output,weights=weights,smoothing_function=cc.method1)

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

cuda:2 NVIDIA GeForce RTX 2080 Ti


In [3]:
train_data_path = "./train.json"
test_data_path = "./test.json"
new_test_data_path = "./new_test.json"

In [4]:
def save_weight(net, save_path, name: str):
    torch.save(net.state_dict(), os.path.join(save_path, f"checkpoint_{name}.weight"))

def load_weight(net, save_path, name: str):
    net.load_state_dict(torch.load(os.path.join(save_path, f"checkpoint_{name}.weight")))

In [5]:
class Dataloader():
    def __init__(self, path, useonehot=False):
        self.data_path = path
        self.useonehot = useonehot
        self.data = []
        self.vocab_table_idx2word = {0: "SOS", 1: "EOS"}
        self.vocab_table_word2idx = {}

        # read json
        with open(self.data_path) as f:
            data_json = json.load(f)
        # combine inputs and labels
        for ele in data_json:
            inputs = ele["input"]
            label = ele["target"]
            for ins in inputs:
                self.data.append([ins, label])

        # make vocab table
        for idx, ele in enumerate("abcdefghijklmnopqrstuvwxyz"):
            self.vocab_table_idx2word[idx+2] = ele
        self.vocab_table_word2idx = {v: k for k, v in self.vocab_table_idx2word.items()}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        idx = idx % len(self.data)

        input = self.data[idx][0]
        label = self.data[idx][1]

        # char to idx
        idx_input = np.array([0] + [self.vocab_table_word2idx[ele] for ele in input] + [1])
        idx_label = np.array([0] + [self.vocab_table_word2idx[ele] for ele in label] + [1])
        
        return torch.from_numpy(idx_input.reshape(-1, 1)).to(device), torch.from_numpy(idx_label.reshape(-1, 1)).to(device)


In [6]:
#Encoder
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, layer=1, bi=False, dropout=0):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.latyer = layer
        self.bi = bi

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=self.latyer, bidirectional=self.bi, dropout=dropout)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)  # remove view() when input all tensor
        output = embedded
        output, hidden = self.lstm(output, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.latyer * (2 if self.bi else 1), 1, self.hidden_size, device=device), torch.zeros(self.latyer * (2 if self.bi else 1), 1, self.hidden_size, device=device))

#Decoder
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, layer=1, bi=False, dropout=0):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.layer = layer
        self.bi = bi

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=self.layer, bidirectional=self.bi, dropout=dropout)
        self.out = nn.Linear(hidden_size * (2 if self.bi else 1), output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        output = self.out(output[0])    
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.layer * (2 if self.bi else 1), 1, self.hidden_size, device=device), torch.zeros(self.layer * (2 if self.bi else 1), 1, self.hidden_size, device=device))

In [7]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=20):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    #----------sequence to sequence part for encoder----------#
    
    for di in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[di], encoder_hidden)
        ### encoder does't calculate loss
    
    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
	
    # ####
    #----------sequence to sequence part for decoder----------#
    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input

            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [11]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01, useonehot=False):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    # your own dataloader
    training_pairs = Dataloader(train_data_path, useonehot)

    criterion = nn.CrossEntropyLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))
	

encoder1 = EncoderRNN(vocab_size, hidden_size, layer=2, bi=True).to(device)
decoder1 = DecoderRNN(hidden_size, vocab_size, layer=2, bi=True).to(device)

#trainIters(encoder1, decoder1, 200000, print_every=5000, learning_rate=LR)

In [13]:
trainIters(encoder1, decoder1, 20000, print_every=5000, learning_rate=LR)

2m 55s (- 8m 47s) (5000 25%) 11.3997
6m 25s (- 6m 25s) (10000 50%) 12.5786
9m 51s (- 3m 17s) (15000 75%) 12.6461
13m 25s (- 0m 0s) (20000 100%) 13.5400


In [14]:
save_weight(encoder1, "./checkpoint", "encoder1e1dV1")
save_weight(decoder1, "./checkpoint", "decoder1e1dV1")

In [None]:
encoder1 = EncoderRNN(vocab_size, hidden_size, layer=3, bi=False).to(device)
decoder1 = DecoderRNN(hidden_size, vocab_size, layer=3, bi=False).to(device)
load_weight(encoder1, "./checkpoint", "encoderjpV1")
load_weight(decoder1, "./checkpoint", "decoderjpV1")

In [12]:
def test(encoder, decoder, path, max_length=20, show=True):
    # your own dataloader
    testing_pairs = Dataloader(path)
    print(f"testing num pair: {len(testing_pairs)}")
    bleu4 = 0
    with torch.no_grad():
        for iter in range(1, len(testing_pairs)+1):
            testing_pair = testing_pairs[iter - 1]
            input_tensor = testing_pair[0]
            target_tensor = testing_pair[1]

            target_length = target_tensor.size(0)

            encoder_hidden = encoder.initHidden()
            encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)

            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden

            output_list = []
            

            for di in range(max_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()
                
                if decoder_input.cpu().numpy().item() == 1:
                    break
                else:
                    output_list.append(decoder_input.cpu().numpy())

            input_list = input_tensor.detach().cpu().numpy()
            label_list = target_tensor.detach().cpu().numpy()

            input_word = ''.join([testing_pairs.vocab_table_idx2word[x.item(0)] for x in input_list[1:-1]])
            label_word = ''.join([testing_pairs.vocab_table_idx2word[x.item(0)] for x in label_list[1:-1]])
            pred_word = ''.join([testing_pairs.vocab_table_idx2word[x.item(0)] for x in output_list[1:]])

            bleu4 += compute_bleu(pred_word, label_word)

            if show:
                print("="*20)
                print(f"input:\t{input_word}")
                print(f"target:\t{label_word}")
                print(f"pred:\t{pred_word}")

        print(bleu4/len(testing_pairs))


In [13]:
test(encoder1, decoder1, train_data_path, show=False)

testing num pair: 12925
0.018224637193439048


In [17]:
test(encoder1, decoder1, test_data_path)

testing num pair: 50
input:	contenpted
target:	contented
pred:	cuntinded
input:	begining
target:	beginning
pred:	looking
input:	problam
target:	problem
pred:	purblum
input:	dirven
target:	driven
pred:	jurrest
input:	ecstacy
target:	ecstasy
pred:	istectly
input:	juce
target:	juice
pred:	juice
input:	localy
target:	locally
pred:	lucally
input:	compair
target:	compare
pred:	kamppore
input:	pronounciation
target:	pronunciation
pred:	purnaction
input:	transportibility
target:	transportability
pred:	granvostability
input:	miniscule
target:	minuscule
pred:	minuscule
input:	independant
target:	independent
pred:	independent
input:	aranged
target:	arranged
pred:	ronge
input:	poartry
target:	poetry
pred:	poutry
input:	leval
target:	level
pred:	lovel
input:	basicaly
target:	basically
pred:	mycally
input:	triangulaur
target:	triangular
pred:	jriangular
input:	unexpcted
target:	unexpected
pred:	unexpected
input:	stanerdizing
target:	standardizing
pred:	stroganized
input:	varable
target:	variable
pre

In [16]:
test(encoder1, decoder1, new_test_data_path)

testing num pair: 50
input:	apreciate
target:	appreciate
pred:	firpreciate
input:	appeciate
target:	appreciate
pred:	fippeciate
input:	apprciate
target:	appreciate
pred:	firpricate
input:	apprecate
target:	appreciate
pred:	firpract
input:	apprecite
target:	appreciate
pred:	firprect
input:	luve
target:	love
pred:	luve
input:	culd
target:	cold
pred:	juld
input:	heart
target:	heart
pred:	loud
input:	televiseon
target:	television
pred:	kelvection
input:	thone
target:	phone
pred:	hunone
input:	phace
target:	phase
pred:	puse
input:	poam
target:	poem
pred:	poem
input:	tomorraw
target:	tomorrow
pred:	momorry
input:	presishan
target:	precision
pred:	pursestan
input:	presishion
target:	precision
pred:	pression
input:	presisian
target:	precision
pred:	pursision
input:	presistion
target:	precision
pred:	pressition
input:	perver
target:	prefer
pred:	purver
input:	predgudice
target:	prejudice
pred:	judge
input:	predgudis
target:	prejudice
pred:	judges
input:	recievor
target:	receiver
pred:	reseriove

https://github.com/gaushh/Deep-Spelling <br>
https://medium.com/analytics-vidhya/batching-strategies-for-lstm-input-6f18089b1735