In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import re
import random

import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch import optim
from torch.utils.data import DataLoader,Dataset, RandomSampler, TensorDataset
from opacus.layers.dp_rnn import DPGRU
from opacus import PrivacyEngine

import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


### Data Preparation

In [2]:
# Reserved vocabulary indices: the decoders feed SOS_TOKEN as their first input,
# and EOS_TOKEN is appended to every encoded sentence.
SOS_TOKEN = 0
EOS_TOKEN = 1


class Lang:
    """Vocabulary for one language: word <-> index maps plus word counts.

    Indices 0 and 1 are reserved for the start- and end-of-sentence tokens,
    so the first real word receives index 2.
    """

    def __init__(self, name):
        self.name = name  # label of the language, e.g. 'eng' or 'fra'
        self.word2idx = {}
        # Pre-register the reserved tokens so that mapping index 0 or 1 back
        # to text does not raise KeyError.
        self.idx2word = {0: "SOS", 1: "EOS"}
        self.word2count = {}
        self.n_words = 2  # already counts the two reserved tokens

    def addWord(self, word):
        """Register `word` under the next free index, or bump its count."""
        if word not in self.word2idx:
            self.word2idx[word] = self.n_words
            self.idx2word[self.n_words] = word
            self.word2count[word] = 1
            self.n_words += 1
        else:
            self.word2count[word] += 1

    def addSentence(self, sentence):
        """Add every space-separated word of `sentence` to the vocabulary."""
        for word in sentence.split(" "):
            self.addWord(word)
        

In [3]:
def unicodeToAscii(s):
    """Return the NFD form of `s` with all 'Mn' (combining mark) characters removed."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def normalizeString(s):
    """Lowercase and trim `s`, strip accents, then normalise punctuation.

    A space is inserted before each '.', '!' or '?', after which every run of
    characters other than ASCII letters, '!' and '?' collapses to one space.
    """
    cleaned = unicodeToAscii(s.lower().strip())
    # Order matters: the second pattern removes the '.' the first one padded.
    for pattern, replacement in ((r"([.!?])", r" \1"), (r"[^a-zA-Z!?]+", r" ")):
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()


In [4]:
def readLangs(lang1, lang2, reverse=False):
    """Read `../data/{lang1}-{lang2}.txt` into normalised sentence pairs.

    Each line is split on tabs and every field is passed through
    normalizeString.

    Args:
        lang1: name of the language in the first tab-separated field.
        lang2: name of the language in the second tab-separated field.
        reverse: when True, each pair is reversed and lang2 becomes the
            input language.

    Returns:
        (input_lang, output_lang, pairs), where both Lang objects are still
        empty and `pairs` is a list of lists of normalised strings.
    """
    # Use a context manager so the file handle is closed; the original left it open.
    with open(f"../data/{lang1}-{lang2}.txt", encoding="utf-8") as handle:
        lines = handle.read().strip().split('\n')

    pairs = [[normalizeString(s) for s in line.split('\t')] for line in lines]

    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang, output_lang = Lang(lang2), Lang(lang1)
    else:
        input_lang, output_lang = Lang(lang1), Lang(lang2)
    return input_lang, output_lang, pairs
    

In [5]:
# Sentence-length cap: filterPair keeps only sentences with fewer than
# MAX_LENGTH words, and the decoders unroll for exactly MAX_LENGTH steps.
MAX_LENGTH = 10

# Prefixes that filterPair requires the second sentence of a pair to start
# with (checked with str.startswith).
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

def filterPair(p):
    """Tell whether pair `p` should be kept.

    Both sentences must have fewer than MAX_LENGTH space-separated words and
    the second one must start with one of eng_prefixes.
    """
    if len(p[0].split(" ")) >= MAX_LENGTH:
        return False
    # p[1] is only touched once the first sentence has passed, as before.
    second = p[1]
    if len(second.split(" ")) >= MAX_LENGTH:
        return False
    return second.startswith(eng_prefixes)

def filterPairs(pairs):
    """Return, in order, the pairs that filterPair accepts."""
    return list(filter(filterPair, pairs))
    

In [6]:
def prepareData(lang1, lang2, reverse = False):
    """Load, filter and index the sentence pairs for a language pair.

    Reads the pairs with readLangs, keeps those accepted by filterPairs,
    fills both vocabularies from the survivors and prints the vocabulary sizes.

    Returns:
        (input_lang, output_lang, pairs) with populated Lang objects.
    """
    src_vocab, tgt_vocab, all_pairs = readLangs(lang1, lang2, reverse)
    kept = filterPairs(all_pairs)
    for entry in kept:
        src_vocab.addSentence(entry[0])
        tgt_vocab.addSentence(entry[1])
    print("Word Count:")
    for vocab in (src_vocab, tgt_vocab):
        print(vocab.name, vocab.n_words)
    return src_vocab, tgt_vocab, kept

# Build the vocabularies and filtered pairs. reverse=True swaps each pair, so
# the input language is 'fra' and the output language is 'eng'.
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

Word Count:
fra 4601
eng 2991


### Seq2Seq Model

In [7]:
class Encoder(nn.Module):
    """Sequence encoder: embedding, then dropout, then a batch-first DPGRU."""

    def __init__(self, input_size, hidden_size, dropout_p = 0.1):
        super().__init__()
        self.hidden_size = hidden_size
        # Sub-modules are created in the same order as before so parameter
        # ordering and random initialisation are unaffected.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = DPGRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Embed the token ids in `x` and return the GRU's (output, hidden)."""
        embedded = self.dropout(self.embedding(x))
        seq_output, final_hidden = self.gru(embedded)
        return seq_output, final_hidden

In [8]:
class Decoder(nn.Module):
    """GRU decoder without attention.

    Unrolls for exactly MAX_LENGTH steps starting from SOS_TOKEN, using
    teacher forcing when a target tensor is supplied and greedy decoding
    otherwise.
    """

    def __init__(self, output_size, hidden_size):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = DPGRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward_step(self, x, hidden):
        """Run one decoding step; return (unnormalised scores, new hidden)."""
        op = self.embedding(x)
        op = F.relu(op)
        op, hidden = self.gru(op, hidden) # update hidden state
        op = self.out(op)
        return op, hidden

    def forward(self, enc_outputs, enc_hidden, target_tensor=None):
        """Decode MAX_LENGTH steps from the encoder state.

        Args:
            enc_outputs: encoder outputs; only the batch size (dim 0) and the
                device are read here.
            enc_hidden: encoder hidden state, used as the initial decoder state.
            target_tensor: optional target ids indexed as [:, step]; when given,
                they are fed as the next input (teacher forcing).

        Returns:
            (log-probabilities concatenated along dim 1, final hidden state,
            None). The trailing None matches AttnDecoderRNN's 3-tuple.
        """
        batch_size = enc_outputs.size(0)
        # Allocate the SOS inputs on the encoder outputs' device instead of
        # the module-level `device` global, so the module follows its inputs.
        decoder_input = torch.full(
            (batch_size, 1), SOS_TOKEN, dtype=torch.long, device=enc_outputs.device
        )
        decoder_hidden = enc_hidden
        decoder_outputs = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
            decoder_outputs.append(decoder_output)

            if target_tensor is not None:
                # Teacher forcing: the next input is the ground-truth token.
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                # Greedy decoding: feed back the highest-scoring token.
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()

        decoder_outputs = torch.cat(decoder_outputs, dim = 1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None
        
        
        
        

In [9]:
class BahdanauAttention(nn.Module):
    """Additive attention: scores are Va(tanh(Wa(query) + Ua(keys)))."""

    def __init__(self, hidden_size):
        super().__init__()
        # Creation order is kept so parameter order and initialisation match.
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        """Return (context, weights) for `query` attending over `keys`.

        Assumes 3-D inputs, e.g. query (batch, 1, hidden) and keys
        (batch, seq, hidden) -- TODO confirm against callers.
        """
        projected = self.Wa(query) + self.Ua(keys)
        energy = self.Va(torch.tanh(projected))
        # Drop Va's trailing size-1 axis and add a query axis at position 1.
        energy = energy.squeeze(2).unsqueeze(1)
        weights = torch.softmax(energy, dim=-1)
        return torch.bmm(weights, keys), weights

In [10]:
class AttnDecoderRNN(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    Unrolls for exactly MAX_LENGTH steps starting from SOS_TOKEN, using
    teacher forcing when a target tensor is supplied and greedy decoding
    otherwise.
    """

    def __init__(self, hidden_size, output_size, dropout_p = 0.1):
        super(AttnDecoderRNN, self).__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = BahdanauAttention(hidden_size)
        # The GRU input is the embedding concatenated with the context vector.
        self.gru = DPGRU(2*hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_p)

    def forward_step(self, x, hidden, enc_outputs):
        """Run one step; return (unnormalised scores, new hidden, attention weights)."""
        emb = self.dropout(self.embedding(x))
        # Swap the first two axes of the hidden state to form the query
        # (presumably layer-first -> batch-first; verify against DPGRU).
        query = hidden.permute(1, 0, 2)
        context , attn_weights = self.attn(query, enc_outputs)
        input_gru = torch.cat((emb, context), dim=2)
        op, hidden = self.gru(input_gru, hidden)
        op = self.out(op)
        return op, hidden, attn_weights

    def forward(self, enc_outputs, enc_hidden, target_tensor=None):
        """Decode MAX_LENGTH steps while attending over `enc_outputs`.

        Args:
            enc_outputs: encoder outputs, used as attention keys.
            enc_hidden: encoder hidden state, used as the initial decoder state.
            target_tensor: optional target ids indexed as [:, step]; when given,
                they are fed as the next input (teacher forcing).

        Returns:
            (log-probabilities concatenated along dim 1, final hidden state,
            attention weights concatenated along dim 1).
        """
        batch_size = enc_outputs.size(0)
        # Allocate the SOS inputs on the encoder outputs' device instead of
        # the module-level `device` global, so the module follows its inputs.
        decoder_input = torch.full(
            (batch_size, 1), SOS_TOKEN, dtype=torch.long, device=enc_outputs.device
        )
        decoder_hidden = enc_hidden
        decoder_outputs = []
        attentions = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden, attn_weights = self.forward_step(decoder_input, decoder_hidden, enc_outputs)
            decoder_outputs.append(decoder_output)
            attentions.append(attn_weights)

            if target_tensor is not None:
                # Teacher forcing: the next input is the ground-truth token.
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                # Greedy decoding: feed back the highest-scoring token.
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()

        decoder_outputs = torch.cat(decoder_outputs, dim = 1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        attentions = torch.cat(attentions, dim =1)
        return decoder_outputs, decoder_hidden, attentions
        

In [11]:
class Seq2Seq(nn.Module):
    """Wraps an encoder and a decoder as a single module."""

    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder, self.decoder = encoder, decoder

    def forward(self, x, y):
        """Encode `x`, decode with `y` as the target, return the decoder output."""
        memory, state = self.encoder(x)
        log_probs, _hidden, _attn = self.decoder(memory, state, y)
        return log_probs
        

### Training

In [12]:
def indexesFromSentence(lang, sentence):
    """Look up each space-separated word of `sentence` in lang.word2idx."""
    token_ids = []
    for token in sentence.split(" "):
        token_ids.append(lang.word2idx[token])
    return token_ids

def tensorFromSentence(lang, sentence):
    """Encode `sentence` plus a trailing EOS_TOKEN as a 1 x N long tensor on `device`."""
    token_ids = indexesFromSentence(lang, sentence) + [EOS_TOKEN]
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    return flat.view(1, -1)

def tensorsFromPair(pair):
    """Encode pair[0] with the global input_lang and pair[1] with the global output_lang."""
    return (
        tensorFromSentence(input_lang, pair[0]),
        tensorFromSentence(output_lang, pair[1]),
    )

def get_dataloader(batch_size):
    """Build the randomly sampled training DataLoader for the eng/fra data.

    Every sentence is encoded as word indices followed by EOS_TOKEN and
    written into a zero-filled row of width MAX_LENGTH.

    Returns:
        (input_lang, output_lang, train_dataloader).
    """
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)

    shape = (len(pairs), MAX_LENGTH)
    src_matrix = np.zeros(shape, dtype=np.int32)
    tgt_matrix = np.zeros(shape, dtype=np.int32)

    for row, (src_sentence, tgt_sentence) in enumerate(pairs):
        src_ids = indexesFromSentence(input_lang, src_sentence) + [EOS_TOKEN]
        tgt_ids = indexesFromSentence(output_lang, tgt_sentence) + [EOS_TOKEN]
        # Positions past the sentence end keep the zero fill value.
        src_matrix[row, :len(src_ids)] = src_ids
        tgt_matrix[row, :len(tgt_ids)] = tgt_ids

    dataset = TensorDataset(
        torch.LongTensor(src_matrix).to(device),
        torch.LongTensor(tgt_matrix).to(device),
    )
    loader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)
    return input_lang, output_lang, loader
        

In [13]:
def train_epoch(dataloader, model, optimizer, criterion, privacy_engine):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Each batch is an (x, y) pair; the model is called as model(x, y) and its
    output is flattened to (-1, last_dim) before being scored against the
    flattened targets. `privacy_engine` is accepted but not used here.
    """
    running_loss = 0
    for inputs, targets in dataloader:
        optimizer.zero_grad()

        log_probs = model(inputs, targets)
        n_classes = log_probs.size(-1)
        batch_loss = criterion(log_probs.view(-1, n_classes), targets.view(-1))

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    n_batches = len(dataloader)
    return running_loss / n_batches
    

In [14]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np

def showPlot(points):
    """Plot `points` as a line with y-axis major ticks every 0.2."""
    # One subplots() call creates the figure and axes. The original also called
    # plt.figure() first, which left an extra empty figure open on every call.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    # Draw on the axes created above rather than on pyplot's implicit current axes.
    ax.plot(points)

In [25]:
def train(train_dataloader, model, n_epochs, privacy_engine, privacy_config, lr=0.001, plot_freq=100, print_freq = 100):
    """Train `model` with Adam and NLLLoss for `n_epochs`, then plot the loss curve.

    Args:
        train_dataloader: loader yielding (x, y) batches.
        model: module called as model(x, y).
        n_epochs: number of epochs; also passed to the privacy engine as `epochs`.
        privacy_engine: object providing make_private_with_epsilon and
            get_epsilon. A falsy value (e.g. None) skips both and trains
            without the privacy wrapper.
        privacy_config: dict with the keys 'epsilon', 'delta' and
            'max_per_sample_grad_norm'.
        lr: Adam learning rate.
        plot_freq: every `plot_freq` epochs the averaged loss is recorded for plotting.
        print_freq: every `print_freq` epochs the averaged loss (and epsilon) is printed.
    """
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.NLLLoss()

    # Same truthiness check as the epsilon report below, so both agree on
    # whether a privacy engine is in use.
    if privacy_engine:
        model, optimizer, train_dataloader = privacy_engine.make_private_with_epsilon(
            module = model,
            optimizer = optimizer,
            data_loader = train_dataloader,
            target_epsilon = privacy_config['epsilon'],
            target_delta = privacy_config['delta'],
            epochs = n_epochs,
            max_grad_norm = privacy_config['max_per_sample_grad_norm']
        )

    for epoch in range(1, n_epochs+1):
        loss = train_epoch(train_dataloader, model, optimizer, criterion, privacy_engine)

        print_loss_total += loss
        plot_loss_total += loss

        if epoch % print_freq == 0:
            print_loss_avg = print_loss_total / print_freq
            print_loss_total = 0
            print(f"Epoch: {epoch} || Progress: {epoch/n_epochs} || Avg Loss: {print_loss_avg}")
            if privacy_engine:
                # Read delta from the config: the original referenced a bare
                # `delta` that is not defined in this function or notebook,
                # which raises NameError on a fresh kernel.
                epsilon = privacy_engine.get_epsilon(privacy_config['delta'])
                print(f"Epsilon: {epsilon}")

        if epoch % plot_freq == 0:
            plot_loss_avg = plot_loss_total / plot_freq
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
            

In [26]:
def evaluate(encoder, decoder, sentence, input_lang, output_lang):
    """Translate one sentence by taking the top-scoring token at each step.

    Args:
        encoder: module returning (outputs, hidden) for an input tensor.
        decoder: module returning (outputs, hidden, attention) when called
            without a target tensor.
        sentence: source sentence whose words are all in input_lang.
        input_lang: vocabulary used to encode `sentence`.
        output_lang: vocabulary whose idx2word maps predictions back to words.

    Returns:
        (decoded_words, attn): the predicted words, ending with '<EOS>' when
        the end token was predicted, and the decoder's attention output.
    """
    # NOTE(review): no .eval() call here, so the modules run in whatever
    # train/eval mode the caller left them in -- confirm this is intended.
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, decoder_hidden, attn = decoder(encoder_outputs, encoder_hidden)

        _, topi = decoder_outputs.topk(1)
        decoder_ids = topi.squeeze()

        decoded_words = []
        for idx in decoder_ids:
            if idx.item() == EOS_TOKEN:
                # Fixed marker: the original literal was '<EOS', missing its '>'.
                decoded_words.append('<EOS>')
                break
            decoded_words.append(output_lang.idx2word[idx.item()])
        return decoded_words, attn
        

In [27]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print source ('>'), reference ('=') and prediction ('<') for `n` random pairs.

    Pairs are drawn from the global `pairs`; the global input_lang and
    output_lang are passed to evaluate.
    """
    for _ in range(n):
        sample = random.choice(pairs)
        print('>', sample[0])
        print('=', sample[1])
        predicted_words, _attn = evaluate(encoder, decoder, sample[0], input_lang, output_lang)
        print('<', ' '.join(predicted_words))
        print('')

In [28]:
# Differential-privacy settings that train() forwards to
# privacy_engine.make_private_with_epsilon.
privacy_config = {
    "delta" : 8e-5,  # passed as target_delta
    "max_per_sample_grad_norm" : 1.5,  # passed as max_grad_norm
    "epsilon" : 12.0  # passed as target_epsilon
}

In [29]:
# Model width (shared by embeddings and GRUs) and the DataLoader batch size.
hidden_size = 128
batch_size = 32

# Rebuilds the vocabularies from disk and rebinds the global input_lang / output_lang.
input_lang, output_lang, train_dataloader = get_dataloader(batch_size)

# The encoder and attention decoder are moved to `device` individually, then wrapped.
encoder = Encoder(input_lang.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)
model = Seq2Seq(encoder, decoder)

privacy_engine = PrivacyEngine()

# Train for 80 epochs, printing and recording the averaged loss every 5 epochs.
train(train_dataloader, model, 80, privacy_engine, privacy_config, print_freq=5, plot_freq=5)

Word Count:
fra 4601
eng 2991


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


Epoch: 5 || Progress: 0.0625 || Avg Loss: 2.9006292742723856
Epsilon: 3.6108045871158283
Epoch: 10 || Progress: 0.125 || Avg Loss: 2.3274709098165927
Epsilon: 4.563770762393464
Epoch: 15 || Progress: 0.1875 || Avg Loss: 2.2402928175206958
Epsilon: 5.342637890991645
Epoch: 20 || Progress: 0.25 || Avg Loss: 2.1901635221928855
Epsilon: 6.0311851317320855
Epoch: 25 || Progress: 0.3125 || Avg Loss: 2.11118973014075
Epsilon: 6.661040942877242
Epoch: 30 || Progress: 0.375 || Avg Loss: 2.0477432632579484
Epsilon: 7.248506637355471
Epoch: 35 || Progress: 0.4375 || Avg Loss: 2.0016304734032913
Epsilon: 7.803421014259308
Epoch: 40 || Progress: 0.5 || Avg Loss: 1.9634446409827504
Epsilon: 8.332257081226423
Epoch: 45 || Progress: 0.5625 || Avg Loss: 1.9472569720705128
Epsilon: 8.839589254811067
Epoch: 50 || Progress: 0.625 || Avg Loss: 1.9302881639096992
Epsilon: 9.328751589779765
Epoch: 55 || Progress: 0.6875 || Avg Loss: 1.9192318533385933
Epsilon: 9.802306326436696
Epoch: 60 || Progress: 0.75 ||

---------------------

------------------------