In [1]:
import time
import math
import random
import os
from itertools import zip_longest

import numpy as np

import pickle as pkl
import gzip

import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F

from torch import optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from utils import asMinutes, timeSince, load_zipped_pickle, corpus_bleu, directories
from langUtils import loadLangPairs, langDataset, langCollateFn, initHybridEmbeddings, tensorToList

import matplotlib.pyplot as plt
plt.switch_backend('agg')
import seaborn as sns; sns.set()
sns.set_style("darkgrid")
sns.set_context("paper")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [2]:
data_dir, em_dir = directories()

SPECIAL_SYMBOLS_ID = PAD_ID, UNK_ID, SOS_ID, EOS_ID = 0, 1, 2, 3
NUM_SPECIAL = len(SPECIAL_SYMBOLS_ID)

vi, en = loadLangPairs("vi")

class SortedList(list):
    def insort(self, x):
        bisect.insort(self, x)

In [3]:
BATCH_SIZE = 64

train_dataset = langDataset([(vi.train_num[i], en.train_num[i]) for i in range(len(vi.train_num)) if (len(vi.train[i]) < vi.max_length) & (len(en.train[i]) < en.max_length)])
overfit_dataset = langDataset([(vi.train_num[i], en.train_num[i]) for i in range(64)])
train_loader = torch.utils.data.DataLoader(dataset=overfit_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=False)
dev_dataset = langDataset([(vi.dev_num[i], en.dev_num[i]) for i in range(len(vi.dev_num)) if (len(vi.dev[i]) < vi.max_length) & (len(en.dev[i]) < en.max_length)])
dev_loader = torch.utils.data.DataLoader(dataset=dev_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=langCollateFn,
                                           shuffle=True)

In [4]:
class EncoderRNN(nn.Module):
    """
        Encoder RNN
        Input
            params - Dictionary of paramter
            raw_emb - (100,000, 300) raw embeddings
            learn_ids - list of ids to do embedding learning
            
            inp - (Max Length, Batch Size), original inputs
            inp_lens - (Batch Size), true length of inputs
        Output
            output - (Max Length, Batch Size, Hidden Size), GRU output
            hidden - (2, Batch Size, Hidden Size), Final hidden state of GRU
    """
    def __init__(self, params, raw_emb, learn_ids):
    
        super(EncoderRNN, self).__init__()
        
        self.hidden_size = params['hidden_size']
        self.n_layers = params['n_layers']
        
        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.gru = nn.GRU(self.embedding.embedding_dim, params['hidden_size'], self.n_layers, bidirectional=True)
        
    def forward(self, inp, inp_lens):
        #Embed input
        embedded = self.embedding(inp)
        #Pack padded
        packed = pack_padded_sequence(embedded, inp_lens).to(device)
        
        #GRU
        output, self.hidden = self.gru(packed)
        #Pad packed
        output, _ = pad_packed_sequence(output)
        #Concat bidirectional layers
        output = output[:, :, :self.hidden_size] + output[:, : ,self.hidden_size:]
        return output, self.hidden
    
class DecoderRNN(nn.Module):
    """
        Decoder RNN
        Input
            params - Dictionary of paramter
            raw_emb - (100,000, 300) raw embeddings
            learn_ids - list of ids to do embedding learning
            
            inp - (1, Batch Size), SOS Token each in batch size
            hidden - (2, 1, Hidden Size), Prev hidden size
        Output
            output - (32, Vocab Size), Probabilities
            hidden - (2, 1 Size, Hidden Size), Final hidden state of GRU
    """
    def __init__(self, params, raw_emb, learn_ids):
        super(DecoderRNN, self).__init__()
        self.hidden_size = params['hidden_size']
        self.n_layers = params['n_layers']
        self.output_size = params['output_size']

        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.gru = nn.GRU(self.embedding.embedding_dim, params['hidden_size'], self.n_layers, bidirectional=True)
        self.out = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, inp, hidden, encoder_output=None):
        #Embed
        embedded = self.embedding(inp)
        #Dropout
        output = F.relu(embedded)
        
        #Gru
        output, self.hidden = self.gru(output, hidden)
        #Concat directions
        output = output[:, :, :self.hidden_size] + output[:, : ,self.hidden_size:]
        #GRU output to probabilities
        output = torch.exp(self.softmax(self.out(output))).squeeze(0)
        return output, hidden

def maskedLoss(inp, target, mask):
    """Masked Loss"""
    #Total # real words
    nTotal = mask.sum()
    #Cross entropy
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1).to(device)))
    #Select loss of real words
    loss = crossEntropy.masked_select(mask.to(device)).mean()
    loss = loss.to(device)
    return loss, nTotal.item()

In [5]:
class Attn(torch.nn.Module):
    def __init__(self, params):
        super(Attn, self).__init__()
        self.hidden_size = params['hidden_size']
           
    def forward(self, hidden, encoder_output):
        #h_t.T.dot(h_s)
        attn_energies = torch.sum(hidden * encoder_output, dim=2)

        return F.softmax(attn_energies.t(), dim=1).unsqueeze(1)

class AttnDecoderRNN(nn.Module):
    def __init__(self, params, raw_emb, learn_ids):
        super(AttnDecoderRNN, self).__init__()

        # Keep for reference
        self.hidden_size = params['hidden_size']
        self.output_size = params['output_size']
        self.n_layers = params['n_layers']
        self.dropout = params['dropout']

        # Define layers
        self.embedding = initHybridEmbeddings(raw_emb, learn_ids)
        self.embed_dropout = nn.Dropout(self.dropout)
        self.gru = nn.GRU(self.embedding.embedding_dim, self.hidden_size, self.n_layers, bidirectional=True)
        self.concat = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

        self.attn = Attn({'hidden_size':self.hidden_size})

    def forward(self, inp, hidden, encoder_output):
        #Embedding and dropout
        embedded = self.embedding(inp)
        output = self.embed_dropout(embedded)
        
        #GRU
        rnn_out, hidden = self.gru(output, hidden)
        rnn_out = rnn_out[:, :, :self.hidden_size] + rnn_out[:, : ,self.hidden_size:]

        #Attn weights * enc_output
        attn_weights = self.attn(rnn_out, encoder_output)
        context = attn_weights.bmm(encoder_output.transpose(0, 1))
        
        #Concat context to GRU output     
        rnn_out = rnn_out.squeeze(0)

        context = context.squeeze(1)
        concat_inp = torch.cat((rnn_out, context), 1)
        concat_out = torch.tanh(self.concat(concat_inp))
        
        #Prediction
        output = self.out(concat_out)
        output = F.softmax(output, dim=1)
        
        return output, hidden

In [6]:
class Model():
    def __init__(self, encoder, decoder, encoder_optim, decoder_optim):
        
        self.encoder = encoder
        self.decoder = decoder
        
        self.encoder_optim = encoder_optim
        self.decoder_optim = decoder_optim
        
    def fit(self, train_data, dev_data, n_epoch, print_every, n_grams):
        start = time.time()

        self.n_epoch = n_epoch
        
        
        print("Initializing...")
        start_epoch = 1
        print_loss_total, plot_loss_total = 0 , 0
        plot_losses, plot_train_scores, plot_dev_scores = [], [], []
        
        for epoch in range(start_epoch, n_epoch):
            for i, (inp, inp_lens, output, out_mask, out_max) in enumerate(train_loader):
                loss = self.trainEpoch(inp, inp_lens, output, out_mask, out_max)
                
                print_loss_total += loss
                plot_loss_total += loss
                    
                if i % print_every == 0:
                    train_score = self.bleuScore(train_loader, n_grams)
                    dev_score = self.bleuScore(dev_loader, n_grams)
                    plot_train_scores.append(train_score)
                    plot_dev_scores.append(dev_score)
                    plot_loss_avg = plot_loss_total / print_every
                    plot_losses.append(plot_loss_avg)
                    plot_loss_total = 0       

                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print("Epoch:{} | Time Elapsed:{} | Percent Complete:{:.1} | Loss:{:.4} | TrainScore:{:.4} | DevScore:{:.4}".format(epoch,
                                                                                                                                  timeSince(start, epoch/n_epoch), 
                                                                                                                                  epoch/n_epoch*100, 
                                                                                                                                  print_loss_avg, 
                                                                                                                                  train_score,
                                                                                                                                  dev_score))                

        self.plot_losses = plot_losses
        self.plot_train_scores = plot_train_scores
        self.plot_dev_scores = plot_dev_scores
        return "Training Complete!"            
            
    def trainEpoch(self, inp, inp_lens, output, out_mask, out_max):
        #Zero gradients
        self.encoder_optim.zero_grad()
        self.decoder_optim.zero_grad()

        #Loss vars
        loss, print_losses, n_totals = 0, [], 0
        
        #Encoder Forward
        encoder_output, encoder_hidden = self.encoder(inp, inp_lens)
        
        #Init decoder_input
        decoder_input = torch.LongTensor([[SOS_ID for _ in range(inp.size(1))]]).to(device)
        decoder_hidden = encoder_hidden[:self.decoder.n_layers* (2 *self.decoder.gru.bidirectional)]

        #Teacher Forcing
        for t in range(out_max):
            #Decoder Forward
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_output)
            #True Output
            decoder_input = output[t].view(1, -1)
                
            mask_loss, nTotal = maskedLoss(decoder_output, output[t], out_mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
                
        loss.backward()
        
        self.encoder_optim.step()
        self.decoder_optim.step()

        return sum(print_losses) / n_totals
    
    def bleuScore(self, data_loader, n_grams):
        with torch.no_grad():
            
            true_outputs = []
            decoder_outputs = []

            for i, (inp, inp_lens, out, out_mask, out_max) in enumerate(data_loader):
                if i * BATCH_SIZE > 10000:
                    break

                #Save true masked outputs
                true_outputs += [out[:,i].masked_select(out_mask[:,i]).tolist() for i in range(out.size(1))]
                
                #Encoder forward
                encoder_output, encoder_hidden = self.encoder(inp, inp_lens)
                
                #Decoder inputs
                decoder_input = torch.LongTensor([[SOS_ID] * inp.size(1)]).to(device)
                decoder_hidden = encoder_hidden[:self.decoder.n_layers* (2 *self.decoder.gru.bidirectional)]

                #Decoder Results
                batch_output = decoder_input.clone()
                
                for t in range(out_max):
                    #Decoder forward
                    decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_output)
                    _, topi = decoder_output.topk(1)
                    decoder_input = torch.LongTensor([[topi[i][0] for i in range(inp.size(1))]]).to(device)
                    
                    #Save result
                    batch_output = torch.cat((batch_output, decoder_input), dim=0)

                    
                #Mask results
                batch_pos = []

                for i in range(batch_output.size(1)):
                    try:
                        batch_pos.append((batch_output[:,i]==EOS_ID).nonzero().item())
                    except:
                        try:
                            batch_pos.append((batch_output[:,i]==PAD_ID).nonzero().item())
                        except:
                            batch_pos.append(batch_output.size(0)+1)
                
                #To list
                decoder_outputs += [batch_output[:,batch_pos[i]].tolist() for i in range(len(batch_pos))]

        true_outputs = [[str(i) for i in seq] for seq in true_outputs] 
        decoder_outputs = [[str(i) for i in seq] for seq in decoder_outputs] 
        return corpus_bleu(decoder_outputs, true_outputs, n_grams)

    def showLoss(self):
        plt.figure()
        fig = plt.figure(figsize=(10,6))
        fig_plt = sns.lineplot(x=np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), y=self.plot_losses)
        fig_plt.set_title("Loss Over Time")
        fig_plt.set_ylabel("Loss")
        fig_plt.set_xlabel("Epochs")
        return fig_plt.get_figure()
    
    def showScore(self):
        df = pd.concat([pd.DataFrame({'X':np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), 'Y':self.plot_train_scores, 'Score':'Train'}), 
                        pd.DataFrame({'X':np.arange(0, self.n_epoch, int(self.n_epoch/len(self.plot_losses))), 'Y':self.plot_dev_scores, 'Score':'Dev'})], axis=0)
    
        plt.figure()
        pp = sns.lineplot(data=df, x = 'X', y = 'Y', hue='Score', style="Score", legend= "brief")
        fig_plt.set_title("Score Over Time")
        fig_plt.set_ylabel("Score")
        fig_plt.set_xlabel("Epoch")
        return fig_plt.get_figure()

In [7]:
LEARNING_RATE = 1

encoder_params = {'hidden_size':512, 'n_layers':1}
decoder_params = {'hidden_size':encoder_params['hidden_size'], 'n_layers':1, 'output_size':en.n_words}
attn_decoder_params = {'hidden_size':encoder_params['hidden_size'], 'n_layers':1, 'output_size':en.n_words, "dropout":0.1}

encoder = EncoderRNN(encoder_params, vi.emb, vi.learn_ids).to(device)
encoder_optim = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)

decoder = DecoderRNN(decoder_params, en.emb, en.learn_ids).to(device)
decoder_optim = optim.Adam(decoder.parameters(), lr=LEARNING_RATE)

attn_decoder = AttnDecoderRNN(attn_decoder_params, en.emb, en.learn_ids).to(device)
attn_decoder_optim = optim.Adam(attn_decoder.parameters(), lr=LEARNING_RATE)

In [8]:
model = Model(encoder, attn_decoder, encoder_optim, decoder_optim)
model.fit(train_loader, dev_loader,  n_epoch=10, print_every=250, n_grams=4)


Initializing...
Epoch:1 | Time Elapsed:0m 10s (- 1m 33s) | Percent Complete:1e+01 | Loss:0.04598 | TrainScore:0.03035 | DevScore:0.00612
Epoch:2 | Time Elapsed:0m 20s (- 1m 20s) | Percent Complete:2e+01 | Loss:0.04183 | TrainScore:0.03035 | DevScore:0.006092


KeyboardInterrupt: 

In [7]:
for inp, inp_lens, out, out_mask, out_max in train_loader:
    for s, seq in enumerate(inp.t()):
        break
    break

In [12]:
#Your unsorted array

a = [[d0, p0], [d1, p1], [d2,p2], [d3, p3], [d4, p4]]
arr_sum = 0

for i in range(len(a)):
    #All elements of array up to i are sorted
    #min_idx is the index of the minimum of the unsorted array
    min_idx = i
    
    #Iterate through unsorted part of array
    for j in range(i+1, len(a)):
        #If j is less than min_idx min_idx = j
        if a[min_idx][0] > a[j][0]:
            min_idx = j
            
    #Move minimum of unsorted array into correct sorted position
    a[i], a[min_idx] = a[min_idx], a[i]
    
    #Add the price of the new sorted house to the total
    arr_sum += a[i][1]
    
    #Check if you have enough neighbors
    if i == k:
        #Stop loop
        break
    
#Return average
return arr_sum/k


[['1', '2', '3'], ['1', '2', '3'], ['4', '5', '2']]