In [40]:
import numpy as np
from collections import defaultdict 
import random
import torch
import os
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from sklearn.metrics import  accuracy_score
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import csv
import time
from torch import optim

In [41]:
# https://stackoverflow.com/questions/58961768/set-torch-backends-cudnn-benchmark-true-or-not
def set_seed(seed = 0):
    """Seed every random number generator this notebook relies on.

    Seeds NumPy, Python's ``random`` module and PyTorch (CPU and every
    CUDA device), then configures cuDNN for reproducible execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one; the call is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above, so keep it off.
    torch.backends.cudnn.benchmark = False
set_seed(1)



In [42]:
SOS_token = 0  # start-of-sequence index; Lang.index2word maps 0 to "SOS"
EOS_token = 1  # end-of-sequence index; Lang.index2word maps 1 to "EOS"


class Lang:
    """Vocabulary that assigns an integer index to every distinct symbol.

    Indices 0 and 1 are pre-assigned to the "SOS" and "EOS" markers in
    ``index2word``; symbols added later are numbered from 2 upwards in
    order of first appearance.

    Attributes:
        name: Label given to this vocabulary.
        word2index: Symbol -> index mapping (does not contain SOS/EOS).
        word2count: Symbol -> number of times it has been added.
        index2word: Index -> symbol mapping, including SOS/EOS.
        n_words: Number of indices handed out so far (SOS/EOS included).
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # SOS and EOS already occupy two slots

    def addSentence(self, sentence):
        """Register every element obtained by iterating over ``sentence``.

        Iterating a string yields single characters, so a string is
        registered character by character.
        """
        for symbol in sentence:
            self.addWord(symbol)

    def addWord(self, word):
        """Count ``word``, giving it the next free index if it is new."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        fresh_index = self.n_words
        self.word2index[word] = fresh_index
        self.word2count[word] = 1
        self.index2word[fresh_index] = word
        self.n_words = fresh_index + 1
            


In [43]:
def fill_buffer(buffer, path):
    """Append the ``(date, label)`` pairs stored in a text file to ``buffer``.

    Every non-blank line is expected to look like ``'<date>', '<label>'``.
    The line is split on its LAST ``", "`` so a date that itself contains a
    comma is kept intact, and the surrounding quote characters are dropped.

    Args:
        buffer: List that receives the parsed ``(date, label)`` tuples; it
            is modified in place.
        path: Path of the latin-1 encoded text file to read.

    Returns:
        The same ``buffer`` object, after appending.

    Raises:
        ValueError: If a non-blank line has no ``", "`` separator.
    """
    with open(path, 'r', encoding = 'latin-1') as file:
        for line_no, row in enumerate(file, start=1):
            # Strip the line ending first so the quote removal below does not
            # depend on a trailing newline (the last line may lack one).
            row = row.strip()
            if not row:
                continue  # tolerate blank lines

            fields = row.rsplit(", ", 1)
            if len(fields) != 2:
                raise ValueError(
                    "{}:{}: expected \"'<date>', '<label>'\", got {!r}".format(
                        path, line_no, row))

            date, label = fields
            # [1:-1] removes the opening and closing quote of each field.
            buffer.append((date[1:-1], label[1:-1]))

    print("Got {} dates".format(len(buffer)))
    return buffer


# Read every (date, label) pair of the training file into a fresh list.
buffer = []
buffer = fill_buffer(buffer,  path = "Assignment4aDataset.txt" )
# Spot-check one parsed pair.
print(buffer[5])

def preprocess(buffer):
    """Build the date and label vocabularies from ``buffer``.

    Every date and every label is fed to its own ``Lang`` instance (so a
    string is registered character by character), and the longest date and
    label lengths are printed.

    Args:
        buffer: Indexable collection of ``(date, label)`` pairs. Each entry
            is written back as a ``(date, label)`` tuple.

    Returns:
        ``(buffer, date_lang, label_lang)``.
    """
    date_lang, label_lang = Lang("date"), Lang("label")
    longest_date = 0
    longest_label = 0

    for position, pair in enumerate(buffer):
        date, label = pair

        # Vocabulary is built over the raw strings (no word splitting).
        date_lang.addSentence(date)
        label_lang.addSentence(label)

        if len(date) > longest_date:
            longest_date = len(date)
        if len(label) > longest_label:
            longest_label = len(label)

        buffer[position] = (date, label)

    print("Max sentence length : {}, {}".format(longest_date, longest_label))
    return buffer, date_lang, label_lang
    
# Build the two vocabularies used by the tensor helpers and the models below.
buffer, date_lang, label_lang = preprocess(buffer)
# get_data_stat(embedding_dict, buffer, dtype = "str")
print(buffer[5])

# Vocabulary sizes (each count includes the SOS and EOS slots).
print(date_lang.n_words, label_lang.n_words)

Got 40000 dates
('friday 1791 2 09', '1791-09-02')
Max sentence length : 27, 10
('friday 1791 2 09', '1791-09-02')
36 13


In [44]:
# print( sorted(label_lang.word2count.keys())  )

# Run on the first GPU when CUDA is available, otherwise on the CPU.
# NOTE(review): `device` is assigned again in the model-construction cell.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [45]:
def indexesFromSentence(lang, sentence):
    """Translate each element of ``sentence`` into its vocabulary index.

    Args:
        lang: Object exposing a ``word2index`` mapping.
        sentence: Iterable of symbols; a string is processed per character.

    Returns:
        List of indices, one per element of ``sentence``.

    Raises:
        KeyError: If an element is missing from ``lang.word2index``.
    """
    indexes = []
    for symbol in sentence:
        indexes.append(lang.word2index[symbol])
    return indexes


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a column tensor of indices terminated by EOS.

    Returns:
        ``torch.long`` tensor of shape ``(len(sentence) + 1, 1)`` placed on
        the module-level ``device``.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)


def tensorsFromPair(pair):
    """Encode a ``(date, label)`` pair as ``(input_tensor, target_tensor)``.

    ``pair[0]`` is encoded with ``date_lang`` and ``pair[1]`` with
    ``label_lang``.
    """
    return (
        tensorFromSentence(date_lang, pair[0]),
        tensorFromSentence(label_lang, pair[1]),
    )


# print( buffer[5], "\n", tensorsFromPair(buffer[5]) )

In [46]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call.

    Args:
        input_size: Number of rows in the embedding table.
        hidden_size: Size of both the embedding and the GRU hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token and advance the GRU by one step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous hidden state, shape ``(1, 1, hidden_size)``.

        Returns:
            ``(output, hidden)`` exactly as produced by the GRU.
        """
        # The embedding is reshaped to (seq_len=1, batch=1, hidden_size).
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero hidden state on the module-level ``device``."""
        state_shape = (1, 1, self.hidden_size)
        return torch.zeros(*state_shape, device=device)
    

In [68]:
MAX_LENGTH = 28  # longest encoder input: max date length plus the EOS token


class AttnDecoderRNN(nn.Module):
    """GRU decoder with additive attention over the encoder outputs.

    At every step the previous hidden state is scored against each encoder
    output row via ``V(tanh(W1(hidden) + W2(encoder_outputs)))``; the
    softmax-weighted sum of the encoder outputs is concatenated with the
    embedding of the input token and fed to the GRU.

    Args:
        hidden_size: Size of the embeddings, hidden state and encoder rows.
        output_size: Size of the output vocabulary.
        dropout_p: Stored on the instance; not used by ``forward``.
        max_length: Stored on the instance; not used by ``forward``.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(output_size, hidden_size)

        # Attention layers: W1 projects the decoder state, W2 projects each
        # encoder output, V reduces the combined features to a scalar score.
        self.W1 = nn.Linear(hidden_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, hidden_size)
        self.V = nn.Linear(hidden_size, 1)

        # The GRU input is [context ; token embedding], hence 2 * hidden_size.
        self.gru = nn.GRU(2 * hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: Tensor holding a single token index.
            hidden: Previous decoder state, shape ``(1, 1, hidden_size)``.
            encoder_outputs: Encoder outputs, shape ``(length, hidden_size)``.

        Returns:
            ``(log_probs, hidden, wts)``: log-probabilities of shape
            ``(1, output_size)``, the new hidden state, and the attention
            weights of shape ``(length, 1)``.
        """
        token_embedding = self.embedding(input)

        # One score per encoder row, normalised across rows.
        energies = torch.tanh(self.W1(hidden) + self.W2(encoder_outputs))
        scores = self.V(energies).view(-1)
        wts = F.softmax(scores, dim=0).unsqueeze(1)

        # Context vector: attention-weighted sum of the encoder rows.
        context = (wts * encoder_outputs).sum(dim=0)

        gru_input = torch.cat([context, token_embedding.view(-1)]).view(1, 1, -1)
        output, hidden_gru = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(output[0]), dim=1)
        return log_probs, hidden_gru, wts

    def initHidden(self):
        """Return an all-zero hidden state on the module-level ``device``."""
        state_shape = (1, 1, self.hidden_size)
        return torch.zeros(*state_shape, device=device)

In [85]:
# Scratch shape check for the attention arithmetic used in
# AttnDecoderRNN.forward: (4, 1) weights broadcast against (4, 256) encoder
# outputs, summed over dim 0.
wts = torch.randn(4, 1)
en_out = torch.randn(4, 256)

print( torch.sum(wts * en_out, dim = 0).shape )


# NOTE(review): `gru` is only referenced by the commented-out lines below.
gru = nn.GRU(2 * 256, 256)
print(wts.squeeze(1).shape)
# output, hidden = gru( torch.randn((1, 512)), torch.zeros((1, 256)) )
# print(output.shape)


torch.Size([256])
torch.Size([4])


In [94]:


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length = MAX_LENGTH, teacher_forcing_ratio = 0.5):
    """Run one (input, target) pair through the encoder and decoder.

    This function only computes the loss; it performs no backward pass and
    no optimizer step (the two optimizer arguments are accepted but unused).

    Args:
        input_tensor: Source indices; one token is read per row.
        target_tensor: Target indices; one token is read per row.
        encoder: Module providing ``initHidden()``, ``hidden_size`` and a
            per-token forward call.
        decoder: Module called as ``decoder(input, hidden, encoder_outputs)``.
        encoder_optimizer: Unused here.
        decoder_optimizer: Unused here.
        criterion: Loss applied to each decoder output and target token.
        max_length: Number of rows allocated for the encoder outputs.
        teacher_forcing_ratio: Probability of feeding the gold target token
            (rather than the decoder's own prediction) as the next input.

    Returns:
        ``(loss, result, decoder_attentions)``. ``loss`` is the sum of the
        per-step losses. ``result`` (predicted indices) and
        ``decoder_attentions`` (one NumPy array per step) are filled only
        when teacher forcing is NOT used; otherwise both are empty lists.
    """
    source_len = input_tensor.size(0)
    target_len = target_tensor.size(0)

    # ---- Encoder: one GRU step per source token ----
    state = encoder.initHidden()
    # Rows past source_len stay zero.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    for pos in range(source_len):
        step_out, state = encoder(input_tensor[pos], state)
        encoder_outputs[pos] = step_out[0, 0]

    # ---- Decoder: starts from SOS and the final encoder state ----
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = state

    # Decided once per sequence, not per step.
    teacher_forced = random.random() < teacher_forcing_ratio

    loss = 0
    result = []
    decoder_attentions = []

    for step in range(target_len):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        gold = target_tensor[step]

        if teacher_forced:
            # Teacher forcing: the gold token becomes the next input.
            loss += criterion(decoder_output, gold)
            decoder_input = gold
            continue

        # Free running: feed back the most likely token, detached from the
        # autograd history.
        _, best = decoder_output.topk(1)
        decoder_input = best.squeeze().detach()

        result.append(best.item())
        decoder_attentions.append(decoder_attention.detach().squeeze(1).cpu().numpy())

        loss += criterion(decoder_output, gold)
        if decoder_input.item() == EOS_token:
            break  # the model ended the sequence itself

    return loss, result, decoder_attentions

In [50]:
# encoder1, attn_decoder1, train_buffer

def trainIters(encoder, decoder, buffer, learning_rate=0.001):
    """Make one pass over ``buffer``, updating both models in mini-batches.

    Losses returned by ``train`` are accumulated over ``batch_size`` samples,
    averaged and back-propagated once; both Adam optimizers then step. Any
    samples left over after the last full batch are applied as a final,
    smaller batch instead of being dropped.

    Args:
        encoder: Encoder module to optimise.
        decoder: Decoder module to optimise.
        buffer: Sequence of ``(date, label)`` pairs.
        learning_rate: Learning rate for both Adam optimizers.
    """
    n_iters = len(buffer)

    print(n_iters)

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(pair) for pair in buffer]
    criterion = nn.NLLLoss()

    batch_size = 100  # samples accumulated per optimizer step
    epoch_size = 500  # samples between two progress reports

    def _apply_update(accumulated_loss, n_samples):
        """Back-propagate the mean accumulated loss and step both optimizers."""
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        (accumulated_loss / n_samples).backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

    batch_loss = 0
    pending = 0  # samples accumulated since the last optimizer step
    total_loss = 0

    for step, (input_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss, _, _ = train(input_tensor, target_tensor, encoder,
                           decoder, encoder_optimizer, decoder_optimizer, criterion)

        batch_loss += loss
        pending += 1
        total_loss += loss.item()

        if step % epoch_size == 0:
            # Normalised by the LAST sample's target length.
            # NOTE(review): presumably all targets share one length - confirm.
            print("{} {:.4f}".format(step // epoch_size, total_loss / (target_tensor.shape[0] * epoch_size) ))
            total_loss = 0

        if pending == batch_size:
            _apply_update(batch_loss, pending)
            batch_loss = 0
            pending = 0

    # Flush the samples of an incomplete final batch.
    if pending:
        _apply_update(batch_loss, pending)

            


In [51]:
# Width shared by the embeddings and GRU hidden states of both models.
hidden_size = 256
# NOTE(review): re-assigns the `device` already set in an earlier cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Vocabulary sizes come from the Lang objects returned by preprocess().
encoder1 = EncoderRNN(date_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size = hidden_size, \
    output_size = label_lang.n_words, dropout_p=0.1).to(device)


In [52]:
# Shuffle in place, then split 70% / 30% into train and held-out pairs.
# Re-running this cell reshuffles the already-shuffled list.
random.shuffle(buffer)

upto = int(0.7 * len(buffer))
train_buffer = buffer[:upto]

# NOTE(review): `test_buffer` is overwritten later by the cell that loads
# "Assignment4aTestDataset.txt".
test_buffer = buffer[upto : ]

# trainIters(encoder1, attn_decoder1, train_buffer)


In [95]:
# encoder1, attn_decoder1, train_buffer

import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points, iter):
    """Save the given rows as a heat-map image.

    Args:
        points: Iterable of equally-shaped arrays; they are stacked along a
            new first axis and drawn with ``imshow``.
        iter: Value whose ``str()`` is used as the output file name
            (matplotlib appends its default image extension).
    """
    heat_map = np.stack(list(points))
    fig, ax = plt.subplots()
    ax.imshow(heat_map)
    fig.savefig(str(iter))
    # pyplot keeps every figure alive until it is closed; this function is
    # called repeatedly from the evaluation loop, so release it explicitly.
    plt.close(fig)




def testIters(encoder : nn.Module, decoder, buffer, learning_rate=0.001):
    """Evaluate saved encoder/decoder weights on ``buffer``.

    Loads ``models/encoder1.pth`` and ``models/decoder1.pth`` into the given
    modules, decodes every pair without teacher forcing, and reports after
    every 1000 samples the running exact-match accuracy and the running
    per-token accuracy. An attention heat map of the latest sample is saved
    at each report.

    Args:
        encoder: Encoder module that receives the saved weights.
        decoder: Decoder module that receives the saved weights.
        buffer: Sequence of ``(date, label)`` pairs to evaluate.
        learning_rate: Only used to build the optimizers that ``train``
            requires as arguments; no optimizer step is taken here.
    """
    n_iters = len(buffer)

    print(n_iters)

    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # machine; load_state_dict then copies into the module's own device.
    encoder.load_state_dict(torch.load("models/encoder1.pth", map_location="cpu"))
    decoder.load_state_dict(torch.load("models/decoder1.pth", map_location="cpu"))

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    eval_pairs = [tensorsFromPair(pair) for pair in buffer]
    criterion = nn.NLLLoss()

    acc = 0   # sequences predicted exactly
    acc1 = 0  # sum of per-sequence token accuracies
    epoch_size = 1000

    # Nothing is back-propagated here, so skip building autograd graphs.
    with torch.no_grad():
        for step, (input_tensor, target_tensor) in enumerate(eval_pairs, start=1):
            _, result, decoder_attentions = train(
                input_tensor, target_tensor, encoder, decoder,
                encoder_optimizer, decoder_optimizer, criterion,
                teacher_forcing_ratio = 0)

            print("\r{} {}".format(step, len(result)), end = " ")

            predicted = torch.tensor(result, dtype=torch.long)
            gold = target_tensor.cpu().view(-1)
            target_len = gold.numel()

            # Decoding stops early when EOS is emitted, so the prediction can
            # be shorter than the target; compare only the shared prefix
            # rather than tensors of different sizes.
            overlap = min(len(result), target_len)
            matches = int((predicted[:overlap] == gold[:overlap]).sum())

            if len(result) == target_len and matches == target_len:
                acc += 1

            acc1 += matches / target_len

            if step % epoch_size == 0:
                print("{} {:.4f} {:.4f}".format(step,  acc / step , acc1 / step) )
                showPlot(decoder_attentions, step)


# testIters(encoder1, attn_decoder1, test_buffer)

            


In [96]:
# Replace the held-out split with the pairs from the dedicated test file.
test_buffer = []
test_buffer = fill_buffer(test_buffer,  path = "Assignment4aTestDataset.txt" )
print(test_buffer[5])

# Fresh models sized from the TRAINING vocabularies; testIters loads the
# saved weights into them.
# NOTE(review): a test-file character missing from date_lang / label_lang
# raises KeyError in indexesFromSentence.
encoder2 = EncoderRNN(date_lang.n_words, hidden_size).to(device)
attn_decoder2 = AttnDecoderRNN(hidden_size = hidden_size, \
    output_size = label_lang.n_words, dropout_p=0.1).to(device)

testIters(encoder2, attn_decoder2, test_buffer)


Got 10000 dates
('3 november 2064', '2064-11-03')
10000
1000 11 1000 0.9680 0.9962
(11, 28)
1600 11 

KeyboardInterrupt: 