In [37]:
from data_rnn import load_ndfa, load_brackets
from data_prep import pad_and_convert
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

In [38]:
writer = SummaryWriter('Question_8')

In [39]:
x_train_ndfa, (i2w_ndfa, w2i_ndfa) = load_ndfa(n=1500)
x_train_brackets, (i2w_brackets, w2i_brackets) = load_brackets(n=150_000)

In [40]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [41]:
class LSTM(nn.Module):
    def __init__(self, vocab_size, emb_size, h, num_char, n_layers=1):
        super(LSTM, self).__init__()

        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.lstm = nn.LSTM(input_size=emb_size, hidden_size=h, num_layers=n_layers, batch_first=True)
        self.fc = nn.Linear(h, num_char)
    
    def forward(self, input_seq, h):
        embedded = self.embedding(input_seq)
        lstm_out, hidden = self.lstm(embedded, h)
        lstm_out = lstm_out[:, -1, :]
        output = self.fc(lstm_out)
        return output, hidden

In [51]:
def pad_and_convert2(batch, w2i):
    start_token = w2i['.start']
    end_token = w2i['.end']
    
    batch = [[start_token] + x + [end_token] for x in batch]
    
    max_len = max([len(x) for x in batch])
    
    batch = [x + [w2i['.pad']] * (max_len - len(x)) for x in batch]
    
    batch = torch.tensor(batch, dtype=torch.long)
    
    return batch


def create_target(x_train_tensor):
    target_tensor = []
    for seq in x_train_tensor:
        begin_target = seq[1:]
        extra_tensor = torch.tensor([0])
        target = torch.cat((begin_target, extra_tensor), 0)
        target_tensor.append(target)
    return torch.stack(target_tensor)

In [70]:
x_train_ndfa_padded2 = pad_and_convert2(x_train_ndfa, w2i_ndfa)
x_train_brackets_padded2 = pad_and_convert2(x_train_brackets, w2i_brackets)

target_ndfa = create_target(x_train_ndfa_padded2)
target_brackets = create_target(x_train_brackets_padded2)

dataset_ndfa2 = TensorDataset(x_train_ndfa_padded2, target_ndfa)
dataloader_ndfa2 = DataLoader(dataset_ndfa2, batch_size=64, shuffle=True)

dataset_brackets2 = TensorDataset(x_train_brackets_padded2, target_brackets)
dataloader_brackets2 = DataLoader(dataset_brackets2, batch_size=64, shuffle=True)

In [71]:
model = LSTM(vocab_size=len(set(w2i_ndfa)), emb_size=300, h=300, num_char=len(set(w2i_ndfa)), n_layers=1)

In [72]:
vocab_size = len(set(w2i_ndfa))
emb_size = 300
h = 300
num_char = vocab_size
n_layers = 1
num_epochs = 2
learning_rate = 0.001

In [73]:
w2i_brackets['.end']

2

In [74]:
w2i_ndfa['.end']

2

In [75]:
criterion = nn.CrossEntropyLoss(ignore_index=2,reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [76]:
import torch.distributions as dist
def sample(lnprobs, temperature=1.0): 
    """
    Sample an element from a categorical distribution
    :param lnprobs: Outcome logits
    :param temperature: Sampling temperature. 1.0 follows the given
        distribution, 0.0 returns the maximum probability element. :return: The index of the sampled element.
    """
    if temperature == 0.0:
        return lnprobs.argmax()
    p = F.softmax(lnprobs / temperature, dim=0)
    cd = dist.Categorical(p)
    return cd.sample()

In [77]:
max_length = 50
for epoch in range(num_epochs):
    total_loss = 0.0

    for batch_idx, (inputs, targets) in enumerate(dataloader_ndfa2):
        print(f'Batch Index: {batch_idx}, Batch Size: {inputs.size(0)}')
        print(inputs.shape)
        print('targets',targets.shape)

        print('targets reshape', targets.shape)

        model.train()        
        optimizer.zero_grad()

        h = None
        
        output, _ = model(inputs, h)
        print('output shape', output.shape)
        output = output.reshape(-1, vocab_size)
        targets = targets.reshape(-1)
        print('output shape reshaped', output.shape)
        print('target shape reshaped', targets.shape)


        loss = criterion(output, targets)  

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Iteration [{batch_idx+1}/{len(dataloader_ndfa2)}], Loss: {total_loss:.4f}')

    average_loss = total_loss / len(dataloader_ndfa2)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')

torch.save(model.state_dict(), 'lstm_model.pth')

Batch Index: 0, Batch Size: 64
torch.Size([64, 112])
targets torch.Size([64, 112])
targets reshape torch.Size([64, 112])
output shape torch.Size([64, 15])
output shape reshaped torch.Size([64, 15])
target shape reshaped torch.Size([7168])


ValueError: Expected input batch_size (64) to match target batch_size (7168).

In [68]:
max_length = 50
for epoch in range(num_epochs):
    total_loss = 0.0

    for batch_idx, (inputs, targets) in enumerate(dataloader_ndfa2):
        print(f'Batch Index: {batch_idx}, Batch Size: {inputs.size(0)}')

        model.train()        
        optimizer.zero_grad()

        h = None
        
        output, _ = model(inputs, h)

        output = output.reshape(-1, vocab_size)
        targets = targets.reshape(-1)

        loss = criterion(output, targets)  

        loss.backward()

        allgradvec = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        total_loss += loss.item()

        print(f'Epoch [{epoch+1}/{num_epochs}], Iteration [{batch_idx+1}/{len(dataloader_ndfa2)}], Loss: {total_loss:.4f}')

    average_loss = total_loss / len(dataloader_ndfa2)

    for name, param in model.named_parameters():
        if param.grad is not None:
            writer.add_scalar(f'Gradient Norm/{name}', param.grad.norm().item(), global_step=epoch)
                
    writer.add_scaler('All Gradient Norm', allgradvec, epoch)

    writer.add_scalar('Training Loss', average_loss, epoch)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')

torch.save(model.state_dict(), 'lstm_model.pth')

Batch Index: 0, Batch Size: 64


ValueError: Expected input batch_size (64) to match target batch_size (7168).

In [36]:
max_length = 50
for epoch in range(num_epochs):
    total_loss = 0.0

    for batch_idx, (inputs,) in enumerate(dataloader_ndfa2):
        # print(f'Batch Index: {batch_idx}, Batch Size: {inputs.size(0)}')
        model.train()        
        optimizer.zero_grad()

        # Autoregressive training
        h = None
        for t in range(inputs.size(1) - 1):
            # total_loss = 0.0
            input_seq = inputs[:, :t+1]
            target_word = inputs[:, t+1]
            # print('input_seq:', input_seq)
            # print('target_word:', target_word)

            output, _ = model(input_seq, h)

            loss = criterion(output, target_word)  

            loss.backward()

            allgradvec = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            total_loss += loss.item()
            

        # print(f'Epoch [{epoch+1}/{num_epochs}], Iteration [{batch_idx+1}/{len(dataloader_ndfa2)}], Loss: {total_loss:.4f}')

    average_loss = total_loss / len(dataloader_ndfa2.dataset)

    for name, param in model.named_parameters():
            if param.grad is not None:
                writer.add_scalar(f'Gradient Norm/{name}', param.grad.norm().item(), global_step=epoch)
                
    writer.add_scaler('All Gradient Norm', allgradvec, epoch)

    writer.add_scalar('Training Loss', average_loss, epoch)

    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}')

torch.save(model.state_dict(), 'lstm_model_ndfa_q8.pth')


AttributeError: 'SummaryWriter' object has no attribute 'add_scaler'

In [34]:
!tensorboard --logdir=runs


NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.13.0 at http://localhost:6006/ (Press CTRL+C to quit)
^C


In [None]:
writer.close()