# Prelude

In [1]:
import numpy
import scipy
import matplotlib
import seaborn

import torch
import torch.nn as nn
import datasets
import wandb

import random

from torch import tensor
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

import nltk.lm as lm
from nltk import ngrams
from nltk import tokenize
from nltk.corpus import stopwords, gutenberg
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize

from matplotlib import pyplot

In [2]:
import pickle

# Debugging aid: turn on autograd anomaly detection for the whole session.
# NOTE(review): PyTorch describes this mode as debug-only because the extra
# checks slow execution; consider switching it off for real training runs.
torch.autograd.set_detect_anomaly(mode = True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x753b419a0ed0>

In [3]:
# Special vocabulary tokens: sequence start, sequence end, padding, unknown word.
sos, eos, pad, unk = '<SOS>', '<EOS>', '<PAD>', '<UNK>'

# Encoder–Decoder Training

## Input Data

In [4]:
# Train on the GPU when one is available, otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Number of (text, target) pairs per training batch.
batch_size = 4

In [5]:
def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Tokenised source texts and their tokenised targets.
train_text = _load_pickle('train_text.pickle')
train_high = _load_pickle('train_high.pickle')
# Map each vocabulary token to its position in the pickled vocabulary sequence.
vocab = {token: index for index, token in enumerate(_load_pickle('vocab.pickle'))}

In [6]:
def _to_padded_ids(sentences):
    """Turn token sequences into one id tensor, right-padded with the PAD id."""
    id_rows = []
    for sentence in sentences:
        id_rows.append(torch.tensor([vocab[token] for token in sentence]))
    return pad_sequence(id_rows, padding_value = vocab[pad], batch_first = True)


# Padded id tensors for the source texts (tn) and the targets (th).
tn = _to_padded_ids(train_text)
th = _to_padded_ids(train_high)

# Pair every source row with its target row and serve them in shuffled batches.
paired_rows = list(zip(tn, th))
loader = DataLoader(paired_rows, shuffle = True, batch_size = batch_size)

## Proper Model

In [7]:
class Encoder(nn.Module):
    """Embed token ids and run them through a single-layer GRU.

    Args:
        vocab: sized collection of tokens; only ``len(vocab)`` is used.
        embedding_dim: width of the token embeddings (default 256).
        hidden_size: width of the GRU hidden state (default 128).
    """

    def __init__(self, vocab, embedding_dim = 256, hidden_size = 128):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings = len(vocab),
            embedding_dim = embedding_dim,
        )
        self.gru = nn.GRU(
            input_size = embedding_dim,
            hidden_size = hidden_size,
            num_layers = 1,
            batch_first = True,
        )

    def forward(self, input):
        """Encode a batch of token ids.

        Args:
            input: integer token ids, batch dimension first (the GRU is built
                with ``batch_first = True``).

        Returns:
            The ``(output, hidden)`` pair produced by the GRU.
        """
        embedded = self.embedding(input)
        return self.gru(embedded)

In [8]:
class Decoder(nn.Module):
    """GRU decoder that maps token ids plus a hidden state to vocabulary logits.

    Args:
        vocab: sized collection of tokens; ``len(vocab)`` sets both the
            embedding table size and the number of output logits.
        embedding_dim: width of the token embeddings (default 256).
        hidden_size: width of the GRU hidden state and of the intermediate
            linear layer (default 128). It must match the hidden state that is
            passed to ``forward``.
    """

    def __init__(self, vocab, embedding_dim = 256, hidden_size = 128):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings = len(vocab), embedding_dim = embedding_dim)
        self.relu = nn.ReLU()
        self.gru = nn.GRU(input_size = embedding_dim, hidden_size = hidden_size, batch_first = True)
        self.out = nn.Linear(hidden_size, hidden_size)
        self.out2 = nn.Linear(hidden_size, len(vocab))

    def forward(self, input, hidden):
        """Run the decoder over ``input`` starting from ``hidden``.

        Args:
            input: integer token ids, batch dimension first.
            hidden: initial GRU hidden state.

        Returns:
            ``(logits, hidden)``: unnormalised scores over the vocabulary for
            every input position, and the GRU's final hidden state.
        """
        embedded = self.embedding(input)
        features, next_hidden = self.gru(embedded, hidden)
        # Two-layer projection head: linear -> ReLU -> linear onto the vocabulary.
        logits = self.out2(self.relu(self.out(features)))
        return logits, next_hidden

In [9]:
# Build both halves of the model on the training device.
encoder = Encoder(vocab).to(device)
decoder = Decoder(vocab).to(device)

# A single Adam optimiser updates the encoder and decoder parameters together.
optimiser = torch.optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr = 1e-3,
    weight_decay = 1e-3,
)

# Token-level classification loss over the vocabulary logits.
# NOTE(review): padded target positions are scored like real tokens; consider
# `ignore_index = vocab[pad]`, but first check how steps whose targets are all
# padding behave under the default mean reduction.
criterion = nn.CrossEntropyLoss().to(device)

In [10]:
def run_epoch():
    """Train the encoder/decoder for one pass over ``loader`` with teacher forcing.

    Works on the notebook-level ``loader``, ``encoder``, ``decoder``,
    ``optimiser``, ``criterion``, ``vocab`` and ``device``.

    Returns:
        float: mean over the trained batches of the per-batch loss (the sum of
        the per-step criterion values), or ``nan`` if no batch was trained.

    Raises:
        AssertionError: if a target sequence does not start with the SOS token.
    """
    pad_id = vocab[pad]
    total_loss = 0.0
    trained_batches = 0

    for text, high in loader:
        text = text.to(device)
        high = high.to(device)

        # Tensor-level check; the builtin all() would walk the tensor one
        # element at a time.
        assert (high[:, 0] == vocab[sos]).all()

        # Trailing columns that are padding for every row of this batch carry
        # no information; skipping them keeps the unrolled graph retained for
        # backward() as short as the longest real target in the batch.
        used_columns = (high != pad_id).any(dim = 0).nonzero()
        high_len = int(used_columns.max()) + 1 if used_columns.numel() else 0
        if high_len < 2:
            # Nothing to predict after the first token.
            continue

        optimiser.zero_grad()

        # Only the encoder's final hidden state is used; it seeds the decoder.
        _, decoder_hidden = encoder(text)
        decoder_input = high[:, 0]

        loss = torch.zeros((), device = device)
        for t in range(1, high_len):
            decoder_output, decoder_hidden = decoder(decoder_input.unsqueeze(1), decoder_hidden)
            loss = loss + criterion(decoder_output.squeeze(1), high[:, t])
            # Teacher forcing: the next input is the ground-truth token.
            decoder_input = high[:, t]

        loss.backward()
        optimiser.step()

        total_loss += loss.item()
        trained_batches += 1

    return total_loss / trained_batches if trained_batches else float('nan')

In [11]:
# Number of full passes over the training data.
epochs = 1
for e in range(epochs):
    # The loss computed inside run_epoch is local to that function, so report
    # whatever the call hands back instead of reading a notebook-level `loss`.
    epoch_loss = run_epoch()
    shown_loss = 'n/a' if epoch_loss is None else float(epoch_loss)
    print(f'Epoch {e}: loss = {shown_loss}')

OutOfMemoryError: CUDA out of memory. Tried to allocate 12.00 MiB. GPU 0 has a total capacity of 3.81 GiB of which 12.00 MiB is free. Including non-PyTorch memory, this process has 3.78 GiB memory in use. Of the allocated memory 3.48 GiB is allocated by PyTorch, and 214.23 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Look at one batch only and report the padded length of its target tensor.
first_batch = next(iter(loader), None)
if first_batch is not None:
    text, high = first_batch
    print(high.size(1))
del first_batch

In [None]:
# Drop notebook-level references to the models and to any leftover batch
# tensors. Only names that are actually bound are removed: the encoder/decoder
# intermediates are locals of run_epoch and never exist at notebook level, so a
# plain `del` on them raises NameError.
# NOTE: `optimiser` was built from the model parameters and still references
# them, so their memory can only be reclaimed once it is dropped as well.
stale_names = (
    'encoder', 'decoder', 'text', 'high',
    'encoder_output', 'encoder_hidden', 'decoder_input', 'decoder_hidden',
)
for stale_name in stale_names:
    globals().pop(stale_name, None)
del stale_names, stale_name

In [None]:
# Drop the notebook-level reference to the DataLoader. The padded tensors `tn`
# and `th` it was built from remain bound under their own names.
del loader

In [None]:
# Ask PyTorch's CUDA caching allocator to release the memory it holds but is
# not using; memory of tensors that are still referenced is not released.
torch.cuda.empty_cache()

In [None]:
# Run a full garbage-collection pass so unreachable Python objects are freed.
# NOTE(review): this presumably belongs before torch.cuda.empty_cache(), so the
# allocator can hand back whatever the collector releases; confirm cell order.
import gc
gc.collect()