# Imports

In [1]:
# Imports
# Standard Imports
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import torch
import os

# Ch 2 Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import *
from sklearn.metrics import accuracy_score

# Ch 3 Imports
import torchvision
from torchvision import transforms

# Now you can import your file just like any other library
import sys
sys.path.append('../Inside-Deep-Learning/')

from idlmam import *


In [6]:
# Pick the compute device: CUDA first, then Intel XPU, otherwise CPU.
# `torch.xpu` is missing on older PyTorch builds, so the attribute is checked
# before it is used; otherwise the CPU fallback would never be reached there.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    device = torch.device("xpu")
else:
    device = torch.device("cpu")

# Batch size, epoch count and data-loader workers: small values on CPU,
# larger values on either accelerator.
if device.type == "cpu":
    B, epochs, num_workers_data_loaders = 1, 5, 1
else:
    B, epochs, num_workers_data_loaders = 32, 20, 2

print(device)

xpu


In [7]:
from io import BytesIO 
from zipfile import ZipFile 
from urllib.request import urlopen 
import re

all_data = []
# Download the raw text. The context manager closes the HTTP response even if
# reading it raises.
with urlopen(
 "https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt") as resp:
    shakespear_100k = resp.read()
# Decode the bytes as UTF-8 and lower-case the whole corpus.
shakespear_100k = shakespear_100k.decode('utf-8').lower()

In [8]:
# Character -> integer id, numbered in order of first appearance in the corpus.
vocab2indx = {}
for char in shakespear_100k:
    vocab2indx.setdefault(char, len(vocab2indx))

# Reverse lookup: integer id -> character.
indx2vocab = {index: symbol for symbol, index in vocab2indx.items()}

print("Vocab Size: ", len(vocab2indx))
print("Total Characters:", len(shakespear_100k))

Vocab Size:  36
Total Characters: 99993


In [15]:
class AutoRegressiveDataset(Dataset):
    """
    Autoregressive dataset built from a single long source sequence that is
    cut into consecutive, non-overlapping chunks. Every item is an
    (input, target) pair in which the target is the input shifted one
    position to the right.
    """

    def __init__(self, large_string, max_chunk=500):
        """
        large_string: the long source sequence the chunks are taken from
        max_chunk: the maximum allowed size of any chunk.
        """
        self.doc = large_string
        self.max_chunk = max_chunk

    def __len__(self):
        # One position is held back so the last chunk still has a shifted target.
        usable = len(self.doc) - 1
        return usable // self.max_chunk

    def __getitem__(self, idx):
        begin = idx * self.max_chunk
        end = begin + self.max_chunk

        # Inputs are doc[begin:end]; targets are the same window moved by one.
        inputs = torch.tensor(
            [vocab2indx[ch] for ch in self.doc[begin:end]], dtype=torch.int64)
        targets = torch.tensor(
            [vocab2indx[ch] for ch in self.doc[begin + 1:end + 1]],
            dtype=torch.int64)
        return inputs, targets

In [16]:
class AutoRegressive(nn.Module):
    """
    Autoregressive sequence model: an embedding layer, a stack of GRU cells
    (each followed by layer normalization) and a small fully connected head
    that produces one score per vocabulary entry at every time step.
    """

    def __init__(self, num_embeddings, embd_size, hidden_size, layers=1):
        """
        num_embeddings: size of the vocabulary (also the output size).
        embd_size: dimension of the token embeddings.
        hidden_size: dimension of every recurrent hidden state.
        layers: number of stacked GRU cells.
        """
        super(AutoRegressive, self).__init__()

        self.hidden_size = hidden_size
        self.embd = nn.Embedding(num_embeddings, embd_size)

        # The first cell consumes embeddings; the remaining cells consume the
        # hidden state produced by the cell below them.
        self.layers = nn.ModuleList(
            [nn.GRUCell(embd_size, hidden_size)] + [nn.GRUCell(hidden_size, hidden_size)
            for i in range(layers-1)])
        self.norms = nn.ModuleList(
            [nn.LayerNorm(hidden_size) for i in range(layers)])

        self.pred_class = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LeakyReLU(),
            nn.LayerNorm(hidden_size),
            nn.Linear(hidden_size, num_embeddings)
        )

    def forward(self, input):
        """
        input: integer token ids with shape (B, T).

        Returns the per-step predictions stacked along dim 1, i.e. a tensor
        of shape (B, T, num_embeddings).
        """
        B = input.size(0)
        T = input.size(1)

        x = self.embd(input)

        h_prevs = self.initHiddenStates(B)

        # `step` updates h_prevs in place, so the state carries across steps.
        last_activations = []
        for t in range(T):
            x_in = x[:,t,:]
            last_activations.append(self.step(x_in, h_prevs))

        last_activations = torch.stack(last_activations, dim=1)

        return last_activations

    def initHiddenStates(self, B):
        """
        Creates an initial hidden state list for the RNN layers.

        B: the batch size for the hidden states.

        The zero states are allocated with the device and dtype of the
        embedding weights, so they follow the module wherever it is moved
        instead of depending on a notebook-level `device` variable.
        """
        reference = self.embd.weight
        return [torch.zeros(B, self.hidden_size, device=reference.device,
                            dtype=reference.dtype)
            for _ in range(len(self.layers))]

    def step(self, x_in, h_prevs=None):
        """
        x_in: the input for this current time step and has shape (B)
        if the values need to be embedded, and (B, D) if they
        have already been embedded.

        h_prevs: a list of hidden state tensors each with shape
        (B, self.hidden_size) for each layer in the network.
        These contain the current hidden state of the RNN layers
        and will be updated by this call.

        Returns the output of the prediction head for this step, with shape
        (B, num_embeddings).
        """

        # A 1-D input holds raw token ids that still need to be embedded.
        if len(x_in.shape) == 1:
            x_in = self.embd(x_in)

        if h_prevs is None:
            h_prevs = self.initHiddenStates(x_in.shape[0])

        for l in range(len(self.layers)):
            h_prev = h_prevs[l]
            h = self.norms[l](self.layers[l](x_in, h_prev))

            # Store the new state in the caller's list and feed it upward.
            h_prevs[l] = h
            x_in = h
        return self.pred_class(x_in)
    
    

In [17]:
def applyLinearLayerOverTime(x, layer=None):
    """
    Applies a layer independently to every time step of a batch of sequences.

    x: tensor of shape (B, T, D).
    layer: callable applied to each (B, D) slice; when omitted, the
        notebook-level `linearLayer` is used.

    Returns a tensor of shape (B, T, -1) where entry [b, t] is the layer's
    output for x[b, t].
    """
    if layer is None:
        layer = linearLayer

    B, T, D = x.shape
    results = [layer(x[:, t, :]) for t in range(T)]

    # Stack along dim=1 so the result is already (B, T, ...). Stacking along
    # dim=0 and then viewing as (B, T, -1) reinterprets memory without
    # transposing, which mixes up batch and time entries.
    return torch.stack(results, dim=1).view(B, T, -1)

In [18]:
# Cut the corpus into 250-character chunks and serve them in shuffled
# batches of 128.
autoRegData = AutoRegressiveDataset(large_string=shakespear_100k, max_chunk=250)
autoReg_loader = DataLoader(dataset=autoRegData, batch_size=128, shuffle=True)

# Two recurrent layers, 32-dimensional embeddings, 128-dimensional hidden state.
autoReg_model = AutoRegressive(
    num_embeddings=len(vocab2indx), embd_size=32, hidden_size=128, layers=2
).to(device)

# Clamp every gradient element to [-2, 2] as it is computed.
for p in autoReg_model.parameters():
    p.register_hook(lambda grad: grad.clamp(-2, 2))

In [19]:
def CrossEntLossTime(x, y):
    """
    Cross-entropy loss summed over the time dimension.

    x: output with shape (B, T, V)
    y: labels with shape (B, T)

    Each time step contributes the cross-entropy between x[:, t, :] and
    y[:, t]; the per-step values are added together.
    """
    criterion = nn.CrossEntropyLoss()
    steps = range(x.size(1))
    return sum(criterion(x[:, t, :], y[:, t]) for t in steps)

In [20]:
# Train the autoregressive model for 100 epochs with the summed
# per-time-step cross-entropy as the loss.
train_network(
    autoReg_model,
    CrossEntLossTime,
    autoReg_loader,
    epochs=100,
    device=device,
)

Epoch: 100%|██████████| 100/100 [12:29<00:00,  7.49s/it]


Unnamed: 0,epoch,total time,train loss
0,0,8.266254,884.772064
1,1,15.722022,803.757401
2,2,22.679129,772.206879
3,3,30.018151,765.550522
4,4,38.546090,761.632721
...,...,...,...
95,95,706.699120,411.351776
96,96,713.628780,410.847778
97,97,722.492518,405.594604
98,98,735.530222,408.191254
