In [None]:
%load_ext autoreload
%autoreload 2

# Exercise 5

<img src="./images/05.png" width=800>

We add the following right after defining the model:
```python
for p in model.parameters():
    p.register_hook(lambda grad: torch.clamp(grad, -5, 5))
```

In [None]:
import time
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset, random_split, Subset
import torch.nn as nn 
from typing import DefaultDict, Any, Callable, Optional
import mlflow
import os
from utils import train_network, accuracy_score_wrapper, weight_reset
import torchvision
from torchvision import transforms
from pytorchinfo import summary
import mlflow
import torch.utils
from  sklearn.model_selection import train_test_split

In [None]:
# Keep this exercise's MLflow runs in their own local directory. The URI is
# exported through the environment and then handed to MLflow explicitly.
os.environ['MLFLOW_TRACKING_URI'] = './mlruns05_5'
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI'))

In [None]:
# Select the MLflow experiment that the runs started in this notebook are logged under.
mlflow.set_experiment('Exercise05_5')

2025/06/07 10:04:44 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_1/454812752021810061', creation_time=1749278084155, experiment_id='454812752021810061', last_update_time=1749278084155, lifecycle_stage='active', name='Exercise_1', tags={}>

In [None]:
# Request deterministic cuDNN kernels, then train on the GPU whenever one is visible.
torch.backends.cudnn.deterministic = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and DataLoader

In [None]:
import io
import zipfile

import requests

# Archive with the name lists; the dataset class looks for `data/names/*.txt` entries in it.
zip_file_url = "https://download.pytorch.org/tutorial/data.zip"

# Fail fast on a hung connection or an HTTP error response instead of passing
# non-zip bytes on to ZipFile, where the failure would be much harder to read.
r = requests.get(zip_file_url, timeout=60)
r.raise_for_status()

# `z` stays open in memory (the dataset reads from it directly); the archive is
# additionally unpacked into the current working directory.
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

In [None]:
import string
import unicodedata

namge_language_data = {}

# Names are folded to plain ASCII to keep the character set small,
# e.g. "Ślusàrski" becomes "Slusarski".
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
# Lookup table: permitted character -> its position inside `all_letters`.
alphabet = {letter: position for position, letter in enumerate(all_letters)}


def unicodeToAscii(s):
    """Return `s` reduced to the characters listed in `all_letters`.

    The string is NFD-normalised so that accented letters split into a base
    letter plus combining marks; combining marks (category 'Mn') and every
    character outside `all_letters` are then dropped.
    Approach taken from https://stackoverflow.com/a/518232/2809427
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [
        ch for ch in decomposed
        if ch in all_letters and unicodedata.category(ch) != 'Mn'
    ]
    return ''.join(kept)

In [None]:
class LanguageNameDataset_inferred_unicode(Dataset):
    """Character-level (name, language) dataset read from an open zip archive.

    Every `data/names/<language>.txt` entry of the archive contributes one class;
    each non-empty line of such a file is one (lower-cased) name.

    Args:
        zipfile: an open `zipfile.ZipFile`-like object (needs `namelist()` and `open()`).
        vocabulary: optional mapping character -> integer index. When omitted it is
            inferred from the characters that actually occur in the loaded names.
        unicode: if False (default) names are ASCII-folded with `unicodeToAscii`
            before lower-casing; if True the decoded text is only lower-cased.

    Attributes:
        label_names: language names, in archive order; a label is an index into it.
        data / labels: parallel lists of names and integer labels.
        vocabulary: the character -> index mapping used by `string2inputvector`.
    """

    def __init__(self, zipfile, vocabulary=None, unicode=False):
        self.namge_language_data = {}
        self.unicode_or_not(z=zipfile, unicode=unicode)
        self.label_names = list(self.namge_language_data)
        self.data = []
        self.labels = []
        for y, language in enumerate(self.label_names):
            names = self.namge_language_data[language]
            self.data.extend(names)
            self.labels.extend([y] * len(names))
        if vocabulary is None:
            # Sort before enumerating: iterating a set of strings directly gives an
            # order that changes between interpreter runs (string hash randomisation),
            # which would silently change every character index from run to run.
            characters = sorted({char for name in self.data for char in name})
            vocabulary = {char: index for index, char in enumerate(characters)}
        self.vocabulary = vocabulary

    def __len__(self):
        """Number of names in the dataset."""
        return len(self.data)

    def string2inputvector(self, input_string):
        """Encode a string as a 1-D long tensor of vocabulary indices.

        Raises:
            KeyError: if a character is missing from `self.vocabulary`.
        """
        indices = [self.vocabulary[character] for character in input_string]
        return torch.tensor(indices, dtype=torch.long)

    def unicode_or_not(self, z, unicode=False):
        """Fill `self.namge_language_data` (language -> list of names) from archive `z`."""
        prefix, suffix = "data/names/", ".txt"
        for zip_path in z.namelist():
            if prefix in zip_path and zip_path.endswith(suffix):
                lang = zip_path[len(prefix):-len(suffix)]
                with z.open(zip_path) as myfile:
                    lines = myfile.read().decode('utf-8').strip().split("\n")
                if not unicode:
                    lines = [unicodeToAscii(line) for line in lines]
                lang_names = [line.lower() for line in lines]
                # Blank lines (or names that are emptied by ASCII folding) would become
                # zero-length sequences, which cannot be packed for the RNN; drop them.
                self.namge_language_data[lang] = [name for name in lang_names if name]

    def __getitem__(self, index):
        """Return `(encoded_name, label)` as a pair of long tensors."""
        name = self.data[index]
        label_tensor = torch.tensor(self.labels[index], dtype=torch.long)
        return self.string2inputvector(name), label_tensor

In [None]:
# Build the dataset from the in-memory archive. With the default unicode=False every
# name is ASCII-folded and lower-cased before the vocabulary is inferred.
dataset = LanguageNameDataset_inferred_unicode(zipfile=z)
# Number of distinct characters found in the loaded names.
print(len(dataset.vocabulary))

29


In [None]:
# Stratified 90/10 split over sample indices, so both parts keep the per-language
# class proportions; the fixed random_state keeps the split repeatable.
train_idx, validation_idx = train_test_split(
    np.arange(len(dataset)),
    stratify=dataset.labels,
    test_size=0.1,
    shuffle=True,
    random_state=999,
)

# Index-based views onto the one underlying dataset.
train_dataset, test_dataset = Subset(dataset, train_idx), Subset(dataset, validation_idx)

In [None]:
def pad_and_pack(batch):
    """Collate variable-length `(sequence, label)` pairs into one packed batch.

    Args:
        batch: list of `(sequence, label)` pairs, where `sequence` is a tensor whose
            first dimension is its length.

    Returns:
        `(x_packed, y_batched)`: the sequences padded time-major and packed into a
        `PackedSequence` (any length order is accepted), and the labels as a long tensor.
    """
    sequences = [sequence for sequence, _ in batch]
    targets = [target for _, target in batch]
    seq_lengths = [sequence.shape[0] for sequence in sequences]

    rnn_utils = torch.nn.utils.rnn
    padded = rnn_utils.pad_sequence(sequences, batch_first=False)
    x_packed = rnn_utils.pack_padded_sequence(
        padded, seq_lengths, batch_first=False, enforce_sorted=False
    )
    y_batched = torch.as_tensor(targets, dtype=torch.long)
    return x_packed, y_batched

In [None]:
# Number of names per mini-batch; used by both DataLoaders and logged with the run parameters.
batch_size = 1

In [None]:
# DataLoaders for the training and held-out splits. `pad_and_pack` collates each batch of
# variable-length names into a PackedSequence; only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_and_pack)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=pad_and_pack)

## Model

In [None]:
class LasttimeStep(nn.Module):
    """Extract the final hidden state of the top RNN layer as a `(batch, features)` tensor.

    Meant to sit directly after an `nn.RNN`/`nn.GRU`/`nn.LSTM` inside `nn.Sequential`.

    Args:
        rnn_layer: number of stacked layers of the preceding RNN.
        bidirectional: whether the preceding RNN is bidirectional.
    """

    def __init__(self, rnn_layer=1, bidirectional=False):
        super().__init__()
        self.rnn_layer = rnn_layer
        self.num_bidirectional = 2 if bidirectional else 1

    def forward(self, input):
        """Map the RNN's `(output, h_n)` (or `(output, (h_n, c_n))`) tuple to features.

        Returns:
            Tensor of shape `(batch, num_directions * hidden_size)`; for a bidirectional
            RNN each row is that sample's forward state followed by its backward state.
        """
        last_step = input[1]
        # LSTMs return (h_n, c_n); only the hidden state is used.
        if isinstance(last_step, tuple):
            last_step = last_step[0]
        # h_n is laid out as (layers * directions, batch, hidden) regardless of batch_first.
        batch_size = last_step.shape[1]
        last_step = last_step.view(self.rnn_layer, self.num_bidirectional, batch_size, -1)
        # Top layer only -> (directions, batch, hidden).
        last_step = last_step[-1]
        # Bring the batch axis to the front BEFORE flattening. Reshaping the
        # (directions, batch, hidden) tensor directly would interleave the hidden
        # states of different samples whenever batch > 1 and directions > 1.
        last_step = last_step.permute(1, 0, 2)
        return last_step.reshape(batch_size, -1)

In [None]:
D = 64  # embedding dimension
# Size the embedding table from the vocabulary that actually produces the input
# indices, rather than from `all_letters`, so the two can never drift apart.
vocab_size = len(dataset.vocabulary)
hidden_nodes = 256
classes = len(dataset.label_names)


class PackableEmbedding(nn.Module):
    """Run an embedding layer on either a plain index tensor or a PackedSequence.

    The DataLoaders in this notebook emit PackedSequence batches, which
    `nn.Embedding` cannot consume directly. A packed input is padded, embedded
    and packed again; any other input is passed straight to the wrapped layer.
    """

    def __init__(self, embedding):
        super().__init__()
        self.embedding = embedding

    def forward(self, input):
        if not isinstance(input, torch.nn.utils.rnn.PackedSequence):
            return self.embedding(input)
        padded, lengths = torch.nn.utils.rnn.pad_packed_sequence(input, batch_first=True)
        embedded = self.embedding(padded)
        # pack_padded_sequence expects the lengths on the CPU.
        return torch.nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False
        )


# Embedding -> 3-layer bidirectional RNN -> last hidden state -> linear classifier.
# The linear layer sees 2 * hidden_nodes features (forward + backward direction).
rnn_3layer_bidir = nn.Sequential(
    PackableEmbedding(nn.Embedding(vocab_size, D)),
    nn.RNN(D, hidden_size=hidden_nodes, batch_first=True, num_layers=3, bidirectional=True),
    LasttimeStep(rnn_layer=3, bidirectional=True),
    nn.Linear(hidden_nodes*2, classes),
)

## Training

In [None]:
# Learning rate for the SGD variants; the Adam/AdamW optimizers are created with their defaults.
eta_0 = 0.001
# Classification loss handed to train_network.
loss_func = nn.CrossEntropyLoss()
# Metric name -> scoring callable, handed to train_network.
score_funcs = {"Accuracy": accuracy_score_wrapper}

In [None]:
def optimizers(model):
    """Lazily yield one optimizer per experiment, in the order of `experiments`.

    Yields plain SGD, SGD with momentum, SGD with Nesterov momentum (all with
    learning rate `eta_0`), then Adam and AdamW with their default settings.
    Each optimizer is only constructed when the generator is advanced to it.
    """
    sgd_variants = (
        {},
        {'momentum': 0.9},
        {'momentum': 0.9, 'nesterov': True},
    )
    for extra_kwargs in sgd_variants:
        yield torch.optim.SGD(model.parameters(), lr=eta_0, **extra_kwargs)
    for adaptive_cls in (torch.optim.Adam, torch.optim.AdamW):
        yield adaptive_cls(model.parameters())


# Run names, position-aligned with the optimizers yielded above.
experiments = (
    'SGD',
    'SGD+momentum',
    'SGD+momentom+nesterov',
    'adam',
    'adamW',
)

In [None]:
# Number of training epochs for every optimizer experiment.
epochs = 10
# Run-level settings logged to MLflow; the training loop adds an 'optimizer' entry per run.
params = {
    'device': device,
    'loss_func': loss_func.__class__.__name__,
    'epochs': epochs,
    'batch_size': batch_size
}

In [None]:
# The optimizer generator has to be created ONCE, outside the loop: calling
# optimizers(...) anew in every iteration restarts it, so next() would hand
# back plain SGD for all five experiments.
optimizer_iter = optimizers(rnn_3layer_bidir)
# Training results per experiment name, kept for later comparison/plotting.
all_results = {}

for experiment in experiments:
    print(experiment)
    # Start every experiment from freshly initialised weights.
    rnn_3layer_bidir.apply(weight_reset)
    # Next optimizer in the sequence, created after the reset as before.
    optimizer = next(optimizer_iter)
    # Clip every gradient element to [-5, 5] during backprop. The handles are kept so
    # the hooks can be removed after the run; otherwise another copy of the hook would
    # pile up on each parameter with every experiment (and every re-run of this cell).
    clip_handles = [
        p.register_hook(lambda grad: torch.clamp(grad, -5, 5))
        for p in rnn_3layer_bidir.parameters()
    ]
    params['optimizer'] = optimizer.defaults
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(rnn_3layer_bidir)))
    try:
        with mlflow.start_run(nested=True, run_name=f'{experiment}'):
            mlflow.log_params(params)
            mlflow.log_artifact('model_summary.txt')

            results = train_network(
                model=rnn_3layer_bidir,
                optimizer=optimizer,
                loss_func=loss_func,
                train_loader=train_loader,
                test_loader=test_loader,
                epochs=epochs,
                device=device,
                score_funcs=score_funcs,
                )
            all_results[experiment] = results
    finally:
        for handle in clip_handles:
            handle.remove()

<img src="./images/E5train_acc.png">