## Set-up

In [1]:
import torch
from torch.utils.data import Dataset
import torch_xla as xla
import matplotlib
import matplotlib.pyplot as plt
import time
import numpy as np
import string
import unicodedata




In [2]:
# Check if CUDA is available
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')

torch.set_default_device(device)
print(f"Using device = {torch.get_default_device()}")

Using device = cpu


## Prepare and load Data

In [3]:
allowed_characters = string.ascii_letters + " .,;'"
n_letters = len(allowed_characters)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in allowed_characters
    )

In [10]:
%run -i 'namesdataset.py'

-- Initiate Dataset declaration
-- Dataset declaration finalized


In [11]:
alldata = NamesDataset("../data/names")
print(f"loaded {len(alldata)} items of data")
print(f"example = {alldata[0]}")

loaded 20074 items of data
example = (tensor([1]), tensor([[0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]]), 'Vietnamese', 'Nguyen')


In [12]:
train_set, test_set = torch.utils.data.random_split(alldata, [.85, .15], generator=torch.Generator(device=device).manual_seed(2024))

print(f"train examples = {len(train_set)}, validation examples = {len(test_set)}")

train examples = 17063, validation examples = 3011


## Prepare model

In [14]:
%run -i './dnn.py'

-- Initiate model and training declaration
-- Finished model and training declaration


In [15]:
n_hidden = 128
dnn = DeepANN(n_letters, n_hidden, len(alldata.labels_uniq))
print(dnn)



DeepANN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_deep_stack): Sequential(
    (0): Linear(in_features=57, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=18, bias=True)
  )
  (softmax): LogSoftmax(dim=1)
)


In [16]:
print('Pre-training sample output:')
input = lineToTensor('Albert')
output = dnn(input) #this is equivalent to ``output = dnn.forward(input)``
print(output)
print(label_from_output(output, alldata.labels_uniq))

Pre-training sample output:
tensor([[-2.8803, -2.9684, -2.9496, -3.0857, -3.0022, -2.8422, -2.8286, -2.9535,
         -2.8950, -2.7103, -2.9521, -3.0139, -2.7753, -2.8511, -2.8128, -2.8272,
         -2.8839, -2.8678]], grad_fn=<LogSoftmaxBackward0>)
('Japanese', 9)


In [17]:
start = time.time()

xla.launch(train, args=((dnn, train_set, 27, 0.15, 5, 3)))
# all_losses = train(dnn, train_set, n_epoch=27, learning_rate=0.15, report_every=5)
end = time.time()
print(f"training took {end-start}s")

-- Initiate model and training declaration
-- Finished model and training declaration


Process SpawnProcess-1:
Traceback (most recent call last):
  File "/home/pgmoka/miniconda3/envs/torch310/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/pgmoka/miniconda3/envs/torch310/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/pgmoka/miniconda3/envs/torch310/lib/python3.10/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/home/pgmoka/miniconda3/envs/torch310/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'NamesDataset' on <module '__mp_main__' from '/home/pgmoka/rnn_in_pytorchXLA/pytorchxla_example/dnn.py'>


BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

In [None]:
print('Post-training sample output:')

input = lineToTensor('Albert')
output = dnn(input) #this is equivalent to ``output = dnn.forward(input)``
print(output)
print(label_from_output(output, alldata.labels_uniq))

In [None]:
all_losses = np.array(all_losses)
all_losses

In [None]:
plt.figure()
plt.plot(all_losses)
plt.show()

In [None]:
evaluate(dnn, test_set, classes=alldata.labels_uniq)