## Set-up

In [1]:
import torch
from torch.utils.data import Dataset
import torch_xla as xla
import torch_xla.core.xla_model as xm
import matplotlib
import matplotlib.pyplot as plt
import time
import numpy as np
import string
import unicodedata
import os




: 

In [4]:
# Select the XLA device (the recorded output shows 'xla:0') and make it the
# default device for tensors created afterwards. This is not a CUDA check.
# NOTE(review): the RuntimeError recorded for the later xla.launch(...) cell says
# the XLA device must not be used before spawning; this cell already uses it.
device_tpu = xm.xla_device()
# device = torch.device('cpu')  # CPU alternative, currently disabled

torch.set_default_device(device_tpu)
print(f"Using device = {torch.get_default_device()}")

Using device = xla:0


## Prepare and load data

In [5]:
# Alphabet used for encoding names: ASCII letters plus a handful of punctuation marks.
allowed_characters = string.ascii_letters + " .,;'"
n_letters = len(allowed_characters)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return ``s`` reduced to the characters listed in ``allowed_characters``.

    The string is NFD-normalised first, so accented letters decompose into a
    base letter followed by combining marks. Combining marks (Unicode category
    'Mn') and any character outside ``allowed_characters`` are dropped.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            # Combining accent left over from the decomposition.
            continue
        if ch in allowed_characters:
            kept.append(ch)
    return ''.join(kept)

In [6]:
# Execute namesdataset.py in this notebook's namespace (-i) so the names it
# defines are available to later cells (NamesDataset is used in the next cell).
%run -i 'namesdataset.py'

-- Initiate Dataset declaration
-- Dataset declaration finalized


In [7]:
# Build the dataset from the ../data/names path, then report its size and show
# the first item as a quick sanity check.
alldata = NamesDataset("../data/names")
print(f"loaded {len(alldata)} items of data")
print(f"example = {alldata[0]}")

loaded 20074 items of data
example = (tensor([12], device='xla:0'), tensor([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]], device='xla:0'), 'Chinese', 'Ang')


In [10]:
# Reproducible 85% / 15% split of the dataset, driven by a fixed-seed generator.
split_rng = torch.Generator().manual_seed(2024)
split_fractions = [.85, .15]
train_set, test_set = torch.utils.data.random_split(
    alldata, split_fractions, generator=split_rng
)

print(f"train examples = {len(train_set)}, validation examples = {len(test_set)}")

train examples = 17063, validation examples = 3011


## Prepare model

In [11]:
# Execute dnn.py in this notebook's namespace (-i).
# NOTE(review): presumably this is where DeepANN, train and evaluate come from — confirm.
%run -i './dnn.py'

-- Initiate model and training declaration
-- Finished model and training declaration


In [12]:
# Instantiate the classifier from the alphabet size, the hidden width and the
# number of unique labels (the printed summary shows 57 -> 128 -> 18).
n_hidden = 128
dnn = DeepANN(n_letters, n_hidden, len(alldata.labels_uniq))
print(dnn)

DeepANN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_deep_stack): Sequential(
    (0): Linear(in_features=57, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=18, bias=True)
  )
  (softmax): LogSoftmax(dim=1)
)




In [13]:
# Run one name through the model before training, as a baseline to compare
# against the post-training prediction.
# The tensors are deliberately not named `input`/`output`: a global called
# `input` would shadow the Python builtin for the rest of the kernel session.
print('Pre-training sample output:')
sample_input = lineToTensor('Albert')
sample_output = dnn(sample_input)  # this is equivalent to ``dnn.forward(sample_input)``
print(sample_output)
print(label_from_output(sample_output, alldata.labels_uniq))

Pre-training sample output:
tensor([[-3.0417, -3.1447, -2.8196, -3.0057, -2.7648, -2.8852, -2.9202, -2.8272,
         -2.9089, -2.8539, -2.7115, -2.9616, -2.9154, -2.9620, -2.9113, -2.9397,
         -2.7718, -2.7799]], device='xla:0', grad_fn=<LogSoftmaxBackward0>)
('German', 10)


In [None]:
# Environment variable values must be strings: assigning the int 8 raises
# TypeError ("str expected, not int"), so the value is written as '8'.
# NOTE(review): presumably this must be set before the XLA runtime starts in
# order to take effect — confirm and move the cell earlier if so.
os.environ['TPU_NUM_DEVICES'] = '8'

In [15]:
# Launch training through xla.launch and report the wall-clock duration.
# NOTE(review): as the recorded output shows, this cell currently fails with
# "Runtime is already initialized" because earlier cells already used the XLA
# device before the launch.
# NOTE(review): the result of xla.launch is not captured, so `all_losses`, which
# later cells read, is never assigned while the direct call below stays
# commented out.
start = time.time()

# Positional args: 27, 0.15 and 5 match n_epoch, learning_rate and report_every
# of the commented call below; the meaning of the trailing 3 is not visible
# here — TODO confirm against train() in dnn.py.
xla.launch(train, args=((dnn, train_set, 27, 0.15, 5, 3)))
# Single-process alternative (currently disabled):
# all_losses = train(dnn, train_set, n_epoch=27, learning_rate=0.15, report_every=5)
end = time.time()
print(f"training took {end-start}s")

RuntimeError: Runtime is already initialized. Do not use the XLA device before calling xmp.spawn.

In [None]:
# Repeat the sample prediction so it can be compared with the pre-training one.
# The tensors are deliberately not named `input`/`output`: a global called
# `input` would shadow the Python builtin for the rest of the kernel session.
print('Post-training sample output:')

sample_input = lineToTensor('Albert')
sample_output = dnn(sample_input)  # this is equivalent to ``dnn.forward(sample_input)``
print(sample_output)
print(label_from_output(sample_output, alldata.labels_uniq))

In [None]:
# Convert the collected losses to a NumPy array and display it.
# NOTE(review): `all_losses` must already exist; no cell visible above assigns
# it (the train(...) call that would is commented out in the training cell).
all_losses = np.array(all_losses)
all_losses

In [None]:
# Draw the recorded loss values as a line chart on a fresh figure.
fig, ax = plt.subplots()
ax.plot(all_losses)
plt.show()

In [None]:
# Evaluate the model on the held-out split, passing the unique label names as
# the classes.
evaluate(dnn, test_set, classes=alldata.labels_uniq)