# 🤖 Transformer Hands-on (30 minutes)

Welcome! In this short interactive session, we’ll explore what a Transformer model does and see it in action as it **completes sentences**.

🎯 **Goal**: Get an intuitive idea of what a Transformer is and try using a pre-trained model with just a few lines of code.


# Install Hugging Face datasets library if needed

In [1]:
import numpy as np

# 1) Define the sentence here (modify as desired)
sentence = "Sarah carried a light bag walking under the light"
tokens = sentence.lower().split()
print("Tokens:", tokens)

# 2) Set a small embedding dimension for clarity
embed_dim = 8

# 3) Simulate original embeddings: one unique vector per word
np.random.seed(0)
unique_tokens = list(dict.fromkeys(tokens))  # preserve order, remove duplicates
token_embeddings = {token: np.random.randn(embed_dim) for token in unique_tokens}

# 4) Build embeds array from token_embeddings dict
embeds = np.vstack([token_embeddings[token] for token in tokens])

# 5) Positional encoding function
def positional_encoding(position, d_model):
    angle_rates = 1 / np.power(10000, (2 * (np.arange(d_model)//2)) / np.float32(d_model))
    angle_rads = np.arange(position)[:, None] * angle_rates[None, :]
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    return angle_rads

# 6) Compute positional encodings
pos_enc = positional_encoding(len(tokens), embed_dim)

# 7) Compute final embeddings = original + positional
embeds_final = embeds + pos_enc

# 8) Display original, positional, and final embeddings for each token
for i, token in enumerate(tokens):
    print(f"\nToken '{token}' (position {i}):")
    print("  Original embedding   :", np.round(embeds[i], 3).tolist())
    print("  Positional encoding  :", np.round(pos_enc[i], 3).tolist())
    print("  Final embedding (sum):", np.round(embeds_final[i], 3).tolist())

Tokens: ['sarah', 'carried', 'a', 'light', 'bag', 'walking', 'under', 'the', 'light']

Token 'sarah' (position 0):
  Original embedding   : [1.764, 0.4, 0.979, 2.241, 1.868, -0.977, 0.95, -0.151]
  Positional encoding  : [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0]
  Final embedding (sum): [1.764, 1.4, 0.979, 3.241, 1.868, 0.023, 0.95, 0.849]

Token 'carried' (position 1):
  Original embedding   : [-0.103, 0.411, 0.144, 1.454, 0.761, 0.122, 0.444, 0.334]
  Positional encoding  : [0.841, 0.54, 0.1, 0.995, 0.01, 1.0, 0.001, 1.0]
  Final embedding (sum): [0.738, 0.951, 0.244, 2.449, 0.771, 1.122, 0.445, 1.334]

Token 'a' (position 2):
  Original embedding   : [1.494, -0.205, 0.313, -0.854, -2.553, 0.654, 0.864, -0.742]
  Positional encoding  : [0.909, -0.416, 0.199, 0.98, 0.02, 1.0, 0.002, 1.0]
  Final embedding (sum): [2.403, -0.621, 0.512, 0.126, -2.533, 1.653, 0.866, 0.258]

Token 'light' (position 3):
  Original embedding   : [2.27, -1.454, 0.046, -0.187, 1.533, 1.469, 0.155, 0.378]
  Pos

In [2]:
import torch
import torch.nn as nn

In [3]:
vocab = ['sarah', 'carried', 'a', 'light', 'bag', 'walking', 'under', 'the']
word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for word, idx in word2idx.items()}

In [4]:
sentence = "sarah carried a light bag walking under the light"
tokens = sentence.lower().split()
token_ids = [word2idx[word] for word in tokens]
input_tensor = torch.tensor(token_ids)  # shape: (seq_len,)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

In [5]:
embed_dim = 8
embedding = nn.Embedding(num_embeddings=len(vocab), embedding_dim=embed_dim)

In [6]:
embedded = embedding(input_tensor)  # shape: (seq_len, embed_dim)
print(embedded.shape)

torch.Size([9, 8])


In [7]:
for word in tokens:
    idx = word2idx[word]
    emb_vector = embedding(torch.tensor(idx))
    print(f"{word} --> {emb_vector.detach()}")

sarah --> tensor([-0.0354, -0.0360, -0.0273,  0.4576, -0.5828, -1.9049, -0.4362,  0.7440])
carried --> tensor([-0.4080,  0.7927, -0.9235,  0.0528, -1.1435,  1.9432, -0.5535,  0.3547])
a --> tensor([-0.2828,  0.3791, -1.0743,  0.5997, -0.2516,  0.7729,  0.2562, -1.2644])
light --> tensor([ 1.3198, -0.0903,  1.1957,  0.9336,  0.0647,  0.7454,  3.4284,  0.9872])
bag --> tensor([-1.7117,  0.1749, -0.1585,  0.8617, -0.3888,  1.6417,  0.5674, -1.1896])
walking --> tensor([ 0.7275,  1.2251,  0.4950,  0.5525, -0.2189,  0.2997, -0.3747,  0.8317])
under --> tensor([-0.0642,  0.4200, -0.7438, -0.0858,  0.3095,  0.3209,  0.3215,  0.0050])
the --> tensor([-0.8736, -1.7338, -0.7238,  1.9924, -0.1571, -0.0962,  0.4507,  0.2352])
light --> tensor([ 1.3198, -0.0903,  1.1957,  0.9336,  0.0647,  0.7454,  3.4284,  0.9872])


In [8]:
!pip install torchtext



In [9]:
import torch
from torchtext.vocab import GloVe

# 1. Carica GloVe da torchtext
glove = GloVe(name='6B', dim=50)  # usa GloVe 10-dimensional


In [10]:
# 2. Frase di esempio
sentence = "Sarah carried a light bag walking under the light"
tokens = sentence.lower().split()

# 3. Estrai e stampa embedding per ogni parola
print("Word → Embedding vector:")
for word in tokens:
    if word in glove.stoi:
        emb = glove[word]
        print(f"{word:>10} → {emb[:5]}")  # first 5 elements for comparing them simply....
    else:
        print(f"{word:>10} → [UNK]")

Word → Embedding vector:
     sarah → tensor([-0.4671,  1.6665,  0.1289, -0.1402,  0.4243])
   carried → tensor([ 0.6284, -0.0887,  0.3397, -0.5304,  0.0704])
         a → tensor([ 0.2171,  0.4651, -0.4676,  0.1008,  1.0135])
     light → tensor([ 0.0063,  0.4725, -0.0733, -0.0060,  0.3675])
       bag → tensor([-0.0282, -0.2216,  0.4478, -0.1850,  0.9992])
   walking → tensor([ 0.2787,  0.7067, -0.3070, -0.5401,  0.6306])
     under → tensor([ 0.1372, -0.2950, -0.0592, -0.5924,  0.0230])
       the → tensor([ 0.4180,  0.2497, -0.4124,  0.1217,  0.3453])
     light → tensor([ 0.0063,  0.4725, -0.0733, -0.0060,  0.3675])


In [12]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np  # Explicitly import numpy again

# Carica GloVe 50-dimensional
glove = GloVe(name='6B', dim=50)

# Parole da confrontare
words = ["king", "queen", "man", "woman", "prince", "princess", "doctor", "nurse"]

# Prendi i vettori dal vocabolario
valid_words = [w for w in words if w in glove.stoi]

# Check NumPy version to confirm availability
print(f"NumPy version: {np.__version__}")

vectors = torch.stack([glove[w] for w in valid_words]).numpy()

# PCA in 2D
pca = PCA(n_components=2)
coords = pca.fit_transform(vectors)

# Plot
plt.figure(figsize=(8, 6))
for i, word in enumerate(valid_words):
    x, y = coords[i]
    plt.scatter(x, y)
    plt.text(x + 0.01, y + 0.01, word, fontsize=12)
plt.title("GloVe Embeddings – relazioni semantiche (PCA)")
plt.grid(True)
plt.show()

NumPy version: 2.0.2


RuntimeError: Numpy is not available