In [16]:
# Setup and checks
import torch
import torch.nn as nn
import numpy as np
import random

# Utilities
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('PyTorch version:', torch.__version__)
print('Device:', device)


PyTorch version: 2.9.1+cpu
Device: cpu


In [17]:
## Section 2 — Phần 1: Khám phá Tensor (Task 1.1: Tạo Tensor)
import numpy as np

# Tạo tensor từ list
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)
print('Tensor từ list:\n', x_data)

# Tạo tensor từ NumPy array
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
print('\nTensor từ NumPy array:\n', x_np)

# Ones and random like
x_ones = torch.ones_like(x_data)
print('\nOnes Tensor:\n', x_ones)

x_rand = torch.rand_like(x_data, dtype=torch.float)
print('\nRandom Tensor:\n', x_rand)

print('\nShape:', x_rand.shape)
print('Dtype:', x_rand.dtype)
print('Device:', x_rand.device)


Tensor từ list:
 tensor([[1, 2],
        [3, 4]])

Tensor từ NumPy array:
 tensor([[1, 2],
        [3, 4]])

Ones Tensor:
 tensor([[1, 1],
        [1, 1]])

Random Tensor:
 tensor([[0.8823, 0.9150],
        [0.3829, 0.9593]])

Shape: torch.Size([2, 2])
Dtype: torch.float32
Device: cpu


In [18]:
## Task 1.2 — Các phép toán trên Tensor
# 1) Cộng x_data với chính nó
print('x_data + x_data:\n', x_data + x_data)
# 2) Nhân x_data với 5
print('\nx_data * 5:\n', x_data * 5)
# 3) Nhân ma trận với ma trận chuyển vị
print('\nx_data @ x_data.T:\n', x_data @ x_data.t())

# In-place vs out-of-place
a = x_data.clone()
print('\nBefore a.add_(1):', a)
a.add_(1)  # in-place
print('After a.add_(1):', a)
b = x_data + 1  # out-of-place
print('x_data (unchanged):', x_data)
print('b (new tensor):', b)


x_data + x_data:
 tensor([[2, 4],
        [6, 8]])

x_data * 5:
 tensor([[ 5, 10],
        [15, 20]])

x_data @ x_data.T:
 tensor([[ 5, 11],
        [11, 25]])

Before a.add_(1): tensor([[1, 2],
        [3, 4]])
After a.add_(1): tensor([[2, 3],
        [4, 5]])
x_data (unchanged): tensor([[1, 2],
        [3, 4]])
b (new tensor): tensor([[2, 3],
        [4, 5]])


In [28]:
## Task 1.3 — Indexing và Slicing
# Hàng đầu tiên
row0 = x_data[0]
print('Hàng đầu tiên:', row0)
# Cột thứ hai
col1 = x_data[:, 1]
print('Cột thứ hai:', col1)
# Giá trị hàng 2 cột 2 (index bắt đầu từ 0)
val = x_data[1, 1]
print('Giá trị ở hàng 2, cột 2:', val)

# Slice và sửa (view vs copy)
A = torch.arange(12).view(3,4)
print('\nA:\n', A)
s = A[:, :2]
print('Slice s (A[:, :2]):\n', s)
s[0,0] = 999
print('Sau khi sửa s[0,0]=999 -> A:\n', A)
print('\nGhi chú: slicing trong PyTorch tạo ra view (tham chiếu) nếu có thể')

Hàng đầu tiên: tensor([1, 2])
Cột thứ hai: tensor([2, 4])
Giá trị ở hàng 2, cột 2: tensor(4)

A:
 tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
Slice s (A[:, :2]):
 tensor([[0, 1],
        [4, 5],
        [8, 9]])
Sau khi sửa s[0,0]=999 -> A:
 tensor([[999,   1,   2,   3],
        [  4,   5,   6,   7],
        [  8,   9,  10,  11]])

Ghi chú: slicing trong PyTorch tạo ra view (tham chiếu) nếu có thể


In [29]:
## Task 1.4 — Thay đổi hình dạng Tensor
x = torch.rand(4,4)
print('x:\n', x)
print('x shape:', x.shape)

x_view = x.view(16,1)
print('\nx.view(16,1) shape:', x_view.shape)

x_reshape = x.reshape(16,1)
print('x.reshape(16,1) shape:', x_reshape.shape)
print('numel:', x.numel())


x:
 tensor([[0.6311, 0.5379, 0.9891, 0.8462],
        [0.1670, 0.5482, 0.7697, 0.8823],
        [0.6707, 0.7125, 0.4077, 0.5108],
        [0.1539, 0.9116, 0.8937, 0.2247]])
x shape: torch.Size([4, 4])

x.view(16,1) shape: torch.Size([16, 1])
x.reshape(16,1) shape: torch.Size([16, 1])
numel: 16


In [30]:
## Section 2 — Phần 2: Autograd (Task 2.1)
# Tạo tensor require gradient
x = torch.ones(1, requires_grad=True)
print('x:', x)

# Phép toán
y = x + 2
print('y:', y)
print('grad_fn của y:', y.grad_fn)

z = y * y * 3
print('z:', z)

# backward 1 lần
z.backward()
print('x.grad sau backward():', x.grad)

# Gọi backward lần nữa sẽ cộng dồn gradient (accumulation)
try:
    z.backward()
    print('x.grad sau backward() lần 2:', x.grad)
except Exception as e:
    print('Gặp lỗi khi gọi backward lần 2:', e)

# Reset gradient
x.grad.zero_()
print('x.grad sau x.grad.zero_():', x.grad)


x: tensor([1.], requires_grad=True)
y: tensor([3.], grad_fn=<AddBackward0>)
grad_fn của y: <AddBackward0 object at 0x00000197E50A0B50>
z: tensor([27.], grad_fn=<MulBackward0>)
x.grad sau backward(): tensor([18.])
Gặp lỗi khi gọi backward lần 2: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
x.grad sau x.grad.zero_(): tensor([0.])


In [31]:
## Section 3 — torch.nn: Linear and Embedding (Task 3.1 & 3.2)
# nn.Linear demo
linear_layer = nn.Linear(in_features=5, out_features=2)
input_tensor = torch.randn(3,5)
output = linear_layer(input_tensor)
print('Input shape:', input_tensor.shape)
print('Output shape:', output.shape)
print('Weights shape:', linear_layer.weight.shape)
print('Bias shape:', linear_layer.bias.shape)

# Compute a simple loss and backward to see gradient
loss = output.sum()
loss.backward()
print('\nGradients for linear.weight (sum of grads):', linear_layer.weight.grad.abs().sum())

# Reset grads
linear_layer.zero_grad()

# nn.Embedding demo
embedding_layer = nn.Embedding(num_embeddings=10, embedding_dim=3)
input_indices = torch.LongTensor([1, 5, 0, 8])
embeddings = embedding_layer(input_indices)
print('\nEmbedding input shape:', input_indices.shape)
print('Embeddings shape:', embeddings.shape)

# Loss on embeddings
emb_loss = embeddings.sum()
emb_loss.backward()
print('Embedding weight grad (partial sum):', embedding_layer.weight.grad.abs().sum())


Input shape: torch.Size([3, 5])
Output shape: torch.Size([3, 2])
Weights shape: torch.Size([2, 5])
Bias shape: torch.Size([2])

Gradients for linear.weight (sum of grads): tensor(5.5789)

Embedding input shape: torch.Size([4])
Embeddings shape: torch.Size([4, 3])
Embedding weight grad (partial sum): tensor(12.)


In [32]:
## Task 3.3 — Kết hợp thành một nn.Module (MyFirstModel)
from torch import nn

class MyFirstModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(MyFirstModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.linear = nn.Linear(embedding_dim, hidden_dim)
        self.activation = nn.ReLU()
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, indices):
        embeds = self.embedding(indices)
        hidden = self.activation(self.linear(embeds))
        output = self.output_layer(hidden)
        return output

model = MyFirstModel(vocab_size=100, embedding_dim=16, hidden_dim=8, output_dim=2)
input_data = torch.LongTensor([[1, 2, 5, 9]])
output_data = model(input_data)
print('Model output shape:', output_data.shape)

# Unit test-like assertion
assert output_data.shape == (1, 4, 2)
print('Forward test passed: output shape correct')


Model output shape: torch.Size([1, 4, 2])
Forward test passed: output shape correct


In [33]:
## Section 4 — RNN cho Token Classification: Định nghĩa mô hình
class SimpleRNNForTokenClassification(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_tags):
        super(SimpleRNNForTokenClassification, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, num_tags)

    def forward(self, sentence):
        embeds = self.embedding(sentence)
        rnn_out, _ = self.rnn(embeds)
        tag_scores = self.linear(rnn_out)
        return tag_scores

# Create toy model and inputs
vocab_size = 20
embedding_dim = 8
hidden_dim = 6
num_tags = 5
model_rnn = SimpleRNNForTokenClassification(vocab_size, embedding_dim, hidden_dim, num_tags)

# toy batch: batch_size=2, seq_len=4
toy_input = torch.LongTensor([[1,2,3,4],[2,3,4,0]])
outs = model_rnn(toy_input)
print('tag_scores shape:', outs.shape)  # (batch_size, seq_len, num_tags)


tag_scores shape: torch.Size([2, 4, 5])


In [34]:
## Section 4 (continued) — Huấn luyện mô hình RNN trên dữ liệu giả (toy data)
# Create toy dataset (random) for token classification
vocab_size = 30
seq_len = 6
batch_size = 16
num_tags = 4

# random inputs and random labels
X = torch.randint(low=0, high=vocab_size, size=(batch_size, seq_len))
Y = torch.randint(low=0, high=num_tags, size=(batch_size, seq_len))

rnn_model = SimpleRNNForTokenClassification(vocab_size, embedding_dim=8, hidden_dim=16, num_tags=num_tags)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnn_model.parameters(), lr=0.01)

# Training loop (few epochs)
for epoch in range(6):
    optimizer.zero_grad()
    outputs = rnn_model(X)  # (batch, seq_len, num_tags)
    # reshape to (batch*seq_len, num_tags)
    outputs_flat = outputs.view(-1, num_tags)
    labels_flat = Y.view(-1)
    loss = criterion(outputs_flat, labels_flat)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, loss: {loss.item():.4f}')

# Simple predict on X[:2]
with torch.no_grad():
    preds = rnn_model(X[:2]).argmax(dim=-1)
    print('Preds shape:', preds.shape)
    print('Preds (first 2):\n', preds)


Epoch 1, loss: 1.4318
Epoch 2, loss: 1.3943
Epoch 3, loss: 1.3624
Epoch 4, loss: 1.3346
Epoch 5, loss: 1.3093
Epoch 6, loss: 1.2855
Preds shape: torch.Size([2, 6])
Preds (first 2):
 tensor([[0, 0, 3, 2, 0, 2],
        [3, 0, 3, 0, 3, 3]])


In [35]:
## Section 5 — So sánh RNN / LSTM / GRU (ví dụ outputs & hidden states)
rnn = nn.RNN(8, 10, batch_first=True)
lstm = nn.LSTM(8, 10, batch_first=True)
gru = nn.GRU(8, 10, batch_first=True)

x = torch.randn(2, 5, 8)

r_out, r_h = rnn(x)
print('RNN out shape:', r_out.shape, 'hidden shape:', r_h.shape)

l_out, (l_h, l_c) = lstm(x)
print('LSTM out shape:', l_out.shape, 'hidden shape:', l_h.shape, 'cell shape:', l_c.shape)

g_out, g_h = gru(x)
print('GRU out shape:', g_out.shape, 'hidden shape:', g_h.shape)


RNN out shape: torch.Size([2, 5, 10]) hidden shape: torch.Size([1, 2, 10])
LSTM out shape: torch.Size([2, 5, 10]) hidden shape: torch.Size([1, 2, 10]) cell shape: torch.Size([1, 2, 10])
GRU out shape: torch.Size([2, 5, 10]) hidden shape: torch.Size([1, 2, 10])


In [36]:
## Section 6 — Tiện ích và kiểm thử nhỏ
# small utilities
def to_device(tensor_or_model, device):
    try:
        return tensor_or_model.to(device)
    except Exception:
        # If it's a model
        for p in tensor_or_model.parameters():
            p.data = p.data.to(device)
        return tensor_or_model

# set_seed already defined earlier

# Small asserts / tests
# NumPy <-> Tensor
arr = np.array([1.0,2.0,3.0])
T = torch.from_numpy(arr)
assert isinstance(T, torch.Tensor)

# device assignment
_t = torch.tensor([1.0])
_t = _t.to(device)
assert _t.device == device

# model output size test
m = MyFirstModel(vocab_size=50, embedding_dim=4, hidden_dim=6, output_dim=3)
out = m(torch.LongTensor([[1,2,3]]))
assert out.shape == (1, 3, 3)
print('Small checks passed')


Small checks passed
