In [None]:
import os
import torch
import random
import numpy as np
from transformers import BertTokenizer, BertModel, BertConfig

# Fungsi untuk memuat model yang sudah dilatih
def load_model(model, load_model_name="model", load_model_dir="E:/code/project-list/bert-hfacs/models/model_trained/"):
    # Pastikan direktori ada, jika tidak buat
    os.makedirs(load_model_dir, exist_ok=True)

    # Gabungkan nama model dengan ekstensi .pth
    load_model_name_with_extension = load_model_name + ".pth"

    # Gabungkan direktori dan nama file model
    load_path = os.path.join(load_model_dir, load_model_name_with_extension)

    # Memuat model state_dict
    model.load_state_dict(
        torch.load(
            load_path,
            map_location=torch.device('cpu'),  # Ganti ke 'cuda' jika menggunakan GPU
        )
    )
    print("Model Weight Loaded")
    return model

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# Seed for CUDA
set_seed(1)

torch.set_printoptions(profile="full")

In [18]:
# 1. Load tokenizer dan model IndoBERT
tokenizer = BertTokenizer.from_pretrained("E:/code/project-list/bert-hfacs/models/indobert_base")
model = BertModel.from_pretrained("E:/code/project-list/bert-hfacs/models/indobert_base")

In [None]:
# 1. Buat config dengan arsitektur paling sederhana
config = BertConfig(
    hidden_size=4,  # Ukuran hidden layer, harus sama denan intermediate size
    num_attention_heads=1,  # 1 attention head
    num_hidden_layers=1,  # 1 layer
    intermediate_size=4,  # Ukuran layer feed-forward (sederhana)
    vocab_size=30522,  # Ukuran vocabulary standar BERT
    max_position_embeddings=512,  # Ukuran input yang bisa diterima model
)

In [19]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
model = BertModel(config)

In [20]:
# 2. Contoh input teks
input_text = "supir mobil kantuk"
tokens = tokenizer(input_text, return_tensors="pt")

In [21]:
torch.set_printoptions(profile="full")

In [22]:
tokenizer.decode(tokens["input_ids"][0])

'[CLS] supir mobil kantuk [SEP]'

In [23]:
print(model)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 4, padding_idx=0)
    (position_embeddings): Embedding(512, 4)
    (token_type_embeddings): Embedding(2, 4)
    (LayerNorm): LayerNorm((4,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=4, out_features=4, bias=True)
            (key): Linear(in_features=4, out_features=4, bias=True)
            (value): Linear(in_features=4, out_features=4, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=4, out_features=4, bias=True)
            (LayerNorm): LayerNorm((4,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (i

In [24]:
tokens

{'input_ids': tensor([[    2, 14884,   895, 27395,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

In [25]:
input_embeddings = model.embeddings(tokens['input_ids'])
input_embeddings

tensor([[[-1.6163,  0.9201,  1.1345, -0.4383],
         [-1.2845,  1.7137, -0.5747,  0.1455],
         [-1.8702,  0.7533,  0.9117,  0.2052],
         [-1.6609,  0.7850,  1.2060, -0.3302],
         [ 0.5764, -0.0000,  1.5312, -1.1957]]], grad_fn=<MulBackward0>)

In [26]:
print(input_embeddings.shape)

torch.Size([1, 5, 4])


In [27]:
# 4. Mengakses bobot Q, K, V pada layer pertama dan mencetak bobot awal sebelum update
layer_index = 0
query_weights = model.encoder.layer[layer_index].attention.self.query.weight
key_weights = model.encoder.layer[layer_index].attention.self.key.weight
value_weights = model.encoder.layer[layer_index].attention.self.value.weight

print("Bobot awal Query (sebelum update):", query_weights)
print("Bobot awal Key (sebelum update):", key_weights)
print("Bobot awal Value (sebelum update):", value_weights)

Bobot awal Query (sebelum update): Parameter containing:
tensor([[-0.0271,  0.0188, -0.0045, -0.0036],
        [ 0.0110,  0.0145, -0.0327,  0.0227],
        [-0.0190,  0.0167,  0.0314, -0.0204],
        [ 0.0054, -0.0004, -0.0021, -0.0029]], requires_grad=True)
Bobot awal Key (sebelum update): Parameter containing:
tensor([[ 0.0063,  0.0292,  0.0430, -0.0197],
        [ 0.0190,  0.0304,  0.0208, -0.0135],
        [ 0.0076, -0.0022,  0.0024,  0.0209],
        [-0.0273,  0.0289,  0.0046, -0.0183]], requires_grad=True)
Bobot awal Value (sebelum update): Parameter containing:
tensor([[-0.0131,  0.0152, -0.0041, -0.0043],
        [-0.0060, -0.0177, -0.0194, -0.0141],
        [ 0.0275, -0.0086, -0.0065, -0.0487],
        [ 0.0359, -0.0056,  0.0079,  0.0053]], requires_grad=True)


In [28]:
query_bias = model.encoder.layer[layer_index].attention.self.query.bias
key_bias = model.encoder.layer[layer_index].attention.self.key.bias
value_bias = model.encoder.layer[layer_index].attention.self.value.bias

In [29]:
print("Bobot awal Query (sebelum update):", query_bias)
print("Bobot awal Key (sebelum update):", key_bias)
print("Bobot awal Value (sebelum update):", value_bias)

Bobot awal Query (sebelum update): Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)
Bobot awal Key (sebelum update): Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)
Bobot awal Value (sebelum update): Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)


In [30]:
print(query_weights.shape)

torch.Size([4, 4])


In [31]:
# 5. Menghitung nilai Q, K, V untuk setiap kata dalam input embeddings
query = torch.matmul(input_embeddings, query_weights.T)
key = torch.matmul(input_embeddings, key_weights.T)
value = torch.matmul(input_embeddings, value_weights.T)

In [32]:
print("Nilai value:", query)
print("Nilai value:", key)
print("Nilai value:", value)

Nilai value: tensor([[[ 0.0576, -0.0515,  0.0906, -0.0103],
         [ 0.0692,  0.0329,  0.0319, -0.0069],
         [ 0.0601, -0.0348,  0.0725, -0.0130],
         [ 0.0556, -0.0538,  0.0892, -0.0109],
         [-0.0182, -0.0709,  0.0616,  0.0034]]], grad_fn=<UnsafeViewBackward0>)
Nilai value: tensor([[[ 0.0741,  0.0267, -0.0207,  0.0841],
         [ 0.0143,  0.0137, -0.0118,  0.0794],
         [ 0.0454,  0.0035, -0.0093,  0.0734],
         [ 0.0708,  0.0218, -0.0183,  0.0798],
         [ 0.0931,  0.0589, -0.0169,  0.0132]]], grad_fn=<UnsafeViewBackward0>)
Nilai value: tensor([[[ 0.0324, -0.0223, -0.0384, -0.0566],
         [ 0.0447, -0.0134, -0.0534, -0.0595],
         [ 0.0314, -0.0226, -0.0738, -0.0631],
         [ 0.0302, -0.0226, -0.0442, -0.0563],
         [-0.0088, -0.0163,  0.0641,  0.0265]]], grad_fn=<UnsafeViewBackward0>)


In [None]:
# 6. Menghitung skor attention antara setiap pasangan kata
d_k = query.size(-1)
attention_scores = torch.matmul(query, key.transpose(-1, -2)) / torch.sqrt(torch.tensor(d_k, dtype=torch.float32))


In [34]:
print(d_k)
print(attention_scores)

4
tensor([[[ 7.9609e-05, -8.8093e-04,  4.1720e-04,  2.4197e-04,  3.3015e-04],
         [ 2.3832e-03,  2.6061e-04,  1.2257e-03,  2.2419e-03,  3.8722e-03],
         [ 4.6556e-04, -7.4992e-04,  4.8663e-04,  5.6720e-04,  1.0701e-03],
         [-4.0757e-05, -9.2945e-04,  3.4909e-04,  1.3050e-04,  1.7330e-04],
         [-2.1183e-03, -8.4624e-04, -7.0224e-04, -1.8478e-03, -3.4384e-03]]],
       grad_fn=<DivBackward0>)


In [None]:
# 7. Normalisasi softmax pada skor attention 
attention_probs = F.softmax(attention_scores, dim=-1)

In [36]:
print(attention_probs)

tensor([[[0.2000, 0.1998, 0.2001, 0.2000, 0.2001],
         [0.2001, 0.1997, 0.1998, 0.2000, 0.2004],
         [0.2000, 0.1998, 0.2000, 0.2000, 0.2001],
         [0.2000, 0.1998, 0.2001, 0.2000, 0.2000],
         [0.1999, 0.2002, 0.2002, 0.2000, 0.1997]]],
       grad_fn=<SoftmaxBackward0>)


In [37]:
# 8. Hitung representasi konteksual untuk setiap kata berdasarkan attention weights
self_attention = torch.matmul(attention_probs, value)

In [None]:
print(self_attention)

tensor([[[ 0.0260, -0.0195, -0.0291, -0.0418],
         [ 0.0260, -0.0194, -0.0291, -0.0418],
         [ 0.0260, -0.0195, -0.0291, -0.0418],
         [ 0.0260, -0.0195, -0.0291, -0.0418],
         [ 0.0260, -0.0194, -0.0292, -0.0418]]], grad_fn=<UnsafeViewBackward0>)


In [38]:
# 9. Feed-forward processing dalam layer encoder (berulang sesuai layer model IndoBERT)
layer_output = model.encoder.layer[layer_index].output.dense(self_attention)
print(layer_output)

tensor([[[ 0.0017,  0.0011, -0.0017, -0.0001],
         [ 0.0017,  0.0011, -0.0017, -0.0001],
         [ 0.0017,  0.0011, -0.0017, -0.0001],
         [ 0.0017,  0.0011, -0.0017, -0.0001],
         [ 0.0017,  0.0012, -0.0017, -0.0001]]], grad_fn=<ViewBackward0>)


In [39]:
layer_index = 0  # Contoh untuk layer pertama
layer_output_weights = model.encoder.layer[layer_index].output.dense.weight
layer_output_bias = model.encoder.layer[layer_index].output.dense.bias

print("Layer Output Weights:", layer_output_weights)
print("Layer Output Bias:", layer_output_bias)

Layer Output Weights: Parameter containing:
tensor([[-0.0343, -0.0583, -0.0181, -0.0213],
        [-0.0288,  0.0012, -0.0244, -0.0290],
        [-0.0044, -0.0004,  0.0199,  0.0236],
        [ 0.0125, -0.0260,  0.0119,  0.0142]], requires_grad=True)
Layer Output Bias: Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)


In [40]:
# 10. Mengambil representasi dari token [CLS] untuk klasifikasi
cls_representation = model.pooler.dense(layer_output[:, 0, :])

In [41]:
# 11. Layer klasifikasi sederhana dengan 2 kelas
classification_layer = nn.Linear(model.config.hidden_size, 2)
logits = classification_layer(cls_representation)
predictions = torch.argmax(logits, dim=-1)  # Mengambil prediksi kelas

In [42]:
classification_weights = classification_layer.weight
classification_bias = classification_layer.bias

print("Classification Layer Weights:", classification_weights)
print("Classification Layer Bias:", classification_bias)

Classification Layer Weights: Parameter containing:
tensor([[ 0.2441,  0.0687,  0.4924,  0.0422],
        [-0.1222,  0.1192,  0.4068, -0.3231]], requires_grad=True)
Classification Layer Bias: Parameter containing:
tensor([0.1400, 0.2900], requires_grad=True)


In [43]:
print(logits)

tensor([[0.1400, 0.2901]], grad_fn=<AddmmBackward0>)


In [44]:
print(predictions)

tensor([1])


In [45]:
# 12. Definisikan label target untuk contoh ini
labels = torch.tensor([1])  # Misalnya label kelas positif, karena mengantuk termasuk PRE

In [46]:
# 13. Hitung loss
criterion = nn.CrossEntropyLoss()
loss = criterion(logits, labels)

In [None]:
# 15. Optimizer (misalnya Adam)
optimizer = optim.Adam(model.parameters(), lr=1e-1)

In [None]:
optimizer.zero_grad()

In [47]:
# 14. Backward pass untuk menghitung gradien
loss.backward()

In [None]:
# 16. Logging Gradient
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f"Gradien {name}: {param.grad}")

In [49]:
# 16. Logging bobot Q, K, V sebelum update
for name, param in model.named_parameters():
    if "encoder.layer.0.attention.self.query.weight" in name:
        print("Bobot Query sebelum update:", param.data)
    if "encoder.layer.0.attention.self.key.weight" in name:
        print("Bobot Key sebelum update:", param.data)
    if "encoder.layer.0.attention.self.value.weight" in name:
        print("Bobot Value sebelum update:", param.data)

Bobot Query sebelum update: tensor([[-0.0271,  0.0188, -0.0045, -0.0036],
        [ 0.0110,  0.0145, -0.0327,  0.0227],
        [-0.0190,  0.0167,  0.0314, -0.0204],
        [ 0.0054, -0.0004, -0.0021, -0.0029]])
Bobot Key sebelum update: tensor([[ 0.0063,  0.0292,  0.0430, -0.0197],
        [ 0.0190,  0.0304,  0.0208, -0.0135],
        [ 0.0076, -0.0022,  0.0024,  0.0209],
        [-0.0273,  0.0289,  0.0046, -0.0183]])
Bobot Value sebelum update: tensor([[-0.0131,  0.0152, -0.0041, -0.0043],
        [-0.0060, -0.0177, -0.0194, -0.0141],
        [ 0.0275, -0.0086, -0.0065, -0.0487],
        [ 0.0359, -0.0056,  0.0079,  0.0053]])


In [50]:
# 17. Update bobot
optimizer.step()

In [51]:
# 18. Logging bobot Q, K, V setelah update
for name, param in model.named_parameters():
    if "encoder.layer.0.attention.self.query.weight" in name:
        print("Bobot Query setelah update:", param.data)
    if "encoder.layer.0.attention.self.key.weight" in name:
        print("Bobot Key setelah update:", param.data)
    if "encoder.layer.0.attention.self.value.weight" in name:
        print("Bobot Value setelah update:", param.data)

Bobot Query setelah update: tensor([[-0.1215,  0.1093,  0.0876, -0.0855],
        [-0.0830,  0.1043,  0.0589, -0.0581],
        [ 0.0393, -0.0276, -0.0181,  0.0071],
        [ 0.1009, -0.0928, -0.0958,  0.0823]])
Bobot Key setelah update: tensor([[ 0.1026, -0.0635,  0.1353, -0.1132],
        [-0.0768,  0.1223, -0.0706,  0.0794],
        [ 0.1052, -0.0974,  0.0973, -0.0749],
        [-0.1096,  0.0983, -0.0634,  0.0538]])
Bobot Value setelah update: tensor([[-0.1131,  0.1152,  0.0959, -0.1042],
        [ 0.0940, -0.1177, -0.1194,  0.0859],
        [-0.0725,  0.0914,  0.0935, -0.1487],
        [-0.0641,  0.0944,  0.1079, -0.0947]])


In [None]:
layer_index = 0 
updated_layer_output_weights = model.encoder.layer[layer_index].output.dense.weight
updated_layer_output_bias = model.encoder.layer[layer_index].output.dense.bias

print("Updated Layer Output Weights:", updated_layer_output_weights)
print("Updated Layer Output Bias:", updated_layer_output_bias)

Updated Layer Output Weights: Parameter containing:
tensor([[ 0.0657, -0.1583, -0.1181, -0.1213],
        [-0.1288,  0.1012,  0.0756,  0.0710],
        [-0.1040,  0.0992,  0.1196,  0.1234],
        [ 0.1125, -0.1260, -0.0881, -0.0858]], requires_grad=True)
Updated Layer Output Bias: Parameter containing:
tensor([ 0.1000, -0.1000, -0.1000,  0.1000], requires_grad=True)


In [53]:
updated_classification_weights = classification_layer.weight
updated_classification_bias = classification_layer.bias

print("Updated Classification Layer Weights:", updated_classification_weights)
print("Updated Classification Layer Bias:", updated_classification_bias)

Updated Classification Layer Weights: Parameter containing:
tensor([[ 0.2441,  0.0687,  0.4924,  0.0422],
        [-0.1222,  0.1192,  0.4068, -0.3231]], requires_grad=True)
Updated Classification Layer Bias: Parameter containing:
tensor([0.1400, 0.2900], requires_grad=True)
