In [None]:
import numpy as np

# 1. Data and preprocessing
# Toy corpus for part-of-speech tagging: every sentence is paired with a
# space-separated tag sequence (one tag per word).
sentences = [
    "the cat sat on the mat",
    "the dog lay on the bed",
    "the bird flew to the tree"
]

labels = [
    "DET NOUN VERB PREP DET NOUN",
    "DET NOUN VERB PREP DET NOUN",
    "DET NOUN VERB PREP DET NOUN"
]

# Fail fast if the corpus is misaligned: each word needs exactly one tag.
assert len(sentences) == len(labels), "sentences and labels differ in length"
assert all(
    len(s.split()) == len(l.split()) for s, l in zip(sentences, labels)
), "every sentence needs exactly one tag per word"

# Build the vocabulary and the tag set.
# sorted() makes the index assignment deterministic: iterating a set of
# strings can yield a different order on every interpreter start (string
# hashing is randomised), which would make the word/tag indices -- and so
# every trained weight -- change between kernel restarts.
words = sorted(set(" ".join(sentences).split()))
tags = sorted(set(" ".join(labels).split()))

# Word/tag <-> index lookup tables
word_to_idx = {word: i for i, word in enumerate(words)}
idx_to_word = {i: word for i, word in enumerate(words)}
tag_to_idx = {tag: i for i, tag in enumerate(tags)}
idx_to_tag = {i: tag for i, tag in enumerate(tags)}

vocab_size = len(words)
tag_size = len(tags)

print("Vocabulary:", word_to_idx)
print("Tags:", tag_to_idx)

In [None]:
# Helper that turns a sentence into a sequence of vocabulary indices
def text_to_indices(text, word_map):
    """Map each whitespace-separated word of `text` to its index.

    Args:
        text: sentence whose words are separated by whitespace.
        word_map: mapping from word to integer index.

    Returns:
        List with one index per word, in sentence order.

    Raises:
        KeyError: if a word is missing from `word_map`.
    """
    indices = []
    for token in text.split():
        indices.append(word_map[token])
    return indices

def labels_to_indices(label, tag_map):
    """Map each whitespace-separated tag of `label` to its index.

    Args:
        label: tag sequence whose tags are separated by whitespace.
        tag_map: mapping from tag to integer index.

    Returns:
        List with one index per tag, in sequence order.

    Raises:
        KeyError: if a tag is missing from `tag_map`.
    """
    tag_names = label.split()
    return list(map(tag_map.__getitem__, tag_names))

# Encode every sentence and every tag sequence as a list of integer indices
X_data = list(map(lambda sentence: text_to_indices(sentence, word_to_idx), sentences))
Y_data = list(map(lambda label: labels_to_indices(label, tag_to_idx), labels))

# Model hyperparameters
# embedding_dim: width of each row of the embedding matrix
# hidden_dim:    width of the recurrent hidden state
embedding_dim, hidden_dim = 5, 10
learning_rate = 1e-2   # step size used for the weight updates
n_epochs = 200         # number of passes over the training sentences

In [None]:
# 2. Weight initialisation
# Fixed seed so the random draws below are repeatable. The draw order
# (W, U, V, E) determines which values each matrix receives.
np.random.seed(42)
W = np.random.standard_normal(size=(embedding_dim, hidden_dim))  # input  -> hidden
U = np.random.standard_normal(size=(hidden_dim, hidden_dim))     # hidden -> hidden
V = np.random.standard_normal(size=(hidden_dim, tag_size))       # hidden -> tag scores

# Embedding matrix: one row of length embedding_dim per vocabulary word
E = np.random.standard_normal(size=(vocab_size, embedding_dim))

def softmax(x):
    """Return the softmax of `x`, normalised along axis 0.

    The global maximum of `x` is subtracted before exponentiating so that
    large scores do not overflow; this does not change the result.
    """
    shifted = x - np.max(x)
    numerators = np.exp(shifted)
    return numerators / np.sum(numerators, axis=0)

def cross_entropy_loss(pred, target):
    """Negative log of the probability that `pred` assigns to index `target`.

    A small constant is added inside the log so that a probability of
    exactly zero yields a large finite loss instead of infinity.
    """
    epsilon = 1e-9
    target_prob = pred[target]
    return -np.log(target_prob + epsilon)

# One-hot encoding helper
def one_hot_encoding(index, size):
    """Return a float vector of length `size` with a 1 at `index`, 0 elsewhere."""
    encoded = np.zeros(size, dtype=float)
    encoded[index] = 1.0
    return encoded

In [None]:
# 3. RNN forward pass and backpropagation
def forward(inputs):
    """Run the recurrent network over one sequence of word indices.

    Reads the module-level weights E, W, U, V and `hidden_dim`. The hidden
    state starts at zero and is updated once per input index.

    Args:
        inputs: sequence of row indices into the embedding matrix E.

    Returns:
        (outputs, hidden_states): two lists with one entry per timestep --
        the softmax distribution over tags and the hidden state that
        produced it.
    """
    outputs, hidden_states = [], []
    h_prev = np.zeros(hidden_dim)
    for word_idx in inputs:
        embedded = E[word_idx]
        pre_activation = np.dot(embedded, W) + np.dot(h_prev, U)
        h_prev = np.tanh(pre_activation)
        tag_scores = np.dot(h_prev, V)
        outputs.append(softmax(tag_scores))
        hidden_states.append(h_prev)
    return outputs, hidden_states

In [None]:
# 4. Model training
# Per-sentence gradient descent: run the forward pass, accumulate the
# gradients of the summed cross-entropy loss with backpropagation through
# time (BPTT), then update V, U and W in place. The embedding matrix E is
# not updated by this loop.
for epoch in range(n_epochs):
    total_loss = 0.0
    for i in range(len(X_data)):
        inputs = X_data[i]
        targets = Y_data[i]

        # Forward pass
        y_preds, hidden_states = forward(inputs)

        # Gradient accumulators for this sentence
        dL_dV = np.zeros_like(V)
        dL_dU = np.zeros_like(U)
        dL_dW = np.zeros_like(W)
        # Gradient reaching h_t from later timesteps (zero at the last step)
        dh_next = np.zeros(hidden_dim)

        for t in reversed(range(len(inputs))):
            y_pred = y_preds[t]
            target = targets[t]
            hidden_state = hidden_states[t]
            # The hidden state before the first step is the zero vector
            prev_hidden = hidden_states[t - 1] if t > 0 else np.zeros(hidden_dim)

            # Accumulate the loss so the epoch report shows a real value
            total_loss += cross_entropy_loss(y_pred, target)

            # Softmax + cross-entropy: gradient w.r.t. the tag scores
            error = y_pred - one_hot_encoding(target, tag_size)
            dL_dV += np.outer(hidden_state, error)

            # Gradient w.r.t. h_t: this step's output plus what flows back
            # from step t+1 through the recurrent weights
            dL_dh = np.dot(V, error) + dh_next
            # Back through tanh: d tanh(z)/dz = 1 - tanh(z)^2
            dL_dz = dL_dh * (1 - hidden_state ** 2)

            # Gradients of the recurrent and input weights
            x_t = E[inputs[t]]
            dL_dU += np.outer(prev_hidden, dL_dz)
            dL_dW += np.outer(x_t, dL_dz)

            # Pass the gradient on to h_{t-1} for the next loop iteration
            dh_next = np.dot(U, dL_dz)

        # Weight update
        V -= learning_rate * dL_dV
        U -= learning_rate * dL_dU
        W -= learning_rate * dL_dW

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {total_loss / len(X_data):.4f}")

In [None]:
# 5. Predict tags for a sentence
def predict_labels(sentence):
    """Return the most probable tag for each word of `sentence`.

    Uses the module-level `word_to_idx` / `idx_to_tag` tables and `forward`.

    Raises:
        KeyError: if `sentence` contains a word missing from `word_to_idx`.
    """
    token_indices = text_to_indices(sentence, word_to_idx)
    distributions, _ = forward(token_indices)
    predicted = []
    for distribution in distributions:
        best_tag_idx = np.argmax(distribution)
        predicted.append(idx_to_tag[best_tag_idx])
    return predicted

# Prediction demo: tag a short phrase and print the input next to the
# predicted tag sequence
test_sentence = "the cat sat on"
print(f"\nInput: '{test_sentence}'")
predicted_tags = predict_labels(test_sentence)
print("Predicted Tags:", predicted_tags)