In [None]:
import numpy as np

# 1. Preprocessing Data
text = "the cat sat on the mat the cat lay on the bed"

# Sort the unique words so the word <-> index assignment is the same on every
# kernel restart. Iterating a plain set of strings yields an order that can
# change between interpreter sessions (str hashing is randomized), which would
# make every index-dependent result below non-reproducible.
words = sorted(set(text.split()))
word_to_idx = {word: i for i, word in enumerate(words)}  # word  -> index
idx_to_word = dict(enumerate(words))                     # index -> word
vocab_size = len(words)

print("Vocabulary:", word_to_idx)
print("Index:", idx_to_word)
# Convert a text into its sequence of vocabulary indices
def text_to_indices(text):
    """Map every whitespace-separated word of ``text`` to its index.

    Indices are looked up in the module-level ``word_to_idx`` dictionary.

    Returns:
        list: one index per word, in the order the words appear.

    Raises:
        KeyError: if a word is not a key of ``word_to_idx``.
    """
    indices = []
    for token in text.split():
        indices.append(word_to_idx[token])
    return indices

# Encode the whole corpus as a list of vocabulary indices and show it.
input_indices = text_to_indices(text)
print("Input Indices:", input_indices)

In [None]:
# 2. Model parameters
embedding_dim = 5   # Embedding dimension
hidden_dim = 10     # Hidden layer dimension
seq_length = 4      # Sequence length for the RNN
learning_rate = 0.01  # Step size applied to each weight update
n_epochs = 2  # Number of passes over the training data

In [None]:
# 3. Weight initialization
# Fixed seed so the random draws below repeat on every run; the order of the
# randn calls therefore determines which values each matrix receives.
np.random.seed(42)
W = np.random.randn(embedding_dim, hidden_dim)  # embedding -> hidden
U = np.random.randn(hidden_dim, hidden_dim)  # previous hidden -> hidden
V = np.random.randn(hidden_dim, vocab_size)  # hidden -> vocabulary scores

# Create the embedding matrix (one row of size embedding_dim per word index)
E = np.random.randn(vocab_size, embedding_dim)

def softmax(x):
    """Convert a vector of scores into values that sum to 1 along axis 0.

    The overall maximum of ``x`` is subtracted before exponentiating. This
    does not change the mathematical result but keeps ``np.exp`` from
    overflowing on large scores.
    """
    shifted = x - np.max(x)
    numerators = np.exp(shifted)
    return numerators / np.sum(numerators, axis=0)

def cross_entropy_loss(pred, target):
    """Return the negative log of the entry ``pred[target]``.

    A constant 1e-9 is added inside the logarithm, so a zero entry produces a
    large finite loss instead of ``inf``.
    """
    epsilon = 1e-9
    target_prob = pred[target]
    return -np.log(target_prob + epsilon)

# One-hot encoding helper
def one_hot_encoding(word_index, vocab_size):
    """Build a float vector of length ``vocab_size`` that is 1 at ``word_index``.

    Every other position is 0.
    """
    encoded = np.zeros(vocab_size)
    encoded[word_index] = 1
    return encoded

In [None]:
# 4. Build the input and target dataset
def get_data(input_indices, seq_length):
    """Slice an index sequence into sliding windows and their next elements.

    Window ``s`` is ``input_indices[s:s + seq_length]`` and its target is the
    element immediately after it, ``input_indices[s + seq_length]``.

    Returns:
        tuple: ``(X, Y)`` as NumPy arrays, the windows and the matching
        targets. Both are empty when the sequence is not longer than
        ``seq_length``.
    """
    n_windows = len(input_indices) - seq_length
    windows = [input_indices[s:s + seq_length] for s in range(n_windows)]
    targets = [input_indices[s + seq_length] for s in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build the training windows (X) and their next-word targets (Y) from the
# encoded corpus, then show them.
X, Y = get_data(input_indices, seq_length)
print("Training Data (X):", X)
print("Training Labels (Y):", Y)

In [None]:
# 5. RNN implementation
def forward(inputs, hidden_state):
    """Run the recurrent cell over ``inputs`` and score the next word.

    Starting from the ``hidden_state`` passed in (it is not reset here), each
    index in ``inputs`` is looked up in the embedding matrix ``E`` and folded
    into the state with ``tanh(x W + h U)``. The state after the last index is
    projected through ``V`` and normalised with ``softmax``.

    Returns:
        tuple: ``(y_pred, hidden_state)``, the softmax output and the state
        after the last input.
    """
    for word_idx in inputs:
        embedded = E[word_idx]  # embedding row for this word
        pre_activation = np.dot(embedded, W) + np.dot(hidden_state, U)
        hidden_state = np.tanh(pre_activation)
    scores = np.dot(hidden_state, V)  # project onto the vocabulary
    return softmax(scores), hidden_state

In [None]:
# 6. Model training
# The hidden state starts at zero and is carried from one training window to
# the next; it is treated as a constant when gradients are computed.
hidden_state = np.zeros(hidden_dim)

# Report the mean loss about ten times over the run and always on the last
# epoch, so that short runs (e.g. n_epochs = 2) still print it.
log_every = max(1, n_epochs // 10)

for epoch in range(n_epochs):
    total_loss = 0
    for i in range(len(X)):
        inputs = X[i]
        target = Y[i]

        # Forward pass. Same recurrence as forward(), but every intermediate
        # hidden state is kept because backpropagation through time needs the
        # state of each individual step:
        #   states[0]     -> state carried into this window
        #   states[t + 1] -> state after consuming inputs[t]
        states = [hidden_state]
        for token in inputs:
            states.append(np.tanh(np.dot(E[token], W) + np.dot(states[-1], U)))
        hidden_state = states[-1]
        y_pred = softmax(np.dot(hidden_state, V))

        # Show the current prediction
        predicted_idx = np.argmax(y_pred)
        predicted_word = idx_to_word[predicted_idx]
        print(f"Next word: {predicted_word}")

        # Compute the loss
        loss = cross_entropy_loss(y_pred, target)
        total_loss += loss

        # Backpropagation (gradients derived by hand).
        # Gradient of softmax + cross-entropy w.r.t. the vocabulary scores.
        d_scores = y_pred - one_hot_encoding(target, vocab_size)
        dL_dV = np.outer(hidden_state, d_scores)
        dL_dh = np.dot(V, d_scores)  # gradient w.r.t. the last hidden state

        dL_dU = np.zeros_like(U)
        dL_dW = np.zeros_like(W)

        # Backpropagation through time (BPTT): walk the window backwards and
        # use the hidden state that belongs to each step, not the final one.
        for t in reversed(range(len(inputs))):
            x_t = E[inputs[t]]
            h_t = states[t + 1]  # output of step t
            h_prev = states[t]   # input state of step t
            dL_dh_raw = dL_dh * (1 - h_t ** 2)  # through tanh
            dL_dU += np.outer(h_prev, dL_dh_raw)
            dL_dW += np.outer(x_t, dL_dh_raw)
            dL_dh = np.dot(U, dL_dh_raw)  # pass the gradient to step t - 1

        # Gradient-descent update; all gradients above were computed with the
        # weights used in the forward pass.
        V -= learning_rate * dL_dV
        U -= learning_rate * dL_dU
        W -= learning_rate * dL_dW

    if (epoch + 1) % log_every == 0 or epoch + 1 == n_epochs:
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {total_loss / len(X):.4f}")

In [None]:
# 7. Next-word prediction
def predict_next_word(model_input, n_predictions=3):
    """Print ``n_predictions`` greedy next-word guesses, one per line.

    The hidden state starts at zero and is carried from one prediction to the
    next. After each guess the input window drops its first index and gains
    the predicted index at the end. Nothing is returned.
    """
    state = np.zeros(hidden_dim)
    window = model_input
    for _step in range(n_predictions):
        probs, state = forward(window, state)
        best_idx = np.argmax(probs)  # greedy choice
        print(f"Next word: {idx_to_word[best_idx]}")
        window = np.append(window[1:], best_idx)

In [None]:
# 8. Prediction test
print("\nPrediksi Kata Berikutnya:")
input_text = "the cat sat on"
# Use a dedicated name for the prompt. Rebinding `input_indices` here would
# replace the encoded training corpus, so re-running the dataset cell after
# this one would build X/Y from this 4-word prompt (zero training samples).
prompt_indices = text_to_indices(input_text)
predict_next_word(prompt_indices[-seq_length:])