<a href="https://colab.research.google.com/github/karankumawat0/Next-Word-Prediction/blob/main/next_word_prediction_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np


In [15]:
# Sample dataset
data = "hello world hello machine hello learning hello AI My name karan kumawat"

# Tokenization
# The unique words are sorted so the word <-> index assignment is deterministic.
# Iterating a bare set of strings can yield a different order on each
# interpreter start (string hashing is randomized by default), which would
# change which row/column of the weight matrices belongs to which word
# from one run to the next.
words = sorted(set(data.split()))
word_to_idx = {w: i for i, w in enumerate(words)}
idx_to_word = dict(enumerate(words))

# Hyperparameters
vocab_size = len(words)
hidden_size = 16   # Number of hidden units
sequence_length = 2  # Length of input sequence
learning_rate = 0.01

# Preparing training data
def create_sequences(data, seq_length, vocab=None):
    """Build (context, next-word) training pairs with a sliding window.

    Every run of ``seq_length`` consecutive words becomes one input row and
    the word that follows it becomes the corresponding target.

    Args:
        data: Whitespace-separated text.
        seq_length: Number of consecutive words used as the input context.
        vocab: Optional mapping from word to integer index. When omitted,
            the module-level ``word_to_idx`` is used.

    Returns:
        A tuple ``(X, Y)`` of integer arrays. ``X`` has shape
        ``(n_samples, seq_length)`` and ``Y`` has shape ``(n_samples,)``,
        where ``n_samples = max(len(tokens) - seq_length, 0)``.

    Raises:
        KeyError: If a word that is part of a training pair is missing
            from the vocabulary.
    """
    if vocab is None:
        vocab = word_to_idx

    tokens = data.split()
    n_samples = max(len(tokens) - seq_length, 0)

    X = [[vocab[word] for word in tokens[i:i + seq_length]] for i in range(n_samples)]
    Y = [vocab[tokens[i + seq_length]] for i in range(n_samples)]

    # Explicit dtype/shape so that text too short to yield any pair still
    # produces an integer (0, seq_length) array instead of a float (0,) one.
    return (
        np.array(X, dtype=int).reshape(n_samples, seq_length),
        np.array(Y, dtype=int),
    )

# Materialise the training set: one row of word indices per context window in X,
# and the index of the word that follows each window in Y.
X, Y = create_sequences(data, seq_length=sequence_length)


In [16]:
# Weights
# A seeded generator makes the initial parameters (and therefore the whole
# training run) reproducible under "Restart Kernel -> Run All"; re-running
# this cell also resets the model to the same starting point.
SEED = 42
INIT_SCALE = 0.01  # multiplier applied to the standard-normal draws
rng = np.random.default_rng(SEED)

Wxh = rng.standard_normal((hidden_size, vocab_size)) * INIT_SCALE   # Input to hidden
Whh = rng.standard_normal((hidden_size, hidden_size)) * INIT_SCALE  # Hidden to hidden
Why = rng.standard_normal((vocab_size, hidden_size)) * INIT_SCALE   # Hidden to output

# Biases (column vectors, initialised to zero)
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))


In [17]:
def softmax(x, axis=None):
    """Numerically stable softmax.

    Args:
        x: Array-like of scores (logits).
        axis: Axis along which the probabilities are normalised. The default
            ``None`` normalises over the whole array, which is what a single
            1-D score vector needs. Pass e.g. ``axis=-1`` to normalise each
            row of a batch independently.

    Returns:
        Array with the same shape as ``x`` whose entries are non-negative
        and sum to 1 along ``axis`` (over all entries when ``axis`` is None).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)


In [18]:
# Training the RNN
# Per-sample gradient descent: for every (context, target) pair the loop runs
# the forward pass, backpropagates through the `sequence_length` unrolled
# steps, and immediately updates the module-level parameters
# (Wxh, Whh, Why, bh, by).
# NOTE: the parameters are updated in place, so re-running this cell continues
# training from the current weights rather than starting from scratch.
for epoch in range(500):
    total_loss = 0  # Sum (not mean) of the per-sample losses in this epoch
    for i in range(len(X)):
        # Inputs and Targets
        inputs = X[i]
        target = Y[i]

        # One-hot encoding for inputs: column t is the indicator of word t
        x_encoded = np.zeros((vocab_size, sequence_length))
        for t, idx in enumerate(inputs):
            x_encoded[idx, t] = 1

        # Forward Pass
        # hs[:, 0] is the all-zero initial state; hs[:, t+1] is the state
        # after consuming word t.
        hs = np.zeros((hidden_size, sequence_length + 1))  # Hidden states
        for t in range(sequence_length):
            hs[:, t+1] = np.tanh(np.dot(Wxh, x_encoded[:, t]) + np.dot(Whh, hs[:, t]) + bh.flatten())

        # Output layer (only the last hidden state is used for the prediction)
        y_hat = softmax(np.dot(Why, hs[:, -1]) + by.flatten())  # Shape: (vocab_size,)

        # Loss (Cross-Entropy)
        loss = -np.log(y_hat[target])
        total_loss += loss

        # Backpropagation
        # Work on a copy: subtracting 1 in place on an alias of y_hat would
        # overwrite the predicted probabilities with the gradient.
        dy = y_hat.copy()
        dy[target] -= 1  # Gradient of softmax + cross-entropy

        # Reshape dy to (vocab_size, 1) for compatibility
        dy = dy.reshape(-1, 1)

        # Gradients for output layer
        dWhy = np.dot(dy, hs[:, -1].reshape(1, -1))  # Shape: (vocab_size, hidden_size)
        dby = dy                                    # Shape: (vocab_size, 1)

        # Gradients for hidden layer
        dh = np.dot(Why.T, dy)                      # Shape: (hidden_size, 1)
        dWxh = np.zeros_like(Wxh)
        dWhh = np.zeros_like(Whh)
        dbh = np.zeros_like(bh)

        # Walk the unrolled steps from last to first, accumulating gradients
        for t in reversed(range(sequence_length)):
            # Derivative of tanh is 1 - tanh^2; result is already 1-D
            dtanh = (1 - hs[:, t+1] ** 2) * dh.flatten()   # Shape: (hidden_size,)

            # Gradients for Wxh, Whh, and bh
            dWxh += np.outer(dtanh, x_encoded[:, t])       # Shape: (hidden_size, vocab_size)
            dWhh += np.outer(dtanh, hs[:, t])              # Shape: (hidden_size, hidden_size)
            dbh += dtanh.reshape(-1, 1)                    # Shape: (hidden_size, 1)

            # Update dh for the next time step
            dh = np.dot(Whh.T, dtanh.reshape(-1, 1))       # Shape: (hidden_size, 1)

        # Gradient Descent Update
        Wxh -= learning_rate * dWxh
        Whh -= learning_rate * dWhh
        Why -= learning_rate * dWhy
        bh  -= learning_rate * dbh
        by  -= learning_rate * dby  # Shapes match (vocab_size, 1)

    # Monitoring the Loss
    if epoch % 50 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss:.4f}')


Epoch 0, Loss: 21.9946
Epoch 50, Loss: 20.6020
Epoch 100, Loss: 20.1537
Epoch 150, Loss: 19.3572
Epoch 200, Loss: 15.6377
Epoch 250, Loss: 12.3195
Epoch 300, Loss: 9.7868
Epoch 350, Loss: 7.7196
Epoch 400, Loss: 5.9411
Epoch 450, Loss: 4.7631


In [24]:
def predict_next_word(sequence):
    """Predict the word that follows a ``sequence_length``-word prompt.

    The prompt is split on whitespace, one-hot encoded, and run through the
    recurrent layer using the module-level parameters; the vocabulary entry
    with the highest output probability is returned.

    Args:
        sequence: Prompt text containing exactly ``sequence_length`` words,
            all of which must be keys of ``word_to_idx``.

    Returns:
        The predicted word, or ``None`` (after printing an explanatory
        message) when the prompt has the wrong length or contains a word
        that is not in the vocabulary.
    """
    prompt_words = sequence.split()

    # Guard: only prompts of exactly `sequence_length` words are accepted.
    if len(prompt_words) != sequence_length:
        print(f"Please enter a {sequence_length}-word sequence.")
        return None

    # Guard: report the first out-of-vocabulary word, if there is one.
    unknown = next((w for w in prompt_words if w not in word_to_idx), None)
    if unknown is not None:
        print(f"Word '{unknown}' not in vocabulary.")
        return None

    # One-hot encoding: column t is the indicator vector of the t-th word.
    x_encoded = np.zeros((vocab_size, sequence_length))
    word_rows = np.array([word_to_idx[w] for w in prompt_words], dtype=int)
    x_encoded[word_rows, np.arange(sequence_length)] = 1

    # Forward pass; column 0 of `hs` is the all-zero initial hidden state.
    hs = np.zeros((hidden_size, sequence_length + 1))
    for step in range(sequence_length):
        pre_activation = (
            np.dot(Wxh, x_encoded[:, step])
            + np.dot(Whh, hs[:, step])
            + bh.flatten()
        )
        hs[:, step + 1] = np.tanh(pre_activation)

    # Score every vocabulary entry from the final hidden state and pick the best.
    probabilities = softmax(np.dot(Why, hs[:, -1]) + by.flatten())
    best_idx = np.argmax(probabilities)
    return idx_to_word[best_idx]




In [23]:
# Example Prediction
# Interactive demo: keeps prompting for a prompt until the user types 'exit'.
while True:
    # Taking input from the user
    try:
        user_input = input(f"Enter a {sequence_length}-word sequence (or type 'exit' to quit): ")
    except EOFError:
        # The input stream ended (e.g. piped input ran out): stop cleanly
        # instead of crashing with a traceback.
        break

    # Exit condition (surrounding whitespace and letter case are ignored)
    if user_input.strip().lower() == 'exit':
        print("Exiting the program. Goodbye!")
        break

    # Predict the next word
    prediction = predict_next_word(user_input)
    # predict_next_word prints its own message and returns None for invalid
    # input, so only real predictions are echoed (no stray "None" line).
    if prediction is not None:
        print(prediction)


Enter a 2-word sequence (or type 'exit' to quit): My name
karan
Enter a 2-word sequence (or type 'exit' to quit): hello world
hello
Enter a 2-word sequence (or type 'exit' to quit): exit
Exiting the program. Goodbye!
