In [2]:
!pip install gradio
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr


Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [3]:
# Example dataset: Replace this with a larger corpus
text = """
The quick brown fox jumps over the lazy dog.
The sky is blue and the grass is green.
She sells seashells by the seashore.
The early bird catches the worm.
A journey of a thousand miles begins with a single step.
"""

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# Create sequences
input_sequences = []
for line in text.split("\n"):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and create labels
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)


In [4]:
def transformer_block(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    # Multi-Head Attention
    attention = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attention = Dropout(dropout)(attention)
    attention = LayerNormalization(epsilon=1e-6)(inputs + attention)

    # Feed Forward Network
    ff_output = Dense(ff_dim, activation="relu")(attention)
    ff_output = Dense(inputs.shape[-1])(ff_output)
    ff_output = Dropout(dropout)(ff_output)
    return LayerNormalization(epsilon=1e-6)(attention + ff_output)

def build_transformer_model(input_len, vocab_size, embed_dim, num_heads, head_size, ff_dim, num_blocks, dropout=0.1):
    inputs = Input(shape=(input_len,))
    embedding = Embedding(vocab_size, embed_dim)(inputs)

    x = embedding
    for _ in range(num_blocks):
        x = transformer_block(x, head_size, num_heads, ff_dim, dropout)

    x = Dense(128, activation="relu")(x)
    x = Dropout(dropout)(x)
    outputs = Dense(vocab_size, activation="softmax")(x[:, -1])  # Output for the last token only

    return Model(inputs, outputs)

# Model Parameters
model = build_transformer_model(
    input_len=max_sequence_len - 1,  # Sequence length
    vocab_size=total_words,          # Vocabulary size
    embed_dim=64,                    # Embedding dimensions
    num_heads=4,                     # Number of attention heads
    head_size=64,                    # Size of each attention head
    ff_dim=128,                      # Feed-forward network size
    num_blocks=2,                    # Number of transformer blocks
    dropout=0.1                      # Dropout rate
)

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X, y, epochs=20, verbose=2)


Epoch 1/20
2/2 - 10s - 5s/step - accuracy: 0.0000e+00 - loss: 3.5779
Epoch 2/20
2/2 - 0s - 249ms/step - accuracy: 0.1389 - loss: 3.0677
Epoch 3/20
2/2 - 1s - 301ms/step - accuracy: 0.1111 - loss: 2.8853
Epoch 4/20
2/2 - 0s - 118ms/step - accuracy: 0.3333 - loss: 2.5733
Epoch 5/20
2/2 - 0s - 164ms/step - accuracy: 0.3889 - loss: 2.4645
Epoch 6/20
2/2 - 0s - 134ms/step - accuracy: 0.5833 - loss: 2.1880
Epoch 7/20
2/2 - 0s - 92ms/step - accuracy: 0.5556 - loss: 1.9765
Epoch 8/20
2/2 - 0s - 97ms/step - accuracy: 0.5556 - loss: 1.8532
Epoch 9/20
2/2 - 0s - 127ms/step - accuracy: 0.7222 - loss: 1.6513
Epoch 10/20
2/2 - 0s - 69ms/step - accuracy: 0.6944 - loss: 1.4540
Epoch 11/20
2/2 - 0s - 78ms/step - accuracy: 0.7500 - loss: 1.4336
Epoch 12/20
2/2 - 0s - 57ms/step - accuracy: 0.7778 - loss: 1.2850
Epoch 13/20
2/2 - 0s - 68ms/step - accuracy: 0.7778 - loss: 1.2157
Epoch 14/20
2/2 - 0s - 107ms/step - accuracy: 0.7778 - loss: 1.1876
Epoch 15/20
2/2 - 0s - 78ms/step - accuracy: 0.7778 - loss: 1

<keras.src.callbacks.history.History at 0x7d05f0a7d6f0>

In [5]:
def predict_next_word(input_text):
    token_list = tokenizer.texts_to_sequences([input_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    predicted_word = tokenizer.index_word[np.argmax(predicted)]
    return predicted_word


In [6]:
iface = gr.Interface(
    fn=predict_next_word,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs=gr.Textbox(label="Next Word Prediction"),
    title="Next Word Prediction with Transformer",
    description="Enter the beginning of a sentence, and the model will predict the next word using a Transformer architecture."
)

iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://419c335cf060a9f7df.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


