In [None]:
#Sequence modeling to predict next most likely term using RNN and Transformer

#RNN
!pip install tensorflow

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense


In [None]:

# Toy corpus the model is trained on
sentences = [
    'how to use tensorflow',
    'what is machine learning',
    'deep learning with neural networks'
]

# Fit the vocabulary on the corpus. One extra slot is added to the vocabulary
# size so that index 0 (the value pad_sequences fills with) is representable.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
total_words = 1 + len(tokenizer.word_index)

# Expand every sentence into all of its prefixes of two or more tokens:
# the last token of each prefix is the target, the rest is the context.
sequences = []
for sentence in sentences:
    encoded = tokenizer.texts_to_sequences([sentence])[0]
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))

# Left-pad every prefix to the length of the longest one
max_len = max(map(len, sequences))
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')

# Split each padded row into context (all but the last column) and a
# one-hot encoded target (the last column)
X = sequences[:, :-1]
y = tf.keras.utils.to_categorical(sequences[:, -1], num_classes=total_words)

# Model
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# training are repeatable from one run to the next (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the context length with an explicit Input layer. The Embedding
    # `input_length` argument is deprecated in Keras 3, and declaring the shape
    # up front lets model.summary() report output shapes before training.
    tf.keras.Input(shape=(X.shape[1],)),
    # Map each token id to a 10-dimensional vector
    Embedding(input_dim=total_words, output_dim=10),
    # Only the final RNN state is needed to predict the single next token
    SimpleRNN(50, return_sequences=False),
    # One probability per vocabulary index
    Dense(total_words, activation='softmax')
])

# y is one-hot encoded, hence categorical (not sparse) cross-entropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train
model.fit(X, y, epochs=10, verbose=1)

# Predict next term
def predict_next_term(model, tokenizer, text, max_len=None):
    """Return the word the model scores as most likely to follow ``text``.

    Args:
        model: Trained Keras model that takes a batch of padded token-id
            sequences and returns one score per vocabulary index.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        text: Prompt whose continuation should be predicted.
        max_len: Length the encoded prompt is left-padded (or truncated) to.
            Defaults to the model's own input length, ``model.input_shape[1]``,
            so the function does not depend on notebook-level variables.

    Returns:
        The predicted word, or ``''`` when the highest-scoring index has no
        entry in ``tokenizer.index_word``.
    """
    if max_len is None:
        # Ask the model for its expected sequence length instead of reading the
        # training matrix from the global namespace, which may have been
        # redefined by a later cell.
        max_len = model.input_shape[1]

    encoded = tokenizer.texts_to_sequences([text])[0]
    padded = pad_sequences([encoded], maxlen=max_len, padding='pre')
    probabilities = model.predict(padded)
    # The batch holds a single prompt, so take the arg-max of row 0
    predicted_word_index = int(np.argmax(probabilities, axis=-1)[0])
    return tokenizer.index_word.get(predicted_word_index, '')

# Show the model's continuation for a prefix taken from the training corpus
print(predict_next_term(model=model, tokenizer=tokenizer, text='how to use'))
