<a href="https://colab.research.google.com/github/suryatejaganji/NLP-2303A51L19-27/blob/main/AS-6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Importing necessary libraries
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

# Vocabulary and sequence-length settings shared by both models
max_features = 10000  # Vocabulary size: only the most frequent words are kept by load_data
maxlen = 100  # Fixed sequence length; pad_sequences pads/truncates at the FRONT by default,
              # so reviews longer than this keep their last `maxlen` tokens

# (a) Preprocessing of the Data
print("Loading data...")
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

print("Pad sequences (samples x time)")
# Bring both splits to the same (samples, maxlen) shape in one pass
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen) for reviews in (X_train, X_test)
)

# (b) Divide data into training and testing data set
# (load_data already returns separate train/test splits; just report their shapes)
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# (c) Build the GRU Model
def build_gru_model():
    """Build and compile the GRU-based binary classifier.

    Architecture: an Embedding over ``max_features`` token ids producing
    128-dimensional vectors, a 128-unit GRU that returns only its final
    state, and a single sigmoid unit. The model is compiled with binary
    cross-entropy, the Adam optimizer and accuracy as the tracked metric.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    layers = [
        Embedding(max_features, 128),
        GRU(128, return_sequences=False),
        Dense(1, activation='sigmoid'),
    ]
    gru_net = Sequential(layers)
    gru_net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return gru_net

# Build the LSTM Model for comparison
def build_lstm_model():
    """Build and compile the LSTM-based binary classifier.

    Mirrors the GRU model layer for layer (Embedding over ``max_features``
    token ids -> 128-unit recurrent layer returning its final state ->
    single sigmoid unit) so that the recurrent cell type is the only
    difference between the two models. Compiled with binary cross-entropy,
    the Adam optimizer and accuracy as the tracked metric.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    layers = [
        Embedding(max_features, 128),
        LSTM(128, return_sequences=False),
        Dense(1, activation='sigmoid'),
    ]
    lstm_net = Sequential(layers)
    lstm_net.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return lstm_net

# (d) Training: both models use identical fit settings so the comparison
# differs only in the recurrent cell type.
fit_options = dict(batch_size=32, epochs=5, validation_split=0.2)

gru_model = build_gru_model()
print("Training GRU model...")
gru_model.fit(X_train, y_train, **fit_options)

lstm_model = build_lstm_model()
print("Training LSTM model...")
lstm_model.fit(X_train, y_train, **fit_options)

# (e) Evaluate Models
# evaluate() returns [loss, accuracy] because the models were compiled
# with metrics=['accuracy'].
gru_score, gru_acc = gru_model.evaluate(X_test, y_test)
lstm_score, lstm_acc = lstm_model.evaluate(X_test, y_test)

print(f"GRU Model Accuracy: {gru_acc * 100:.2f}%")
print(f"LSTM Model Accuracy: {lstm_acc * 100:.2f}%")

# (f) Text Generation (Simplified Example)

def generate_text(model, start_text, word_index, index_word, max_len=100):
    """Extend ``start_text`` word by word using ``model``'s predictions.

    The prompt is split on whitespace and each word is mapped to an id via
    ``word_index`` (unknown words map to 0). At every step the ids are
    padded/truncated to the module-level ``maxlen``, passed to
    ``model.predict``, and the argmax of the prediction is looked up in
    ``index_word`` to obtain the next word.

    NOTE: this only produces meaningful text when ``model`` outputs one
    score per vocabulary entry. With a single-unit output (e.g. a binary
    classifier ending in ``Dense(1)``) the argmax is always 0, so the same
    fallback word is appended at every step.

    Args:
        model: Object exposing ``predict(batch)``; the argmax over its
            output is interpreted as a token id.
        start_text: Whitespace-separated prompt to continue.
        word_index: Mapping from word to the token id the model expects.
        index_word: Mapping from token id back to word.
        max_len: Maximum number of words to append.

    Returns:
        ``start_text`` followed by the generated words, space-separated.
        Generation stops early if the word ``'end'`` is produced.
    """
    # Encode the prompt once, by WORD (iterating the string directly would
    # yield single characters), then grow the id list as words are generated.
    token_ids = [word_index.get(word, 0) for word in start_text.split()]

    for _ in range(max_len):
        encoded_input = sequence.pad_sequences([token_ids], maxlen=maxlen)
        prediction = model.predict(encoded_input)
        next_word = index_word.get(int(np.argmax(prediction)), '?')
        start_text += ' ' + next_word
        if next_word == 'end':
            break
        # Re-encode through word_index so the fallback word ('?') is handled
        # exactly like any other unknown token.
        token_ids.append(word_index.get(next_word, 0))
    return start_text

# Raw vocabulary: word -> frequency rank, and its inverse.
word_index = imdb.get_word_index()
index_word = {index: word for word, index in word_index.items()}

# The sequences returned by imdb.load_data() are NOT raw ranks: every rank is
# shifted by index_from (default 3) to reserve 0 = padding, 1 = start and
# 2 = out-of-vocabulary, and ids >= num_words are dropped. Build lookup tables
# in that same id space so the prompt is encoded the way the training data
# was, and so no id can fall outside the Embedding's range.
index_from = 3
model_word_index = {
    word: rank + index_from
    for word, rank in word_index.items()
    if rank + index_from < max_features
}
model_index_word = {index: word for word, index in model_word_index.items()}

start_text = "the movie was"
generated_text_gru = generate_text(gru_model, start_text, model_word_index, model_index_word)
generated_text_lstm = generate_text(lstm_model, start_text, model_word_index, model_index_word)

print("Text Generated by GRU Model:", generated_text_gru)
print("Text Generated by LSTM Model:", generated_text_lstm)

# Evaluate both models on the test data
# NOTE(review): this repeats the evaluation already done in step (e) on the
# same models and data; kept so the cell's output is unchanged.
lstm_score, lstm_acc = lstm_model.evaluate(X_test, y_test)
gru_score, gru_acc = gru_model.evaluate(X_test, y_test)

# Print the accuracy results
print(f"LSTM Model Accuracy: {lstm_acc * 100:.2f}%")
print(f"GRU Model Accuracy: {gru_acc * 100:.2f}%")

Loading data...
Pad sequences (samples x time)
Training data shape: (25000, 100)
Testing data shape: (25000, 100)
Training GRU model...
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 136ms/step - accuracy: 0.7037 - loss: 0.5426 - val_accuracy: 0.8510 - val_loss: 0.3481
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 135ms/step - accuracy: 0.8988 - loss: 0.2544 - val_accuracy: 0.8564 - val_loss: 0.3382
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 134ms/step - accuracy: 0.9456 - loss: 0.1497 - val_accuracy: 0.8472 - val_loss: 0.3997
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 137ms/step - accuracy: 0.9736 - loss: 0.0748 - val_accuracy: 0.8448 - val_loss: 0.5091
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 136ms/step - accuracy: 0.9865 - loss: 0.0412 - val_accuracy: 0.8400 - val_loss: 0.6333
Training LSTM model...
Epoch 1/5
[1m625/6