In [1]:
# Turn raw sentences into fixed-length integer sequences that an LSTM can consume:
# build a word index, map every sentence to word ids, then zero-pad on the right.

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Toy corpus used as training data
texts = [
    "I love this movie",
    "I hate this product",
    "Amazing experience",
    "Worst ever",
    "Will recommend",
]

# Learn the word -> id mapping from the corpus (num_words caps the ids used when encoding)
tokenizer = Tokenizer(num_words=10_000)
tokenizer.fit_on_texts(texts)

# Replace each sentence by its list of word ids
sequences = tokenizer.texts_to_sequences(texts)

# Bring every sequence to length 5; padding='post' appends the zeros after the real ids
padded_sequences = pad_sequences(sequences, padding="post", maxlen=5)

print("Tokenized and Padded Sequences:\n", padded_sequences)

Tokenized and Padded Sequences:
 [[ 1  3  2  4  0]
 [ 1  5  2  6  0]
 [ 7  8  0  0  0]
 [ 9 10  0  0  0]
 [11 12  0  0  0]]


In [2]:
# Define an LSTM-based neural network architecture for binary text classification.

from tensorflow.keras.layers import Dense, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Seed Python's `random`, NumPy and TensorFlow before any weights are created so that
# weight initialisation and the shuffling done during training repeat across runs.
set_random_seed(42)

# Set parameters
vocab_size = 10000  # Embedding rows; same value as the Tokenizer's num_words
embedding_dim = 16  # Size of the word embeddings
max_length = 5      # Length of the padded input sequences

# Build LSTM model.
# The sequence length is declared with an explicit Input layer rather than the
# Embedding `input_length` argument: that argument is deprecated in Keras 3, where it
# only triggers a warning and leaves the model unbuilt. With Input the model is built
# immediately, so model.summary() can report output shapes and parameter counts.
model = Sequential([
    Input(shape=(max_length,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    LSTM(64),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # single probability for the positive class
])

# Compile the model: binary cross-entropy matches the single sigmoid output
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 5, 16)             160000    
                                                                 
 lstm (LSTM)                 (None, 64)                20736     
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 182,849
Trainable params: 182,849
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Fit the LSTM classifier on the padded training sentences.

import numpy as np

# One target per training sentence: 1 = positive sentiment, 0 = negative sentiment
labels = np.asarray([1, 0, 1, 0, 1])

# Five passes over the data; the returned History object keeps the per-epoch
# loss and accuracy that are plotted later in the notebook.
history = model.fit(
    x=padded_sequences,
    y=labels,
    epochs=5,
    verbose=1,
)

In [None]:
# Evaluate the LSTM model's performance on unseen data and check its accuracy.

import numpy as np

# Example test data with ground-truth sentiment (1 for positive, 0 for negative);
# the labels are what make an accuracy figure possible.
test_texts = ["This was amazing", "Not worth it", "Absolutely fantastic", "Terrible experience"]
test_labels = np.array([1, 0, 1, 0])

test_sequences = tokenizer.texts_to_sequences(test_texts)

# The tokenizer was created without an OOV token, so words it never saw during
# fitting are silently dropped. A sentence made only of such words becomes an empty
# sequence (all padding), and its prediction carries no information about the text.
unknown_only = [text for text, seq in zip(test_texts, test_sequences) if not seq]
if unknown_only:
    print("Warning: no known words (all-padding input) in:", unknown_only)

# Pad to the same length the model was built for
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post')

# Generate predictions: sigmoid probabilities thresholded at 0.5
predictions = model.predict(test_padded)
predicted_labels = (predictions > 0.5).astype(int)

print("Predicted Labels:", predicted_labels.flatten())

# Fraction of test sentences whose predicted label matches the ground truth
test_accuracy = float(np.mean(predicted_labels.flatten() == test_labels))
print(f"Test Accuracy: {test_accuracy:.2f}")

In [None]:
# Visualise how the training loss and accuracy evolved from epoch to epoch.

import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# Draw one curve per metric recorded during training (both share the same axes)
for metric, curve_label in (("loss", "Loss"), ("accuracy", "Accuracy")):
    ax.plot(history.history[metric], label=curve_label)

ax.set_xlabel('Epoch')
ax.set_ylabel('Value')
ax.set_title('Training Loss and Accuracy')
ax.legend()
plt.show()