In [1]:
# train_lstm_model.py
# -----------------------------------------------------------
# LSTM model training for IMDB Movie Review Sentiment Analysis
# -----------------------------------------------------------

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# -----------------------------------------------------------
# Load and preprocess data
# -----------------------------------------------------------
# Seed the Python, NumPy and TensorFlow random generators in one call so that
# weight initialisation and batch shuffling are repeatable between runs.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

max_features = 10000   # Vocabulary size: only the most frequent words are kept
max_len = 500          # Max review length (in tokens) after padding/truncation

# Reviews arrive as variable-length lists of integer word ids; ids outside the
# `max_features` vocabulary are replaced by the out-of-vocabulary id.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")
print(f"Testing data shape: {X_test.shape}, Testing labels shape: {y_test.shape}")

# Pad sequences to same length so they can be batched as a 2-D integer array
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

# -----------------------------------------------------------
# Build LSTM model
# -----------------------------------------------------------
# Declare the whole stack in a single constructor call:
# token embedding -> recurrent encoder -> single-unit sigmoid classifier.
model = Sequential([
    Embedding(max_features, 128),          # one 128-dim vector per vocabulary id
    LSTM(128, activation='tanh'),          # summarises the review into 128 features
    Dense(1, activation='sigmoid'),        # probability-like score in [0, 1]
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once validation loss has gone 3 epochs without improving, and roll the
# model back to the weights from its best epoch (guards against overfitting).
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# -----------------------------------------------------------
# Train model
# -----------------------------------------------------------
# Training options gathered in one place; 20% of the training arrays are held
# out for validation, which is what the early-stopping callback monitors.
fit_options = dict(
    epochs=5,
    batch_size=32,
    validation_split=0.2,
    callbacks=[earlystopping],
)

# `history` keeps the per-epoch loss/accuracy curves for later inspection.
history = model.fit(X_train, y_train, **fit_options)

# -----------------------------------------------------------
# Evaluate on the held-out test set
# -----------------------------------------------------------
# The test split was loaded and padded earlier but otherwise unused. Scoring it
# gives a generalisation estimate that is independent of the validation split
# used for early stopping. With loss + one metric compiled, evaluate() returns
# the pair [loss, accuracy].
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=32, verbose=0)
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

# -----------------------------------------------------------
# Save model
# -----------------------------------------------------------
# The file name must stay in sync with the load_model() call in main.py.
model.save("lstm_imdb.h5")
print("✅ Model saved as lstm_imdb.h5")

# -----------------------------------------------------------
# Model Summary
# -----------------------------------------------------------
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 22ms/step - accuracy: 0.6978 - loss: 0.5740 - val_accuracy: 0.8012 - val_loss: 0.4546
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - accuracy: 0.7804 - loss: 0.4742 - val_accuracy: 0.8304 - val_loss: 0.3919
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 23ms/step - accuracy: 0.8791 - loss: 0.2962 - val_accuracy: 0.8550 - val_loss: 0.3469
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - accuracy: 0.9230 - loss: 0.2046 - val_accuracy: 0.8772 - val_loss: 0.3069
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━



✅ Model saved as lstm_imdb.h5


In [2]:
!pip install streamlit pyngrok tensorflow


Collecting streamlit
  Downloading streamlit-1.52.1-py3-none-any.whl.metadata (9.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.1-py3-none-any.whl (9.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m99.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m109.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.5.0 streamlit-1.52.1


In [3]:
!pip install streamlit
!pip install pyngrok




In [4]:
%%writefile main.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model
import streamlit as st

# Streamlit re-executes this script from top to bottom on every widget
# interaction. st.cache_resource keeps the expensive objects below alive across
# those reruns, so they are loaded once per server process rather than once
# per button click.

@st.cache_resource
def _load_vocabulary():
    """Return the IMDB word->rank mapping and its inverse (rank->word)."""
    forward = imdb.get_word_index()
    backward = {rank: word for word, rank in forward.items()}
    return forward, backward


@st.cache_resource
def _load_sentiment_model():
    """Load the trained LSTM classifier from disk."""
    return load_model("lstm_imdb.h5")


# Load word index
word_index, reverse_word_index = _load_vocabulary()

# Load your pre-trained model
model = _load_sentiment_model()

# Helper functions
def preprocess_text(text, max_features=10000, max_len=500):
    """Encode raw review text the same way the IMDB training data is encoded.

    `imdb.load_data` (with its default arguments) reserves id 0 for padding,
    1 for the start-of-review marker and 2 for out-of-vocabulary words, and
    shifts every real word rank by 3. Ids at or above `num_words` are replaced
    by the out-of-vocabulary id. This function mirrors that scheme so the
    model sees inputs drawn from the same id space it was trained on.

    Args:
        text: Raw review text.
        max_features: Vocabulary size the model was trained with; any id at or
            above this value is mapped to the out-of-vocabulary id so it never
            indexes past the embedding table.
        max_len: Length the sequence is padded/truncated to.

    Returns:
        A 2-D integer array of shape (1, max_len) ready for `model.predict`.
        Empty or whitespace-only text yields a sequence holding only the
        start marker.
    """
    import string  # local import: only needed for punctuation stripping

    START_TOKEN, OOV_TOKEN, INDEX_OFFSET = 1, 2, 3

    encoded_review = [START_TOKEN]
    for word in text.lower().split():
        rank = word_index.get(word)
        if rank is None:
            # Retry without surrounding punctuation ("great!" -> "great").
            rank = word_index.get(word.strip(string.punctuation))

        # Only known words receive the offset; unknown words map straight to
        # the OOV id (offsetting the OOV id would alias a real word).
        token = OOV_TOKEN if rank is None else rank + INDEX_OFFSET
        if token >= max_features:
            # Word exists in the full index but is outside the trained vocabulary.
            token = OOV_TOKEN
        encoded_review.append(token)

    padded_review = sequence.pad_sequences([encoded_review], maxlen=max_len)
    return padded_review

def predict_sentiment(review):
    """Classify a review and return its label together with the raw score.

    Args:
        review: Raw review text.

    Returns:
        A `(label, score)` tuple where `label` is "Positive" when the model's
        first output value exceeds 0.5 and "Negative" otherwise, and `score`
        is that output value as a Python float.
    """
    model_input = preprocess_text(review)
    raw_output = model.predict(model_input)

    # Single review in, single value out: take row 0, column 0.
    positive_score = raw_output[0][0]
    if positive_score > 0.5:
        label = "Positive"
    else:
        label = "Negative"
    return label, float(positive_score)

# Streamlit UI: page header, free-text input, and a button that triggers
# classification of whatever is currently in the text box.
st.title("IMDB Movie Review Sentiment Analysis -- Kumar")
st.write("Enter a movie review to classify it as positive or negative.")

review = st.text_area("Movie Review")
classify_clicked = st.button("Classify")

if classify_clicked:
    if review.strip():
        sentiment, score = predict_sentiment(review)
        st.write(f"Sentiment: {sentiment}")
        st.write(f"Prediction Score: {score:.4f}")
    else:
        # Nothing but whitespace was entered; prompt instead of predicting.
        st.write("Please enter a movie review.")


Writing main.py


In [5]:
# Launch the Streamlit app written to main.py by the %%writefile cell.
# NOTE(review): this appears to run in the foreground, so the cell keeps
# running until it is interrupted (the recorded output ends with "Stopping...").
# Later cells presumably cannot execute while the server is up — confirm.
!streamlit run main.py



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.2.153:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m


In [8]:
import streamlit
import tensorflow
