In [3]:
# Import necessary libraries

import numpy as np
import pandas as pd

# Keras tools for NLP and deep learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
!pip install numpy pandas

Collecting pandas
  Downloading pandas-2.3.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.3-cp310-cp310-win_amd64.whl (11.3 MB)
   ---------------------------------------- 0.0/11.3 MB ? eta -:--:--
    --------------------------------------- 0.3/11.3 MB ? eta -:--:--
   ------- -------------------------------- 2.1/11.3 MB 9.8 MB/s eta 0:00:01
   ---------- ----------------------------- 2.9/11.3 MB 8.4 MB/s eta 0:00:02
   ----------- ---------------------------- 3.1/11.3 MB 5.8 MB/s eta 0:00:02
   -------------- ------------------------- 4.2/11.3 MB 5.0 MB/s eta 0:00:02
   ----------------- ---------------------- 5.0/11.3 MB 4.6 MB/s eta 0:00:02
   -------------------- ------------------- 5.8/11.3 MB 4.5 MB/s eta 0:00:02
   --------------------- ---------------

In [4]:
# Simple dataset

texts = [
    "I love this movie",
    "This movie is good",
    "I hate this movie",
    "This movie is bad"
]

labels = [1,1,0,0]  # 1 = Positive, 0 = Negative


In [5]:
# Build a tokenizer and let it learn the vocabulary of the training sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Encode every sentence as a list of integer word ids, using the fitted
# vocabulary, and show the result.
sequences = tokenizer.texts_to_sequences(texts)

print(sequences)


[[3, 5, 1, 2], [1, 2, 4, 6], [3, 7, 1, 2], [1, 2, 4, 8]]


In [6]:
# Bring every encoded sentence to a common length of 5 so they stack into a
# single 2-D array (the printed result shows zeros filling the front).
X = pad_sequences(sequences=sequences, maxlen=5)

print(X)


[[0 3 5 1 2]
 [0 1 2 4 6]
 [0 3 7 1 2]
 [0 1 2 4 8]]


In [7]:
# Targets as a NumPy array, in the same order as the sentences in `texts`.
y = np.asarray(labels)


In [8]:
# Create model

model = Sequential()

# Size of the embedding table. The tokenizer numbers words from 1 and id 0 is
# used for padding, so the table needs len(word_index) + 1 rows. Deriving it
# from the fitted tokenizer replaces the previous hard-coded 100, which had no
# relation to the actual vocabulary.
vocab_size = len(tokenizer.word_index) + 1

# Embedding Layer: maps each word id to an 8-dimensional vector.
# The `input_length` argument is deliberately not passed: it is deprecated in
# Keras 3 and only triggers a warning. The sequence length is taken from the
# data the first time the model is called.
model.add(
    Embedding(
        input_dim=vocab_size,
        output_dim=8,
    )
)




In [9]:
# Recurrent layer: an LSTM with 8 units reads the embedded sequence.
model.add(LSTM(units=8))


In [10]:
# Classification head: a single sigmoid unit, so the model emits one value
# between 0 and 1 per sentence.
model.add(Dense(units=1, activation="sigmoid"))


In [11]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])


In [12]:
# Fit the model on the padded sentences and their labels for 20 epochs.
model.fit(x=X, y=y, epochs=20)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.2500 - loss: 0.6931
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.2500 - loss: 0.6929
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.7500 - loss: 0.6927
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.7500 - loss: 0.6925
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.7500 - loss: 0.6923
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.7500 - loss: 0.6921
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.7500 - loss: 0.6919
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.7500 - loss: 0.6917
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x21c0d8e9e10>

In [13]:
# Inspect the network that was assembled and trained in the cells above.
model.summary()

In [18]:
# Sentence to score with the trained model.
# NOTE(review): "worst" does not occur in the training sentences; presumably
# the tokenizer drops unknown words because no oov_token was configured, so
# the model would only see "this movie is" — confirm.
test = ["This movie is worst"]

# Encode with the already-fitted tokenizer, then pad to the same length (5)
# that was used for the training data.
seq = tokenizer.texts_to_sequences(test)
pad = pad_sequences(sequences=seq, maxlen=5)

# Run the model and show its raw output.
pred = model.predict(x=pad)

print(pred)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[[0.49808195]]


In [19]:
# Turn every predicted score into a label with a 0.5 threshold.
# `pred` is a 2-D array (it prints as [[0.498...]]), so it is flattened and
# each score is tested on its own. Using the whole array as an `if` condition
# only works while it holds exactly one value; with several test sentences it
# raises "ValueError: The truth value of an array ... is ambiguous".
for score in pred.ravel():
    if score > 0.5:
        print("Positive")
    else:
        print("Negative")


Negative
