In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv("sentiment_product_reviews.csv")
df.head(2)

Unnamed: 0,comment,label
0,"Moderate performance, works as intended.",1
1,"The product is just okay, nothing special.",1


In [2]:
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
stopwords = set(stopwords.words('english'))
stopwords.discard("no")
stopwords.discard("not")
stopwords.discard("never")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
import re
from nltk import ngrams
from nltk.stem import WordNetLemmatizer
lm = WordNetLemmatizer()
def tokenization_review(text): 
    text = re.sub(r'[^a-zA-Z0-9\s.,!?]', ' ', str(text)) 
    text = text.lower()  
    words = text.split()
    words = [lm.lemmatize(word) for word in words if word not in stopwords]
    return ' '.join(words)  
df['comment_cleaned']= df['comment'].apply(tokenization_review)

In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
tokenizer = Tokenizer(char_level=False)
tokenizer.fit_on_texts(df['comment_cleaned'])
df["seq_comment"] = tokenizer.texts_to_sequences(df.comment_cleaned)
df.head(2)

Unnamed: 0,comment,label,comment_cleaned,seq_comment
0,"Moderate performance, works as intended.",1,"moderate performance, work intended.","[71, 54, 14, 120]"
1,"The product is just okay, nothing special.",1,"product okay, nothing special.","[2, 46, 29, 67]"


In [8]:
vocab_length=np.max(df.seq_comment.max())+2
max_seq_length = df['seq_comment'].map(len).max()
vocab_size = 150 
embedding_dim = 25  
max_length = 50  
trunc_type = 'post' 
padding_type = 'post'  
oov_tok = "<OOV>" 

In [9]:
X=df['seq_comment']
y=df['label']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [10]:
training_padded = pad_sequences(X_train, maxlen=max_seq_length, padding=padding_type, truncating=trunc_type)
testing_padded = pad_sequences(X_test, maxlen=max_seq_length, padding=padding_type, truncating=trunc_type)
import numpy as np
training_padded = np.array(training_padded)
y_train = np.array(y_train)
testing_padded = np.array(testing_padded)
y_test = np.array( y_test)

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
ann=tf.keras.models.Sequential()
ann.add(tf.keras.layers.Embedding(vocab_size, embedding_dim))
ann.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=False)))
ann.add(tf.keras.layers.Dense(units=128, activation='relu'))
ann.add(tf.keras.layers.Dense(units=64, activation='relu'))
ann.add(tf.keras.layers.Dense(units=3, activation='softmax'))  
ann.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
ann.fit(training_padded,y_train, batch_size=50,epochs=100, validation_data=(testing_padded,y_test))

Epoch 1/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.8372 - loss: 0.3448 - val_accuracy: 1.0000 - val_loss: 2.9980e-05
Epoch 2/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 1.0000 - loss: 2.4663e-05 - val_accuracy: 1.0000 - val_loss: 7.1422e-06
Epoch 3/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 1.0000 - loss: 6.6008e-06 - val_accuracy: 1.0000 - val_loss: 2.9599e-06
Epoch 4/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 1.0000 - loss: 2.6961e-06 - val_accuracy: 1.0000 - val_loss: 1.5824e-06
Epoch 5/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 1.0000 - loss: 1.6045e-06 - val_accuracy: 1.0000 - val_loss: 9.6320e-07
Epoch 6/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 1.0000 - loss: 9.1953e-07 - val_accuracy: 1.0000

<keras.src.callbacks.history.History at 0x1bd90589a10>

In [19]:
val_loss, val_accuracy = ann.evaluate(testing_padded, y_test)
print("\nBest Model Validation Accuracy:", val_accuracy)
print("\nBest Model Validation Loss:", val_loss)  

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0000e+00

Best Model Validation Accuracy: 1.0

Best Model Validation Loss: 0.0


In [77]:
test= ['I like the product, it is excellent, but it has some issue, it may creat problem sometime']

In [78]:
import re
from nltk.stem import WordNetLemmatizer
lm = WordNetLemmatizer()
def tokenization_review(text):  # Expecting text as a string, not a row
    text = re.sub(r'[^a-zA-Z0-9\s.,!?]', ' ', str(text))  # Keep alphanumeric and punctuation
    text = text.lower()  # Convert to lowercase
    words = text.split()
    words = [lm.lemmatize(word) for word in words if word not in stopwords]
    return ' '.join(words) 
test_input=tokenization_review(test)

In [84]:
test_input

'like product, excellent, issue, may creat problem sometime'

In [79]:
tokenizer.fit_on_texts([test_input])

In [80]:
text_seq=tokenizer.texts_to_sequences([test_input])
text_seq

[[25, 2, 115, 139, 140, 141, 142, 143]]

In [81]:
#test_padded = pad_sequences(text_seq, padding='post') 
test_padded = np.array(text_seq)
test_padded

array([[ 25,   2, 115, 139, 140, 141, 142, 143]])

In [82]:
predicted_labels = ann.predict(test_padded)
predicted_labels

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


array([[1.1912066e-06, 9.9998486e-01, 1.3997794e-05]], dtype=float32)

In [83]:
predicted_labels = np.argmax(predicted_labels, axis=1)  # Get class index
print(predicted_labels)

[1]
