In [2]:
import pandas as pd

In [3]:
# Read the TripAdvisor hotel reviews CSV from the relative `downloads/` folder
# and show the resulting frame as the cell output.
data = pd.read_csv(
    filepath_or_buffer='downloads/tripadvisor_hotel_reviews.csv',
)
data

Unnamed: 0,Review,Rating,Label
0,nice hotel expensive parking got good deal sta...,4,Positive
1,ok nothing special charge diamond member hilto...,2,Negative
2,nice rooms not 4* experience hotel monaco seat...,3,Negative
3,"unique, great stay, wonderful time hotel monac...",5,Positive
4,"great stay great stay, went seahawk game aweso...",5,Positive
...,...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",5,Positive
20487,great location price view hotel great quick pl...,4,Positive
20488,"ok just looks nice modern outside, desk staff ...",2,Negative
20489,hotel theft ruined vacation hotel opened sept ...,1,Negative


In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
# Tokenizer limited to num_words=5000; the same 5000 is used as the
# Embedding vocabulary size when the model is built.
token = Tokenizer(
    num_words=5000,
)

In [6]:
# Learn the vocabulary from the review text, then turn each review into a
# list of integer token ids using that vocabulary.
review_texts = data['Review']
token.fit_on_texts(review_texts)
sequences = token.texts_to_sequences(review_texts)

In [7]:
# Pad every sequence to the length of the longest tokenised review so the
# result is a single rectangular array.
max_length = max(map(len, sequences))
X = pad_sequences(sequences, maxlen=max_length)

In [8]:
from sklearn.preprocessing import LabelEncoder

In [9]:
# Encoder used below to turn the string labels into integer class ids.
l = LabelEncoder()

In [10]:
# Replace the third column (position 2) with its integer-encoded form.
# NOTE: this overwrites the column in place, so the original string labels
# are no longer available in `data` after this cell runs.
label_column = data.iloc[:, 2]
data.iloc[:, 2] = l.fit_transform(label_column)

In [11]:
# Show the frame again to inspect the column that was just label-encoded.
data

Unnamed: 0,Review,Rating,Label
0,nice hotel expensive parking got good deal sta...,4,1
1,ok nothing special charge diamond member hilto...,2,0
2,nice rooms not 4* experience hotel monaco seat...,3,0
3,"unique, great stay, wonderful time hotel monac...",5,1
4,"great stay great stay, went seahawk game aweso...",5,1
...,...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",5,1
20487,great location price view hotel great quick pl...,4,1
20488,"ok just looks nice modern outside, desk staff ...",2,0
20489,hotel theft ruined vacation hotel opened sept ...,1,0


In [12]:
from tensorflow.keras.utils import to_categorical

In [13]:
# One-hot encode the integer labels held in the third column into two classes.
encoded_labels = data.iloc[:, 2]
y = to_categorical(encoded_labels, num_classes=2)

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
)

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense

In [17]:
# Create an empty Sequential model object.
model = Sequential()

In [19]:
# Build the whole network in one statement so this cell is safe to re-run:
# calling model.add() on a model created in another cell appends a second
# copy of every layer each time the cell is executed again.
model = Sequential([
    # Vocabulary size 5000 matches Tokenizer(num_words=5000); each token id
    # is mapped to a 32-dimensional vector. `input_length` is omitted because
    # Keras 3 deprecates it and takes the sequence length from the input.
    Embedding(input_dim=5000, output_dim=32),
    SimpleRNN(100),
    Dense(units=50, activation='relu'),
    # Two softmax units, one per one-hot encoded label class.
    Dense(units=2, activation='softmax'),
])

In [20]:
# categorical_crossentropy pairs with the one-hot targets and the softmax
# output layer; accuracy is tracked alongside the loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train for 5 epochs, holding back 20% of the training split for validation.
model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5




[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 53ms/step - accuracy: 0.7289 - loss: 0.5832 - val_accuracy: 0.7255 - val_loss: 0.5878
Epoch 2/5
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 53ms/step - accuracy: 0.7361 - loss: 0.5800 - val_accuracy: 0.7255 - val_loss: 0.5877
Epoch 3/5
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 54ms/step - accuracy: 0.7382 - loss: 0.5761 - val_accuracy: 0.7255 - val_loss: 0.5877
Epoch 4/5
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 55ms/step - accuracy: 0.7307 - loss: 0.5848 - val_accuracy: 0.7255 - val_loss: 0.5893
Epoch 5/5
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 54ms/step - accuracy: 0.7410 - loss: 0.5735 - val_accuracy: 0.7255 - val_loss: 0.5877


<keras.src.callbacks.history.History at 0x30f458e10>

In [22]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_metrics

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - accuracy: 0.7388 - loss: 0.5745


In [23]:
import numpy as np

In [24]:
sentence = ["the hotel was so good. My experience was really good. Staff was very good"]
# Apply the same tokenizer and padding length that were used for the training data.
seq = token.texts_to_sequences(sentence)
padded_seq = pad_sequences(seq, maxlen=max_length)
prediction = model.predict(padded_seq)
# Take the argmax per row (axis=1) so every sentence gets its own verdict;
# a flat argmax over the whole array is only correct for a single sentence.
# Class 0 is Negative and class 1 is Positive, as shown in the encoded frame.
for predicted_class in np.argmax(prediction, axis=1):
    if predicted_class == 0:
        print("The review is Negative")
    else:
        print("The review is Positive")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
The review is Positive
