In [304]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import pandas as pd

data = pd.read_csv("./yelp.csv")

features = data.text # 10000
labels = data.stars - 1 

tokenizer = Tokenizer()
tokenizer.fit_on_texts(features)

sequences = tokenizer.texts_to_sequences(features)
word_index = tokenizer.word_index
max_sequence_length = 100  # Maximum length of sequences
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

train_reviews = padded_sequences[:9000]
train_labels = to_categorical(labels[:9000], num_classes=5)
test_reviews = padded_sequences[9000:]
test_labels = to_categorical(labels[9000:], num_classes=5) 


In [305]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(len(word_index) + 1, 100, input_length=max_sequence_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(5, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(train_reviews, train_labels, epochs=30, batch_size=32, 
                    validation_data=(test_reviews, test_labels))

Epoch 1/30

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test data
loss, accuracy = model.evaluate(test_reviews, test_labels)

print("Test loss:", loss)
print("Test accuracy:", accuracy)


Test loss: 1.1036068201065063
Test accuracy: 0.5059999823570251


In [None]:
# testing

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

review = "so little tables but the food is good"

# Preprocess the review
review_sequence = tokenizer.texts_to_sequences([review])
review_padded = pad_sequences(review_sequence, maxlen=max_sequence_length)

# Make predictions
predictions = model.predict(review_padded)
predicted_label = np.argmax(predictions)

# Map predicted label to original rating
predicted_rating = predicted_label + 1

print("Predicted rating:", predicted_rating)

Predicted rating: 4
