In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --- Model / preprocessing hyperparameters ---
# vocab_size:    cap on the tokenizer vocabulary and the embedding table rows
# max_length:    every review is padded/truncated to this many tokens
# embedding_dim: width of each learned word vector
vocab_size = 10_000
max_length = 100
embedding_dim = 128

# Read the tab-separated restaurant reviews file from its absolute path
# (note: this is NOT relative to the working directory).
data_path = '/content/Restaurant_Reviews (1).tsv'
dataset = pd.read_csv(data_path, sep='\t')

# Pull out the two columns used below as plain arrays:
# 'Review' holds the inputs, 'Liked' holds the targets.
reviews, labels = dataset['Review'].values, dataset['Liked'].values

# Hold out 20% of the samples for testing; the fixed seed keeps the
# partition reproducible from run to run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the vocabulary on the training reviews only, so that words seen
# solely in the test split are mapped to the "<OOV>" token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert each split from raw text to lists of integer word indices.
X_train_sequences, X_test_sequences = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Bring every sequence to exactly max_length entries, appending the
# padding at the end of each sequence (padding='post').
X_train_padded, X_test_padded = (
    pad_sequences(encoded, maxlen=max_length, padding='post')
    for encoded in (X_train_sequences, X_test_sequences)
)

# Build the LSTM model: Embedding -> LSTM -> single sigmoid unit.
#
# The inputs are right-padded with index 0 up to max_length
# (pad_sequences(..., padding='post')). Without masking, the LSTM has to
# step through all of those trailing pad tokens after the real words, so
# its final state carries almost no signal from the review and the
# classifier stays at chance level. mask_zero=True tells downstream layers
# to skip timesteps whose token index is 0 (the Keras Tokenizer never
# assigns index 0 to a word, so no real token is lost).
model = Sequential([
    # Declare the input shape explicitly instead of passing the
    # `input_length` argument to Embedding, which is deprecated in Keras 3.
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),  # input and recurrent dropout
    Dense(1, activation='sigmoid'),  # probability of the positive class
])

# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 5 epochs in mini-batches of 64; 20% of the training data is set
# aside by Keras for per-epoch validation.
fit_options = dict(epochs=5, batch_size=64, validation_split=0.2)
history = model.fit(X_train_padded, y_train, **fit_options)

# Score the trained model on the held-out test split; evaluate() returns
# the loss followed by each compiled metric (here: accuracy).
test_metrics = model.evaluate(X_test_padded, y_test)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy}")




Epoch 1/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 161ms/step - accuracy: 0.5071 - loss: 0.6961 - val_accuracy: 0.4563 - val_loss: 0.7000
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.5197 - loss: 0.6935 - val_accuracy: 0.4563 - val_loss: 0.7010
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 120ms/step - accuracy: 0.5296 - loss: 0.6905 - val_accuracy: 0.4563 - val_loss: 0.6970
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 120ms/step - accuracy: 0.5118 - loss: 0.6934 - val_accuracy: 0.4563 - val_loss: 0.6942
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 123ms/step - accuracy: 0.4926 - loss: 0.6932 - val_accuracy: 0.4563 - val_loss: 0.6965
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.4725 - loss: 0.6955
Test Accuracy: 0.47999998927116394
