In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf
import matplotlib.pyplot as plt
import re

NotFoundError: dlopen(/Users/karine/Library/Python/3.9/lib/python/site-packages/tensorflow-plugins/libmetal_plugin.dylib, 0x0006): Symbol not found: __ZN3tsl8internal10LogMessageC1EPKcii
  Referenced from: <D2EF42E3-3A7F-39DD-9982-FB6BCDC2853C> /Users/karine/Library/Python/3.9/lib/python/site-packages/tensorflow-plugins/libmetal_plugin.dylib
  Expected in:     <68565E95-D1E8-3F51-B42B-0815E25494F5> /Users/karine/Library/Python/3.9/lib/python/site-packages/tensorflow/python/_pywrap_tensorflow_internal.so

In [None]:
# Fetch the Yelp polarity reviews through TensorFlow Datasets as (text, label)
# pairs, keep the accompanying metadata, and pull out the two predefined splits.

dataset, info = tfds.load(
    'yelp_polarity_reviews',
    as_supervised=True,
    with_info=True,
)
train_data = dataset['train']
test_data = dataset['test']

# Last expression of the cell: shows the structure of one training element.
train_data.element_spec

In [None]:
# Display the dataset metadata object that tfds.load returned (requested via with_info=True).
info

In [None]:
# Peek at a single (review, label) pair from the unbatched training split.

for review, sentiment in train_data.take(1):
    print('text: \n', review.numpy(), '\n')
    print('label: ', sentiment.numpy())

In [None]:
# Build the input pipelines. Only a subset of each split is used; the training
# subset is shuffled, and both subsets are batched and prefetched.

BUFFER_SIZE = 10_000   # shuffle buffer size (elements)
BATCH_SIZE = 128
NUM_TRAIN = 100_000    # number of training reviews taken from the split
NUM_TEST = 20_000      # number of test reviews taken from the split

train_dataset = (
    train_data
    .take(NUM_TRAIN)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = (
    test_data
    .take(NUM_TEST)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Show the first three reviews and labels of one training batch.
for texts, labels in train_dataset.take(1):
    print('texts: ', texts.numpy()[:3])
    print()
    print('labels: ', labels.numpy()[:3])

In [None]:
# Create the TextVectorization layer (text -> integer token ids) and fit its
# vocabulary on the review texts of the training pipeline; labels are dropped.

VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)

train_texts_only = train_dataset.map(lambda review, _label: review)
encoder.adapt(train_texts_only)

In [None]:
# Copy the encoder's vocabulary into a NumPy array (a later cell indexes it with
# encoded token ids) and preview its first 20 entries.
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

In [None]:
# Round-trip demo: encode the first review of one training batch to token ids,
# then map the ids back to vocabulary entries to show what the model "sees".

for batch_texts, batch_labels in train_dataset.take(1):
    first_review = batch_texts[0].numpy()
    print("Original: ", first_review)
    print('Label:', batch_labels[0].numpy())
    encoded_example = encoder(first_review)
    print("Round-trip: ", " ".join(vocab[encoded_example]))

In [None]:
# Assemble the classifier layer by layer:
#   raw text -> token ids -> 64-d embeddings -> bidirectional LSTM -> dense head.
# The final layer is a single sigmoid unit, matching the binary cross-entropy loss.

model = tf.keras.Sequential()
model.add(encoder)
model.add(
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True,
    )
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Fit for at most 20 epochs. Early stopping watches the validation loss, waits
# 5 epochs without improvement, and then restores the best weights seen.
# NOTE(review): test_dataset is used as the validation set here and is also the
# set the model is evaluated on afterwards, so the reported score is not from
# fully held-out data — consider carving a separate validation split.

early_callbacks = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=20,
    callbacks=[early_callbacks],
)

In [None]:
# Score the trained model on test_dataset and report accuracy (scaled to a
# percentage) and loss.

val_loss, val_acc = model.evaluate(test_dataset, verbose=0)
accuracy_pct = val_acc * 100
print("    Valid Accuracy: {:.2f}".format(accuracy_pct))
print("Valid Loss: {:.5f}".format(val_loss))

In [None]:
# Save the trained model in TensorFlow format.
# The path is relative, so the output lands in the kernel's current working directory.
# NOTE(review): the '.tf' suffix presumably selects the SavedModel format on TF 2.x Keras;
# newer Keras releases may only accept '.keras' or '.h5' — confirm against the installed version.

model.save('LSTMmodel.tf')

In [None]:
# Plot the training history side by side: accuracy on the left, loss on the
# right, each with the training curve (pink) and validation curve (green).

fig, (ax_acc, ax_loss) = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(12, 4)

# (axes, panel title, key of the training metric in history.history)
panels = (
    (ax_acc, 'Classification Accuracy', 'accuracy'),
    (ax_loss, 'Loss', 'loss'),
)
for axis, panel_title, metric in panels:
    axis.title.set_text(panel_title)
    axis.plot(history.history[metric], color='pink', label='train')
    axis.plot(history.history['val_' + metric], color='green', label='validation')
    axis.legend(['train', 'validation'])
plt.show()

In [None]:
# Predict sentiments for one batch of test reviews and print the true label,
# the predicted label and the review text for the first few of them.

# Index 0 = negative review, index 1 = positive review (matches the 0/1 labels).
class_names = ["Поганий відгук", "Хороший відгук"]
NUM_EXAMPLES_SHOWN = 6

text, label = next(iter(test_dataset.take(1)))
pred = model.predict(text)
# The model ends in a sigmoid, so rounding turns each probability into 0 or 1.
pred_labels = np.round(pred).astype(int)

# Never index past the end of the batch, even if it holds fewer reviews than
# NUM_EXAMPLES_SHOWN.
for j in range(min(NUM_EXAMPLES_SHOWN, len(pred_labels))):
    true_idx = int(label[j].numpy())      # plain int instead of a tensor as list index
    pred_idx = int(pred_labels[j][0])     # predictions have one column per review
    print(f"Емоційне забарвлення: {class_names[true_idx]}")
    print(f"Передбачене емоційне забарвлення:{class_names[pred_idx]}")
    print(f"Відгук:{text[j].numpy()}")
    print("\n")

In [None]:
# Test the model on custom, hand-written reviews.

string_tensor = tf.constant(["I was very disappointed with my visit.",
    "The service here was terrible - the waiter was rude and inattentive. The food was mediocre at best, and definitely not worth the price.",
    "This place is totally lit!!!The food was off the chain and the vibes were on point. Staff was super chill too. Definitely my new fave spot!",
    "I had high hopes for this restaurant, but it fell short in every way. I won't be coming back!",
    "SOOOO PRETTY PLACE...",
    "Such a dope place! The eats were killer and the whole atmosphere was super cool."
    ])

text_array = np.array(string_tensor)
prediction = model.predict(text_array)
# Round THIS cell's predictions. The previous version rounded the stale `pred`
# left over from the test-batch cell, so the printed labels belonged to
# unrelated test reviews rather than to the custom reviews above.
pred_labels = np.round(prediction).astype(int)
for j in range(len(string_tensor)):
    print(f"Передбачене емоційне забарвлення:{class_names[pred_labels[j][0]]}")
    print(f"Відгук:{string_tensor[j].numpy()}")
    print("\n")