In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Attention, Bidirectional, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv("semeval2014.csv")

# Encode polarity labels (positive=2, neutral=1, negative=0)
label_encoder = LabelEncoder()
data['polarity'] = label_encoder.fit_transform(data['polarity'])

# Tokenize text and aspects
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(data['Sentence'])
review_sequences = tokenizer.texts_to_sequences(data['Sentence'])
aspect_sequences = tokenizer.texts_to_sequences(data['Aspect Term'])

# Pad sequences
max_seq_len = 100  # Maximum length for reviews
review_padded = pad_sequences(review_sequences, maxlen=max_seq_len, padding='post', truncating='post')
aspect_padded = pad_sequences(aspect_sequences, maxlen=max_seq_len, padding='post', truncating='post')
labels = tf.keras.utils.to_categorical(data['polarity'], num_classes=4)

train_reviews, test_reviews, train_aspects, test_aspects, train_labels, test_labels = train_test_split(
    review_padded, aspect_padded, labels, test_size=0.2, random_state=42
)


In [5]:
from HubNLP import build_attention_model

# Hyperparameters
vocab_size = 10000  # You can use the tokenizer's word index size
embedding_dim = 128
max_seq_len = 100
lstm_units = 64

model = build_attention_model(vocab_size, embedding_dim, max_seq_len, lstm_units)

# Compile the model
model.compile(optimizer="Adam", loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    [train_reviews, train_aspects], 
    train_labels, 
    epochs=5, 
    batch_size=32, 
    validation_data=([test_reviews, test_aspects], test_labels)
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate([test_reviews, test_aspects], test_labels)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')




Epoch 1/5




[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 81ms/step - accuracy: 0.3839 - loss: 1.1994 - val_accuracy: 0.4025 - val_loss: 1.1359
Epoch 2/5
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 71ms/step - accuracy: 0.4593 - loss: 1.0837 - val_accuracy: 0.6250 - val_loss: 0.9391
Epoch 3/5
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - accuracy: 0.7121 - loss: 0.7471 - val_accuracy: 0.6737 - val_loss: 0.8299
Epoch 4/5
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - accuracy: 0.8282 - loss: 0.5091 - val_accuracy: 0.7055 - val_loss: 0.8104
Epoch 5/5
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 73ms/step - accuracy: 0.8650 - loss: 0.3910 - val_accuracy: 0.7076 - val_loss: 0.8803
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7413 - loss: 0.7996
Test Loss: 0.8803200125694275
Test Accuracy: 0.7076271176338196
