In [1]:
# Import os and disable tensorflow warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYCARET_CUSTOM_LOGGING_LEVEL'] = 'CRITICAL'

In [2]:
# Import libraries
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Flatten, BatchNormalization, Dropout

In [3]:
# Load reviews dataset
reviews_path = 'combined-dataset/final_reviews_data.csv'
data = pd.read_csv(reviews_path)

In [4]:
# Encode types column and Tokenize reviews column
encoder = LabelEncoder()
data['types_encoded'] = encoder.fit_transform(data['types'])

tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['review'])
sequences = tokenizer.texts_to_sequences(data['review'])

# Pad sequences
max_sequence_length = max(map(len, sequences), default=0)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

In [5]:
# Create features set and normalize sentiment for label
X = {
    'review': padded_sequences,
    'types': data['types_encoded'].values
}

Y = data['sentiment'].values

In [6]:
## Define Models Layer
# Input
review_input = Input(shape=(max_sequence_length,), name='review')
types_input = Input(shape=(1,), name='types')

# Embedding and LSTM for review
review_embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(review_input)
review_lstm = LSTM(128)(review_embedding)

# Embedding and Flatten for types
types_embedding = Embedding(input_dim=len(encoder.classes_), output_dim=128)(types_input)
types_flat = Flatten()(types_embedding)

# Concatenate review and types
concatenated = Concatenate()([review_lstm, types_flat])

# Dense layers
dense_1 = Dense(128, activation='relu')(concatenated)
batch_1 = BatchNormalization()(dense_1)
dropout_1 = Dropout(0.2)(batch_1)

dense_2 = Dense(64, activation='relu')(dropout_1)
batch_2 = BatchNormalization()(dense_2)
dropout_2 = Dropout(0.2)(batch_2)

dense_3 = Dense(32, activation='relu')(dropout_2)
batch_3 = BatchNormalization()(dense_3)
dropout_3 = Dropout(0.2)(batch_3)

output = Dense(1, activation='relu')(dropout_3)

In [7]:
# Create and Compile the Model
model = Model(inputs=[review_input, types_input], outputs=output)
model.summary()
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 review (InputLayer)         [(None, 807)]                0         []                            
                                                                                                  
 types (InputLayer)          [(None, 1)]                  0         []                            
                                                                                                  
 embedding (Embedding)       (None, 807, 128)             4762752   ['review[0][0]']              
                                                                                                  
 embedding_1 (Embedding)     (None, 1, 128)               123648    ['types[0][0]']               
                                                                                              

In [8]:
model.fit([X['review'], X['types']], Y, epochs=25, batch_size=32, validation_split=0.2)

Epoch 1/25


I0000 00:00:1718584890.734298   33388 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x7f8b5a550c50>

In [9]:
loss, mae = model.evaluate([X['review'], X['types']], Y)
print(f'Model has a loss of {loss} and a mean absolute error of {mae}')

Model has a loss of 0.021696556359529495 and a mean absolute error of 0.095406174659729


In [10]:
model.save('saved-model')

INFO:tensorflow:Assets written to: saved-model/assets


INFO:tensorflow:Assets written to: saved-model/assets


In [11]:
model.save('model.keras')