# Text preprocessing

In [2]:
# !pip install viet_text_tools

# !pip install underthesea

# !pip install gensim

# !pip install emoji

In [3]:
# Mount Google Drive at /content/drive; every data, embedding and model path
# used below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import pandas as pd
import re
import numpy as np
import tensorflow as tf
from keras.models import Input
from keras.layers import Dense, Dropout, Concatenate
from keras.layers import LSTM, Embedding, Bidirectional, GRU
from keras.layers import SpatialDropout1D, Conv1D, GlobalAveragePooling1D, GlobalMaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
import matplotlib.pyplot as plt
from keras.initializers import Constant
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.losses import BinaryCrossentropy, CategoricalCrossentropy
import pickle
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
import collections
from sklearn.utils.class_weight import compute_class_weight
from gensim import models

# 1. Get data

In [5]:
# Load the already split and pre-processed train / validation / test sets.
# Later cells read the 'processed_data' text column and one label column per aspect.
df_train = pd.read_csv("/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/data/splited_data/processed_train.csv")
df_val = pd.read_csv("/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/data/splited_data/processed_val.csv")
df_test = pd.read_csv("/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/data/splited_data/processed_test.csv")

# 2. Get labels

## Helper function definitions

In [6]:
def get_label_stage_1(df):
  """Build the stage-1 (aspect detection) label table.

  For every sentiment-bearing aspect column the labels are mapped with
  ``replace({-1: 0, 2: 1, 0: 1})``: -1 becomes 0 and every other encoded
  label (0, 1, 2) ends up as 1, i.e. "is this aspect mentioned in the
  comment or not". The ``others`` column only has -1 mapped to 0.

  Args:
    df: DataFrame holding the columns smell, texture, colour, price,
      shipping, packing, stayingpower and others (labels encoded as
      -1, 0, 1, 2 according to the original notebook comment).

  Returns:
    DataFrame indexed like ``df`` with the columns is_smell, is_texture,
    is_colour, is_price, is_shipping, is_packing, is_stayingpower and
    is_others, in that order.
  """
  # Output column order matters: stage-1 predictions are later matched to
  # these columns by position.
  sentiment_aspects = ['smell', 'texture', 'colour', 'price',
                       'shipping', 'packing', 'stayingpower']

  # Named `presence` (not `dict`) so the builtin is not shadowed.
  presence = {
      'is_{}'.format(aspect): df[aspect].replace({-1: 0, 2: 1, 0: 1})
      for aspect in sentiment_aspects
  }
  presence['is_others'] = df['others'].replace({-1: 0})

  return pd.DataFrame(presence)


def get_index_label(label_stage_df, aspect):
  """Return the index labels of all rows in which `aspect` is present.

  Args:
    label_stage_df: stage-1 label table with one ``is_<aspect>`` column per aspect.
    aspect: aspect name without the ``is_`` prefix.

  Returns:
    List of index labels whose ``is_<aspect>`` value equals 1.
  """
  # The previous version additionally filtered out the label 0, which
  # silently dropped the first row of every split from the stage-2 data.
  present = (label_stage_df['is_{}'.format(aspect)] == 1).to_numpy()
  return label_stage_df.index[present].tolist()

def get_list_index_label(label_stage_df, lst_aspect):
  """Map every aspect in `lst_aspect` to the row indices where it is present.

  Returns a dict ``{aspect: get_index_label(label_stage_df, aspect)}`` that
  keeps the order of `lst_aspect`.
  """
  return {name: get_index_label(label_stage_df, name) for name in lst_aspect}

def get_data_stage_2(lst_indx_label, df):
  """Select, for every aspect, the rows of `df` in which that aspect is present.

  Args:
    lst_indx_label: dict ``{aspect: list of index labels}`` as produced by
      get_list_index_label.
    df: the full DataFrame the index labels refer to.

  Returns:
    dict ``{aspect: sub-DataFrame of df}``.
  """
  # The lists hold index *labels* (they are read from `.index`), so select by
  # label. On a default RangeIndex this equals positional selection, but it
  # stays correct when `df` has been filtered or re-indexed.
  return {aspect: df.loc[labels] for aspect, labels in lst_indx_label.items()}

def get_label_stage_2(df_input):
  """Build the stage-2 (polarity) targets for every aspect.

  Args:
    df_input: dict ``{aspect: DataFrame}``; the frame stored under an aspect
      must contain a column named after that aspect. All eight aspects
      (stayingpower, texture, smell, price, others, colour, shipping,
      packing) are required.

  Returns:
    dict keyed by aspect. Every aspect except ``others`` maps to the output
    of ``to_categorical(..., num_classes = 3)``; ``others`` maps to a 1-D
    array in which -1 has been replaced by 0.
  """
  frames = df_input.copy()

  # Key order of the returned dict follows this tuple.
  aspect_order = ('stayingpower', 'texture', 'smell', 'price',
                  'others', 'colour', 'shipping', 'packing')

  targets = {}
  for aspect in aspect_order:
    raw_labels = frames[aspect][aspect]
    if aspect == 'others':
      targets[aspect] = raw_labels.replace({-1:0}).to_numpy()
    else:
      targets[aspect] = tf.keras.utils.to_categorical(raw_labels, num_classes = 3)

  return targets

def get_cmt(dict_df, lst_aspect):
  """Extract the pre-processed comment texts for each aspect.

  Returns a dict ``{aspect: array of dict_df[aspect]['processed_data']}``
  for the aspects listed in `lst_aspect`.
  """
  return {name: dict_df[name]['processed_data'].values for name in lst_aspect}

def get_array_label(df):
  """Produce the stage-2 inputs and targets for one data split.

  Every column of `df` other than 'data' and 'processed_data' is treated as
  an aspect. For each aspect the rows where it is present are selected, and
  their comment texts and polarity targets are returned.

  Returns:
    (comments, labels): two dicts keyed by aspect, from get_cmt and
    get_label_stage_2 respectively.
  """
  text_columns = ('data', 'processed_data')
  aspects = [name for name in df.columns.values if name not in text_columns]

  presence = get_label_stage_1(df)
  rows_per_aspect = get_list_index_label(presence, aspects)
  frames_per_aspect = get_data_stage_2(rows_per_aspect, df)

  comments = get_cmt(frames_per_aspect, aspects)
  labels = get_label_stage_2(frames_per_aspect)
  return comments, labels

# label_stage_1 = get_label_stage_1(df_train)
# lst_aspect = [col for col in df_train.columns.values if col!='data' if col!='processed_data']
# df_temp = get_data_stage_2(get_list_index_label(label_stage_1, lst_aspect), df_train)
# df_2 = get_label_stage_2(df_temp)
# df_2

## execution

- Label stage 1:



In [7]:
# Stage-1 targets: one binary is_<aspect> column per aspect for each split.
label_aspect_train = get_label_stage_1(df_train)
label_aspect_val = get_label_stage_1(df_val)
label_aspect_test = get_label_stage_1(df_test)
# Display the training label table as the cell output.
label_aspect_train

Unnamed: 0,is_smell,is_texture,is_colour,is_price,is_shipping,is_packing,is_stayingpower,is_others
0,0,0,0,0,1,0,0,0
1,1,1,0,1,0,0,0,0
2,0,1,1,0,0,0,0,0
3,0,1,1,0,0,0,1,0
4,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...
12639,0,0,0,0,0,0,0,1
12640,0,0,0,0,1,1,0,0
12641,1,1,0,0,0,0,0,0
12642,0,0,0,0,1,0,1,0


- Label stage 2

In [8]:
# Stage-2 data: for each split, dicts keyed by aspect holding the comment
# texts (cmt_*) and the polarity targets (label_sentiment_*).
cmt_train, label_sentiment_train = get_array_label(df_train)
cmt_val, label_sentiment_val = get_array_label(df_val)
cmt_test, label_sentiment_test = get_array_label(df_test)

# Create word embedding matrix stage 1

In [9]:
def get_tokenize(train, val, test):
  """Fit one Keras Tokenizer and convert the three splits to id sequences.

  The tokenizer is created with ``filters=''`` and is fitted on the train,
  validation and test texts (in that order), so its vocabulary covers all
  three splits.

  Returns:
    (tokenizer, tokenized_train, tokenized_val, tokenized_test)
  """
  splits = (train, val, test)

  tok = Tokenizer(filters='')
  for texts in splits:
    tok.fit_on_texts(texts)

  tokenized_train, tokenized_val, tokenized_test = [
      tok.texts_to_sequences(texts) for texts in splits
  ]
  return tok, tokenized_train, tokenized_val, tokenized_test

In [10]:
def get_padded(max_len, tokenized_train, tokenized_val, tokenized_test):
  """Post-pad the three tokenized splits to a common length of `max_len`.

  Returns:
    (padded_train, padded_val, padded_test) as produced by pad_sequences.
  """
  def _pad(sequences):
    return pad_sequences(sequences, padding = 'post', maxlen = max_len)

  return _pad(tokenized_train), _pad(tokenized_val), _pad(tokenized_test)

In [11]:
def get_embedding_matrix(embedding_dim, vocab_size, tok, word_vectors=None):
  """Build the initial embedding matrix for a tokenizer's vocabulary.

  Args:
    embedding_dim: length of each embedding vector.
    vocab_size: number of rows to allocate; must be larger than the biggest
      index in ``tok.word_index``.
    tok: object exposing ``word_index`` (``{word: row index}``), e.g. a
      fitted Keras Tokenizer.
    word_vectors: optional ``{word: vector}`` mapping. When omitted, the
      notebook-level ``word_vec_dict`` is used, which keeps existing
      three-argument calls working.

  Returns:
    Array of shape (vocab_size, embedding_dim). Rows of words with no
    pretrained vector stay all-zero.
  """
  if word_vectors is None:
    # Backward-compatible fallback to the global lookup built from the
    # pretrained embedding file.
    word_vectors = word_vec_dict

  embed_matrix = np.zeros(shape=(vocab_size, embedding_dim))
  for word, i in tok.word_index.items():
    embed_vector = word_vectors.get(word)
    if embed_vector is not None:
      embed_matrix[i] = embed_vector

  return embed_matrix

In [12]:
# Load the pretrained word vectors from a text-format (binary = False)
# word2vec file into a gensim KeyedVectors object.
w2v_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/word2vec/elmo_embeddings_large.txt'
w2c_model = models.KeyedVectors.load_word2vec_format(w2v_path, binary = False)

In [13]:
# Build a plain {word: vector} lookup used by get_embedding_matrix.
# `w2c_model` is already a KeyedVectors object, so the deprecated `.wv`
# indirection is not needed. gensim >= 4 exposes the vocabulary as
# `key_to_index`; older releases expose it as `.vocab`.
vocab = getattr(w2c_model, 'key_to_index', None)
if vocab is None:
  vocab = w2c_model.vocab

word_vec_dict = {word: w2c_model.get_vector(word) for word in vocab}

  """Entry point for launching an IPython kernel.


In [14]:
# Tokenize stage 1: a single tokenizer over the 'processed_data' text of all three splits.
tokenizer_aspect, tokenized_aspect_train, tokenized_aspect_val, tokenized_aspect_test = get_tokenize(df_train['processed_data'].values, df_val['processed_data'].values, df_test['processed_data'].values)

# Fixed sequence length handed to pad_sequences for every split.
max_len = 115
# Padding stage 1
padded_aspect_train, padded_aspect_val, padded_aspect_test = get_padded(max_len, tokenized_aspect_train, tokenized_aspect_val, tokenized_aspect_test)

# Width of each embedding row; presumably the dimensionality of the vectors in word_vec_dict (TODO confirm).
embedding_dim = 1024 
# Embedding matrix stage 1
# +1 so the matrix has a row for the largest word index (rows are addressed by word_index values).
vocab_aspect_size = len(tokenizer_aspect.word_index)+1
embedding_aspect_matrix = get_embedding_matrix(embedding_dim, vocab_aspect_size, tokenizer_aspect)

# Model building

### BiLSTM


#### BiLSTM func

In [15]:
def bilstm_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val, epochs=70):
  """Build, train and return the stage-1 BiLSTM aspect detector.

  Architecture: trainable Embedding initialised from
  `embedding_aspect_matrix` -> Bidirectional LSTM (200 units) ->
  Dense 128 -> Dropout 0.2 -> Dense 64 -> Dense 32 -> Dense 8 with sigmoid
  (one independent output per aspect).

  Training uses Adam (learning rate 1e-4), binary cross-entropy, batch size
  128 and early stopping on val_loss with patience 3.

  Args:
    label_aspect_train, label_aspect_val: DataFrames; converted with
      ``to_numpy()`` before fitting.
    epochs: upper bound on the number of training epochs.

  Returns:
    The fitted tf.keras.Model.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_aspect_size,
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_aspect_matrix),
      input_length=max_len,
      trainable=True,
  )(token_ids)
  encoded = Bidirectional(LSTM(units = 200, activation = 'tanh'))(embedded)

  # Classification head
  hidden = Dense(128, activation='relu')(encoded)
  hidden = Dropout(0.2)(hidden)
  hidden = Dense(64, activation='relu')(hidden)
  hidden = Dense(32, activation='relu')(hidden)
  aspect_probs = Dense(8, activation='sigmoid')(hidden)

  aspect_model = tf.keras.Model(inputs = token_ids, outputs = aspect_probs)
  aspect_model.compile(optimizer=Adam(learning_rate = 0.0001), loss='binary_crossentropy', metrics=['acc'])
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  aspect_model.fit(
      x=padded_aspect_train,
      y=label_aspect_train.to_numpy(),
      validation_data=(padded_aspect_val, label_aspect_val.to_numpy()),
      batch_size = 128,
      epochs=epochs,
      callbacks = [early_stopping],
      verbose=1,
  )

  return aspect_model

def bilstm_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val, epochs=70):
  """Build, train and return the stage-2 BiLSTM polarity classifier of one aspect.

  Architecture: trainable Embedding initialised from
  ``embedding_sentiment_matrix[aspect]`` -> Bidirectional LSTM (200 units)
  -> Dense 128 -> Dropout 0.2 -> Dense 64 -> Dense 32 -> Dense 3 with softmax.

  Args:
    aspect: key selecting this aspect's entry in every dict argument.
    vocab_sentiment_size, embedding_sentiment_matrix, padded_sentiment_train,
    padded_sentiment_val, label_sentiment_train, label_sentiment_val:
      dicts keyed by aspect.
    embedding_dim: width of the embedding vectors.
    max_len: padded sequence length.
    epochs: upper bound on the number of training epochs.

  Returns:
    The fitted tf.keras.Model. The model is no longer written into the
    global ``sentiment_model`` dict; callers store the return value.
  """
  token_ids = Input(shape=(max_len,))
  embed = Embedding(input_dim=vocab_sentiment_size[aspect],
                  output_dim=embedding_dim,
                  embeddings_initializer=Constant(embedding_sentiment_matrix[aspect]),
                  input_length=max_len,
                  trainable=True)(token_ids)
  encoded = Bidirectional(LSTM(units = 200, activation = 'tanh'))(embed)

  sentiment_dense2 = Dense(128, activation='relu')(encoded)
  sentiment_dropout1 = Dropout(0.2)(sentiment_dense2)
  sentiment_dense3 = Dense(64, activation='relu')(sentiment_dropout1)
  sentiment_dense4 = Dense(32, activation='relu')(sentiment_dense3)
  out_sentiment = Dense(units = 3, activation = 'softmax')(sentiment_dense4)

  model = tf.keras.Model(inputs = token_ids, outputs = out_sentiment)
  # The head is a softmax over three mutually exclusive classes, so the
  # matching loss is categorical cross-entropy. Binary cross-entropy would
  # score the three outputs as independent yes/no decisions.
  model.compile(optimizer=Adam(learning_rate = 0.0001), loss='categorical_crossentropy', metrics=['acc'])
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  # Fit model
  model.fit(x=padded_sentiment_train[aspect],
    y=label_sentiment_train[aspect],
    validation_data=(padded_sentiment_val[aspect],
      label_sentiment_val[aspect]),
    batch_size = 128,
    epochs=epochs,
    callbacks = [early_stopping],
    verbose=1)

  return model

#### Execution

- Stage 1: aspect

In [16]:
# Take the sequence length from the padded training matrix, then train the
# stage-1 BiLSTM aspect model and save it to Drive.
max_len=padded_aspect_train.shape[1]
aspect_model = bilstm_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val)

save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_aspect'
aspect_model.save(save_path)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_aspect/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_aspect/assets


In [17]:
# Reload the saved stage-1 BiLSTM model from Drive (replaces the in-memory aspect_model).
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_aspect'
aspect_model = keras.models.load_model(save_path)

In [18]:
# Predict aspect probabilities for every split and round them to 0/1 so they
# can be compared column by column with the stage-1 label tables.
df_train_true_ = label_aspect_train
df_val_true_ = label_aspect_val
df_test_true_ = label_aspect_test

pred_train = aspect_model.predict(padded_aspect_train)
pred_val = aspect_model.predict(padded_aspect_val)
pred_test = aspect_model.predict(padded_aspect_test)

df_train_pred = pd.DataFrame(pred_train, columns=df_train_true_.columns).round(0)
df_val_pred = pd.DataFrame(pred_val, columns=df_val_true_.columns).round(0)
df_test_pred = pd.DataFrame(pred_test, columns=df_test_true_.columns).round(0)

In [19]:
def get_pd_report(label_aspect_train, df_test_true_, df_test_pred):
  """Collect the per-aspect classification reports into one DataFrame.

  Args:
    label_aspect_train: DataFrame whose column names list the aspects to report on.
    df_test_true_: DataFrame of true labels, one column per aspect.
    df_test_pred: DataFrame of predicted labels, one column per aspect.

  Returns:
    DataFrame stacking the transposed ``classification_report`` of every
    aspect, with an extra 'aspect' column naming the aspect each row
    belongs to. Empty DataFrame when there are no aspect columns.
  """
  per_aspect = []
  for col in label_aspect_train.columns:
    report = pd.DataFrame(classification_report(df_test_true_[col], df_test_pred[col], output_dict=True)).T
    report['aspect'] = col
    per_aspect.append(report)

  if not per_aspect:
    # pd.concat rejects an empty list; keep returning an empty frame.
    return pd.DataFrame()

  # Concatenate once instead of growing a frame inside the loop.
  return pd.concat(per_aspect)

In [20]:
# Print the raw classification report of each aspect on the test split and
# collect the reports as DataFrames.
aspect_result_bilstm_report = {}
for col in label_aspect_train.columns:
  print(col)
  aspect_result_bilstm = classification_report(df_test_true_[col], df_test_pred[col], output_dict=True)
  print(aspect_result_bilstm)

  report_frame = pd.DataFrame(aspect_result_bilstm).T
  report_frame['aspect'] = col
  aspect_result_bilstm_report[col] = report_frame

# Stack the per-aspect reports in insertion order.
output_aspect_report = pd.DataFrame()
for report_frame in aspect_result_bilstm_report.values():
  output_aspect_report = pd.concat([output_aspect_report, report_frame])

is_smell
{'0': {'precision': 0.9921383647798742, 'recall': 0.9976284584980237, 'f1-score': 0.9948758376034688, 'support': 1265}, '1': {'precision': 0.9902912621359223, 'recall': 0.9683544303797469, 'f1-score': 0.9792000000000001, 'support': 316}, 'accuracy': 0.9917773561037319, 'macro avg': {'precision': 0.9912148134578982, 'recall': 0.9829914444388853, 'f1-score': 0.9870379188017344, 'support': 1581}, 'weighted avg': {'precision': 0.9917691779136574, 'recall': 0.9917773561037319, 'f1-score': 0.9917426531109349, 'support': 1581}}
is_texture
{'0': {'precision': 0.9619909502262444, 'recall': 0.9770220588235294, 'f1-score': 0.9694482444140448, 'support': 1088}, '1': {'precision': 0.9474789915966386, 'recall': 0.9148073022312373, 'f1-score': 0.9308565531475748, 'support': 493}, 'accuracy': 0.9576217583807717, 'macro avg': {'precision': 0.9547349709114414, 'recall': 0.9459146805273834, 'f1-score': 0.9501523987808098, 'support': 1581}, 'weighted avg': {'precision': 0.9574657158148618, 'recal

In [21]:
# Rebuild the stage-1 BiLSTM test report with get_pd_report and write it to Drive as CSV.
save_path  = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/aspect_bilstm_report.csv'
output_aspect_report = get_pd_report(label_aspect_train, df_test_true_, df_test_pred)
output_aspect_report.to_csv(save_path)

- Stage 2 : Polarity

In [22]:
# Aspect names for stage 2: strip the 'is_' prefix from the stage-1 columns
# and leave out 'others'.
lst_aspect = [
    col.split('_')[1]
    for col in label_aspect_train.columns.values
    if col!='is_others'
]

In [23]:
# Stage 2 preparation: each aspect gets its own tokenizer, padded sequences
# and embedding matrix, built from the comments that mention that aspect.
tokenizer_sentiment = {}
tokenized_sentiment_train, tokenized_sentiment_val, tokenized_sentiment_test = {}, {}, {}
padded_sentiment_train, padded_sentiment_val, padded_sentiment_test = {}, {}, {}
vocab_sentiment_size = {}
embedding_sentiment_matrix = {}

for aspect in lst_aspect:
  # tokenize stage 2
  (tokenizer_sentiment[aspect],
   tokenized_sentiment_train[aspect],
   tokenized_sentiment_val[aspect],
   tokenized_sentiment_test[aspect]) = get_tokenize(cmt_train[aspect], cmt_val[aspect], cmt_test[aspect])

  # padding stage 2
  (padded_sentiment_train[aspect],
   padded_sentiment_val[aspect],
   padded_sentiment_test[aspect]) = get_padded(max_len, tokenized_sentiment_train[aspect], tokenized_sentiment_val[aspect], tokenized_sentiment_test[aspect])

  # embedding matrix stage 2
  vocab_sentiment_size[aspect] = len(tokenizer_sentiment[aspect].word_index)+1
  embedding_sentiment_matrix[aspect] = get_embedding_matrix(embedding_dim, vocab_sentiment_size[aspect], tokenizer_sentiment[aspect])

In [24]:
# Train one BiLSTM polarity model per aspect and save each model to Drive
# under a path that ends with the aspect name.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = bilstm_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val)
  sentiment_model[aspect].save('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_{}'.format(aspect))

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_smell/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_smell/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_texture/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_texture/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_colour/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_colour/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_price/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_price/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_shipping/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_shipping/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_packing/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_packing/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_stayingpower/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_stayingpower/assets


In [25]:
# Reload the saved per-aspect BiLSTM polarity models from Drive into a fresh dict.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = keras.models.load_model('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/stl_BiLSTM_polarity_{}'.format(aspect))

In [26]:
# Predict polarity for every aspect and split. Predictions and one-hot
# targets are both reduced to class indices with argmax.
df_sentiment_train_pred, df_sentiment_train_true_ = {}, {}
df_sentiment_val_pred, df_sentiment_val_true_ = {}, {}
df_sentiment_test_pred, df_sentiment_test_true_ = {}, {}

for aspect in lst_aspect:
  pred_sentiment_train = sentiment_model[aspect].predict(padded_sentiment_train[aspect])
  pred_sentiment_val = sentiment_model[aspect].predict(padded_sentiment_val[aspect])
  pred_sentiment_test = sentiment_model[aspect].predict(padded_sentiment_test[aspect])

  # predicted class indices
  df_sentiment_train_pred[aspect] = np.argmax(pred_sentiment_train, axis=1)
  df_sentiment_val_pred[aspect] = np.argmax(pred_sentiment_val, axis=1)
  df_sentiment_test_pred[aspect] = np.argmax(pred_sentiment_test, axis=1)

  # true class indices
  df_sentiment_train_true_[aspect] = np.argmax(label_sentiment_train[aspect], axis = 1)
  df_sentiment_val_true_[aspect] = np.argmax(label_sentiment_val[aspect], axis = 1)
  df_sentiment_test_true_[aspect] = np.argmax(label_sentiment_test[aspect], axis=1)

In [27]:
def get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred):
  """Collect the per-aspect polarity classification reports into one DataFrame.

  Args:
    df_sentiment_test_true_: dict ``{aspect: array of true class indices}``.
    df_sentiment_test_pred: dict ``{aspect: array of predicted class indices}``;
      must contain every key of `df_sentiment_test_true_`.

  Returns:
    DataFrame stacking the transposed ``classification_report`` of every
    aspect, with an extra 'aspect' column. Empty DataFrame when the input
    dict is empty.
  """
  per_aspect = []
  # Iterate over the aspects actually passed in rather than the notebook
  # global `lst_aspect`.
  for aspect, y_true in df_sentiment_test_true_.items():
    y_pred = df_sentiment_test_pred[aspect]
    report = pd.DataFrame(classification_report(y_true.tolist(), y_pred.tolist(), output_dict=True)).T
    report['aspect'] = aspect
    per_aspect.append(report)

  if not per_aspect:
    return pd.DataFrame()

  # Return the sentiment report built here. The previous version returned
  # the global `output_aspect_report`, i.e. the stage-1 aspect report.
  return pd.concat(per_aspect)

In [28]:
# Build the polarity classification report of each aspect on the test split.
sentiment_result_bilstm = {}
for aspect in lst_aspect:
  true_classes = df_sentiment_test_true_[aspect].tolist()
  predicted_classes = df_sentiment_test_pred[aspect].tolist()
  report_frame = pd.DataFrame(classification_report(true_classes, predicted_classes, output_dict=True)).T
  report_frame['aspect'] = aspect
  sentiment_result_bilstm[aspect] = report_frame

# Stack the per-aspect reports in insertion order.
output_sentiment_report = pd.DataFrame()
for report_frame in sentiment_result_bilstm.values():
  output_sentiment_report = pd.concat([output_sentiment_report, report_frame])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
# Write the value returned by get_pd_report_sentiment for the BiLSTM polarity models to Drive as CSV.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM/sentiment_bilstm_report.csv'
output_sentiment_report = get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred)
output_sentiment_report.to_csv(save_path)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### BiGRU

#### BiGRU func

In [30]:
def bigru_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val, epochs=70):
  """Build, train and return the stage-1 BiGRU aspect detector.

  Architecture: trainable Embedding initialised from
  `embedding_aspect_matrix` -> Bidirectional GRU (200 units) ->
  Dense 128 -> Dropout 0.2 -> Dense 64 -> Dense 32 -> Dense 8 with sigmoid
  (one independent output per aspect).

  Training uses Adam (learning rate 1e-4), binary cross-entropy, batch size
  128 and early stopping on val_loss with patience 3.

  Args:
    label_aspect_train, label_aspect_val: DataFrames; converted with
      ``to_numpy()`` before fitting.
    epochs: upper bound on the number of training epochs.

  Returns:
    The fitted tf.keras.Model.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_aspect_size,
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_aspect_matrix),
      input_length=max_len,
      trainable=True,
  )(token_ids)
  encoded = Bidirectional(GRU(units = 200, activation = 'tanh'))(embedded)

  # Classification head
  hidden = Dense(128, activation='relu')(encoded)
  hidden = Dropout(0.2)(hidden)
  hidden = Dense(64, activation='relu')(hidden)
  hidden = Dense(32, activation='relu')(hidden)
  aspect_probs = Dense(8, activation='sigmoid')(hidden)

  aspect_model = tf.keras.Model(inputs = token_ids, outputs = aspect_probs)
  aspect_model.compile(optimizer=Adam(learning_rate = 0.0001), loss='binary_crossentropy', metrics=['acc'])
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  aspect_model.fit(
      x=padded_aspect_train,
      y=label_aspect_train.to_numpy(),
      validation_data=(padded_aspect_val, label_aspect_val.to_numpy()),
      batch_size = 128,
      epochs=epochs,
      callbacks = [early_stopping],
      verbose=1,
  )

  return aspect_model

def bigru_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val, epochs=70):
  """Build, train and return the stage-2 BiGRU polarity classifier of one aspect.

  Architecture: trainable Embedding initialised from
  ``embedding_sentiment_matrix[aspect]`` -> Bidirectional GRU (200 units)
  -> Dense 128 -> Dropout 0.2 -> Dense 64 -> Dense 32 -> Dense 3 with softmax.

  Args:
    aspect: key selecting this aspect's entry in every dict argument.
    vocab_sentiment_size, embedding_sentiment_matrix, padded_sentiment_train,
    padded_sentiment_val, label_sentiment_train, label_sentiment_val:
      dicts keyed by aspect.
    embedding_dim: width of the embedding vectors.
    max_len: padded sequence length.
    epochs: upper bound on the number of training epochs.

  Returns:
    The fitted tf.keras.Model. The model is no longer written into the
    global ``sentiment_model`` dict; callers store the return value.
  """
  token_ids = Input(shape=(max_len,))
  embed = Embedding(input_dim=vocab_sentiment_size[aspect],
                  output_dim=embedding_dim,
                  embeddings_initializer=Constant(embedding_sentiment_matrix[aspect]),
                  input_length=max_len,
                  trainable=True)(token_ids)
  encoded = Bidirectional(GRU(units = 200, activation = 'tanh'))(embed)

  sentiment_dense2 = Dense(128, activation='relu')(encoded)
  sentiment_dropout1 = Dropout(0.2)(sentiment_dense2)
  sentiment_dense3 = Dense(64, activation='relu')(sentiment_dropout1)
  sentiment_dense4 = Dense(32, activation='relu')(sentiment_dense3)
  out_sentiment = Dense(units = 3, activation = 'softmax')(sentiment_dense4)

  model = tf.keras.Model(inputs = token_ids, outputs = out_sentiment)
  # The head is a softmax over three mutually exclusive classes, so the
  # matching loss is categorical cross-entropy. Binary cross-entropy would
  # score the three outputs as independent yes/no decisions.
  model.compile(optimizer=Adam(learning_rate = 0.0001), loss='categorical_crossentropy', metrics=['acc'])
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  # Fit model
  model.fit(x=padded_sentiment_train[aspect],
    y=label_sentiment_train[aspect],
    validation_data=(padded_sentiment_val[aspect],
      label_sentiment_val[aspect]),
    batch_size = 128,
    epochs=epochs,
    callbacks = [early_stopping],
    verbose=1)

  return model

#### Execution

- Stage 1 : aspect

In [31]:
# Take the sequence length from the padded training matrix, then train the
# stage-1 BiGRU aspect model and save it to Drive.
max_len=padded_aspect_train.shape[1]
aspect_model = bigru_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val)

save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_aspect'
aspect_model.save(save_path)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_aspect/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_aspect/assets


In [32]:
# Reload the saved stage-1 BiGRU model from Drive (replaces the in-memory aspect_model).
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_aspect'
aspect_model = keras.models.load_model(save_path)

In [33]:
# Predict aspect probabilities for every split and round them to 0/1 so they
# can be compared column by column with the stage-1 label tables.
df_train_true_ = label_aspect_train
df_val_true_ = label_aspect_val
df_test_true_ = label_aspect_test

pred_train = aspect_model.predict(padded_aspect_train)
pred_val = aspect_model.predict(padded_aspect_val)
pred_test = aspect_model.predict(padded_aspect_test)

df_train_pred = pd.DataFrame(pred_train, columns=df_train_true_.columns).round(0)
df_val_pred = pd.DataFrame(pred_val, columns=df_val_true_.columns).round(0)
df_test_pred = pd.DataFrame(pred_test, columns=df_test_true_.columns).round(0)

In [34]:
# Print the text classification report of every aspect on the test split.
for col in label_aspect_train.columns:
  print(col)
  aspect_result_bigru = classification_report(df_test_true_[col], df_test_pred[col])
  print(aspect_result_bigru)

is_smell
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1265
           1       0.99      0.97      0.98       316

    accuracy                           0.99      1581
   macro avg       0.99      0.98      0.99      1581
weighted avg       0.99      0.99      0.99      1581

is_texture
              precision    recall  f1-score   support

           0       0.99      0.96      0.98      1088
           1       0.93      0.98      0.95       493

    accuracy                           0.97      1581
   macro avg       0.96      0.97      0.96      1581
weighted avg       0.97      0.97      0.97      1581

is_colour
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       854
           1       0.96      0.97      0.97       727

    accuracy                           0.97      1581
   macro avg       0.97      0.97      0.97      1581
weighted avg       0.97      0.97      0.97 

In [35]:
# Build the stage-1 BiGRU test report with get_pd_report and write it to Drive as CSV.
save_path  = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/aspect_bigru_report.csv'
output_aspect_report = get_pd_report(label_aspect_train, df_test_true_, df_test_pred)
output_aspect_report.to_csv(save_path)

- Stage 2 : Polarity

In [36]:
# Aspect names for stage 2: strip the 'is_' prefix from the stage-1 columns
# and leave out 'others'.
lst_aspect = [
    col.split('_')[1]
    for col in label_aspect_train.columns.values
    if col!='is_others'
]

In [37]:
# Stage 2 preparation: each aspect gets its own tokenizer, padded sequences
# and embedding matrix, built from the comments that mention that aspect.
tokenizer_sentiment = {}
tokenized_sentiment_train, tokenized_sentiment_val, tokenized_sentiment_test = {}, {}, {}
padded_sentiment_train, padded_sentiment_val, padded_sentiment_test = {}, {}, {}
vocab_sentiment_size = {}
embedding_sentiment_matrix = {}

for aspect in lst_aspect:
  # tokenize stage 2
  (tokenizer_sentiment[aspect],
   tokenized_sentiment_train[aspect],
   tokenized_sentiment_val[aspect],
   tokenized_sentiment_test[aspect]) = get_tokenize(cmt_train[aspect], cmt_val[aspect], cmt_test[aspect])

  # padding stage 2
  (padded_sentiment_train[aspect],
   padded_sentiment_val[aspect],
   padded_sentiment_test[aspect]) = get_padded(max_len, tokenized_sentiment_train[aspect], tokenized_sentiment_val[aspect], tokenized_sentiment_test[aspect])

  # embedding matrix stage 2
  vocab_sentiment_size[aspect] = len(tokenizer_sentiment[aspect].word_index)+1
  embedding_sentiment_matrix[aspect] = get_embedding_matrix(embedding_dim, vocab_sentiment_size[aspect], tokenizer_sentiment[aspect])

In [38]:
# Train one BiGRU polarity model per aspect and save each to its own Drive folder.
# The registry is reset first so it only ever holds the models trained by this cell.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = bigru_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val)
  sentiment_model[aspect].save('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_{}'.format(aspect))

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_smell/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_smell/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_texture/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_texture/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_colour/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_colour/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_price/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_price/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_shipping/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_shipping/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_packing/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_packing/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_stayingpower/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_stayingpower/assets


In [39]:
# Reload the saved BiGRU polarity models from Drive, one per aspect, keyed by aspect name.
sentiment_model = {
    aspect: keras.models.load_model('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/stl_BiGRU_polarity_{}'.format(aspect))
    for aspect in lst_aspect
}

In [40]:
# Predict polarity per aspect on every split and reduce both the label rows and
# the model outputs to class indices (argmax over axis 1).
df_sentiment_train_true_, df_sentiment_train_pred = {}, {}
df_sentiment_val_true_, df_sentiment_val_pred = {}, {}
df_sentiment_test_true_, df_sentiment_test_pred = {}, {}

for aspect in lst_aspect:
  # Raw model outputs for each split.
  pred_sentiment_train = sentiment_model[aspect].predict(padded_sentiment_train[aspect])
  pred_sentiment_val = sentiment_model[aspect].predict(padded_sentiment_val[aspect])
  pred_sentiment_test = sentiment_model[aspect].predict(padded_sentiment_test[aspect])

  # Ground-truth class index per row.
  df_sentiment_train_true_[aspect] = np.argmax(label_sentiment_train[aspect], axis=1)
  df_sentiment_val_true_[aspect] = np.argmax(label_sentiment_val[aspect], axis=1)
  df_sentiment_test_true_[aspect] = np.argmax(label_sentiment_test[aspect], axis=1)

  # Predicted class index per row.
  df_sentiment_train_pred[aspect] = np.argmax(pred_sentiment_train, axis=1)
  df_sentiment_val_pred[aspect] = np.argmax(pred_sentiment_val, axis=1)
  df_sentiment_test_pred[aspect] = np.argmax(pred_sentiment_test, axis=1)

# Text classification report for each aspect, computed on the test split.
sentiment_result_bigru = {
    aspect: classification_report(df_sentiment_test_true_[aspect].tolist(), df_sentiment_test_pred[aspect].tolist())
    for aspect in lst_aspect
}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
# Summarise the BiGRU polarity predictions on the test split with get_pd_report_sentiment
# (defined earlier in the notebook) and write the result to Drive as CSV.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU/sentiment_bigru_report.csv'
output_sentiment_report = get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred)
output_sentiment_report.to_csv(save_path)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### BiLSTM+CONV

#### def func

In [42]:
def bilstm_conv_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val, epochs=70):
  """Build, train and return the BiLSTM+Conv1D aspect-detection model.

  Architecture: trainable embedding initialised from `embedding_aspect_matrix`
  -> spatial dropout -> bidirectional LSTM (200 units per direction) -> Conv1D
  (128 filters, kernel size 2) -> concatenated global average/max pooling ->
  dense head (128 -> dropout -> 64 -> 32) -> 8 sigmoid outputs.

  Args:
    vocab_aspect_size: number of rows of the embedding layer.
    embedding_dim: width of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_aspect_matrix: initial embedding weights.
    padded_aspect_train, padded_aspect_val: padded token-id inputs.
    label_aspect_train, label_aspect_val: label tables exposing `.to_numpy()`.
    epochs: maximum number of epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model`.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_aspect_size,
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_aspect_matrix),
      input_length=max_len,
      trainable=True,
  )(token_ids)

  # Sequence encoder: dropout over whole embedding channels, BiLSTM, then a width-2 convolution.
  dropped = SpatialDropout1D(0.2)(embedded)
  encoded = Bidirectional(LSTM(units=200, activation='tanh', return_sequences=True))(dropped)
  convolved = Conv1D(128, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(encoded)

  # Collapse the time axis two ways and join the results.
  pooled_avg = GlobalAveragePooling1D()(convolved)
  pooled_max = GlobalMaxPooling1D()(convolved)
  pooled = Concatenate(axis=-1)([pooled_avg, pooled_max])

  # Classification head; one independent sigmoid per output label.
  hidden = Dense(128, activation='relu')(pooled)
  hidden = Dropout(0.2)(hidden)
  for width in (64, 32):
    hidden = Dense(width, activation='relu')(hidden)
  aspect_probs = Dense(8, activation='sigmoid')(hidden)

  aspect_model = tf.keras.Model(inputs=token_ids, outputs=aspect_probs)
  aspect_model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['acc'])

  # Stop once validation loss has not improved for 3 consecutive epochs.
  early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  aspect_model.fit(
      x=padded_aspect_train,
      y=label_aspect_train.to_numpy(),
      validation_data=(padded_aspect_val, label_aspect_val.to_numpy()),
      batch_size=128,
      epochs=epochs,
      callbacks=[early_stop],
      verbose=1,
  )

  return aspect_model

def bilstm_conv_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val, epochs=70):
  """Build, train and return the BiLSTM+Conv1D polarity classifier for one aspect.

  Architecture: trainable embedding initialised from
  `embedding_sentiment_matrix[aspect]` -> spatial dropout -> bidirectional LSTM
  (200 units per direction) -> Conv1D (128 filters, kernel size 2) ->
  concatenated global average/max pooling -> dense head (128 -> dropout -> 64
  -> 32) -> 3-way softmax.

  The model is compiled with categorical cross-entropy, which is the loss that
  matches a single softmax distribution over mutually exclusive classes. With
  this loss the 'acc' metric reports categorical accuracy rather than
  element-wise binary accuracy.

  The model is held in a local variable, so the function does not depend on a
  notebook-level `sentiment_model` dict existing; callers store the return value.

  Args:
    aspect: key selecting this aspect's entry in every dict argument.
    vocab_sentiment_size: dict aspect -> number of rows of the embedding layer.
    embedding_dim: width of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_sentiment_matrix: dict aspect -> initial embedding weights.
    padded_sentiment_train, padded_sentiment_val: dict aspect -> padded token-id inputs.
    label_sentiment_train, label_sentiment_val: dict aspect -> label array;
      assumed to be one-hot rows over the 3 polarity classes (the evaluation
      cells take argmax over axis 1) -- TODO confirm against the label builder.
    epochs: maximum number of epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model` for `aspect`.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_sentiment_size[aspect],
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_sentiment_matrix[aspect]),
      input_length=max_len,
      trainable=True,
  )(token_ids)

  # Sequence encoder: dropout over whole embedding channels, BiLSTM, then a width-2 convolution.
  dropped = SpatialDropout1D(0.2)(embedded)
  encoded = Bidirectional(LSTM(units=200, activation='tanh', return_sequences=True))(dropped)
  convolved = Conv1D(128, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(encoded)

  # Collapse the time axis two ways and join the results.
  pooled_avg = GlobalAveragePooling1D()(convolved)
  pooled_max = GlobalMaxPooling1D()(convolved)
  pooled = Concatenate(axis=-1)([pooled_avg, pooled_max])

  # Classification head ending in a softmax over the 3 polarity classes.
  hidden = Dense(128, activation='relu')(pooled)
  hidden = Dropout(0.2)(hidden)
  hidden = Dense(64, activation='relu')(hidden)
  hidden = Dense(32, activation='relu')(hidden)
  out_sentiment = Dense(units=3, activation='softmax')(hidden)

  polarity_model = tf.keras.Model(inputs=token_ids, outputs=out_sentiment)
  # Softmax output + one-hot targets -> categorical (not binary) cross-entropy.
  polarity_model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['acc'])

  # Stop once validation loss has not improved for 3 consecutive epochs.
  early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  polarity_model.fit(
      x=padded_sentiment_train[aspect],
      y=label_sentiment_train[aspect],
      validation_data=(padded_sentiment_val[aspect], label_sentiment_val[aspect]),
      batch_size=128,
      epochs=epochs,
      callbacks=[early_stop],
      verbose=1,
  )

  return polarity_model

#### Execution

In [43]:
# Train the BiLSTM+Conv aspect model on sequences of the padded training width,
# then save it to Drive so later cells can reload it from the same path.
max_len=padded_aspect_train.shape[1]
aspect_model = bilstm_conv_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val)

save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_aspect'
aspect_model.save(save_path)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_aspect/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_aspect/assets


In [44]:
# Load the BiLSTM+Conv aspect model back from the Drive path the training cell saved it to;
# this rebinds `aspect_model`, so evaluation can run without retraining.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_aspect'
aspect_model = keras.models.load_model(save_path)

In [45]:
# Score every split with the aspect model.
pred_train = aspect_model.predict(padded_aspect_train)
pred_val = aspect_model.predict(padded_aspect_val)
pred_test = aspect_model.predict(padded_aspect_test)

# Round the scores to whole numbers and give the prediction frames the same
# column labels as the ground-truth frames so they can be compared per column.
df_train_true_ = label_aspect_train
df_train_pred = pd.DataFrame(pred_train).round(0)
df_train_pred.columns = df_train_true_.columns

df_val_true_ = label_aspect_val
df_val_pred = pd.DataFrame(pred_val).round(0)
df_val_pred.columns = df_val_true_.columns

df_test_true_ = label_aspect_test
df_test_pred = pd.DataFrame(pred_test).round(0)
df_test_pred.columns = df_test_true_.columns

# One classification report per aspect column, on the test split.
for col in label_aspect_train.columns:
  print(col)
  aspect_result_bilstm_conv = classification_report(df_test_true_[col], df_test_pred[col])
  print(aspect_result_bilstm_conv)

is_smell
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1265
           1       0.99      0.97      0.98       316

    accuracy                           0.99      1581
   macro avg       0.99      0.98      0.99      1581
weighted avg       0.99      0.99      0.99      1581

is_texture
              precision    recall  f1-score   support

           0       0.99      0.97      0.98      1088
           1       0.93      0.97      0.95       493

    accuracy                           0.97      1581
   macro avg       0.96      0.97      0.96      1581
weighted avg       0.97      0.97      0.97      1581

is_colour
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       854
           1       0.96      0.98      0.97       727

    accuracy                           0.97      1581
   macro avg       0.97      0.97      0.97      1581
weighted avg       0.97      0.97      0.97 

In [46]:
# Summarise the BiLSTM+Conv aspect predictions on the test split with get_pd_report
# (defined earlier in the notebook) and write the result to Drive as CSV.
save_path  = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/aspect_bilstm_conv_report.csv'
output_aspect_report = get_pd_report(label_aspect_train, df_test_true_, df_test_pred)
output_aspect_report.to_csv(save_path)

- Stage 2 : Polarity

In [47]:
# Aspect names for stage 2: the second '_'-separated token of each label column
# (e.g. 'is_smell' -> 'smell'), leaving out the 'is_others' column.
lst_aspect = [
    col.split('_')[1]
    for col in label_aspect_train.columns.values
    if col != 'is_others'
]

In [48]:
# Stage 2 preprocessing: each aspect gets its own tokenizer, padded sequences
# and embedding matrix, all stored in dicts keyed by aspect name.

# tokenize stage 2
tokenizer_sentiment = {}
tokenized_sentiment_train, tokenized_sentiment_val, tokenized_sentiment_test = {}, {}, {}
for aspect in lst_aspect:
  (tokenizer_sentiment[aspect],
   tokenized_sentiment_train[aspect],
   tokenized_sentiment_val[aspect],
   tokenized_sentiment_test[aspect]) = get_tokenize(cmt_train[aspect], cmt_val[aspect], cmt_test[aspect])

# padding stage 2 (every split is padded to the same max_len)
padded_sentiment_train, padded_sentiment_val, padded_sentiment_test = {}, {}, {}
for aspect in lst_aspect:
  (padded_sentiment_train[aspect],
   padded_sentiment_val[aspect],
   padded_sentiment_test[aspect]) = get_padded(max_len, tokenized_sentiment_train[aspect], tokenized_sentiment_val[aspect], tokenized_sentiment_test[aspect])

# embedding matrix stage 2 (+1 on the vocabulary size because word_index starts at 1)
vocab_sentiment_size = {
    aspect: len(tokenizer_sentiment[aspect].word_index)+1
    for aspect in lst_aspect
}
embedding_sentiment_matrix = {
    aspect: get_embedding_matrix(embedding_dim, vocab_sentiment_size[aspect], tokenizer_sentiment[aspect])
    for aspect in lst_aspect
}

In [49]:
# Train one BiLSTM+Conv polarity model per aspect and save each to its own Drive folder.
# The registry is reset first so it only ever holds the models trained by this cell.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = bilstm_conv_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val)
  sentiment_model[aspect].save('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_{}'.format(aspect))

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_smell/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_smell/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_texture/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_texture/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_colour/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_colour/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_price/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_price/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_shipping/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_shipping/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_packing/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_packing/assets


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_stayingpower/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_stayingpower/assets


In [50]:
# Reload the saved BiLSTM+Conv polarity models from Drive, one per aspect, keyed by aspect name.
sentiment_model = {
    aspect: keras.models.load_model('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/stl_BiLSTM_CONV_polarity_{}'.format(aspect))
    for aspect in lst_aspect
}

In [51]:
# Predict polarity per aspect on every split and reduce both the label rows and
# the model outputs to class indices (argmax over axis 1).
df_sentiment_train_true_, df_sentiment_train_pred = {}, {}
df_sentiment_val_true_, df_sentiment_val_pred = {}, {}
df_sentiment_test_true_, df_sentiment_test_pred = {}, {}

for aspect in lst_aspect:
  # Raw model outputs for each split.
  pred_sentiment_train = sentiment_model[aspect].predict(padded_sentiment_train[aspect])
  pred_sentiment_val = sentiment_model[aspect].predict(padded_sentiment_val[aspect])
  pred_sentiment_test = sentiment_model[aspect].predict(padded_sentiment_test[aspect])

  # Ground-truth class index per row.
  df_sentiment_train_true_[aspect] = np.argmax(label_sentiment_train[aspect], axis=1)
  df_sentiment_val_true_[aspect] = np.argmax(label_sentiment_val[aspect], axis=1)
  df_sentiment_test_true_[aspect] = np.argmax(label_sentiment_test[aspect], axis=1)

  # Predicted class index per row.
  df_sentiment_train_pred[aspect] = np.argmax(pred_sentiment_train, axis=1)
  df_sentiment_val_pred[aspect] = np.argmax(pred_sentiment_val, axis=1)
  df_sentiment_test_pred[aspect] = np.argmax(pred_sentiment_test, axis=1)

# Text classification report for each aspect, computed on the test split.
sentiment_result_bilstm_conv = {
    aspect: classification_report(df_sentiment_test_true_[aspect].tolist(), df_sentiment_test_pred[aspect].tolist())
    for aspect in lst_aspect
}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
# Summarise the BiLSTM+Conv polarity predictions on the test split with get_pd_report_sentiment
# (defined earlier in the notebook) and write the result to Drive as CSV.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_CONV/sentiment_bilstm_conv_report.csv'
output_sentiment_report = get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred)
output_sentiment_report.to_csv(save_path)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### BiGRU+CONV

#### def func

In [53]:
def bigru_conv_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val, epochs=70):
  """Build, train and return the BiGRU+Conv1D aspect-detection model.

  Architecture: trainable embedding initialised from `embedding_aspect_matrix`
  -> spatial dropout -> bidirectional GRU (200 units per direction) -> Conv1D
  (128 filters, kernel size 2) -> concatenated global average/max pooling ->
  dense head (128 -> dropout -> 64 -> 32) -> 8 sigmoid outputs.

  Args:
    vocab_aspect_size: number of rows of the embedding layer.
    embedding_dim: width of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_aspect_matrix: initial embedding weights.
    padded_aspect_train, padded_aspect_val: padded token-id inputs.
    label_aspect_train, label_aspect_val: label tables exposing `.to_numpy()`.
    epochs: maximum number of epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model`.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_aspect_size,
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_aspect_matrix),
      input_length=max_len,
      trainable=True,
  )(token_ids)

  # Sequence encoder: dropout over whole embedding channels, BiGRU, then a width-2 convolution.
  dropped = SpatialDropout1D(0.2)(embedded)
  encoded = Bidirectional(GRU(units=200, activation='tanh', return_sequences=True))(dropped)
  convolved = Conv1D(128, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(encoded)

  # Collapse the time axis two ways and join the results.
  pooled_avg = GlobalAveragePooling1D()(convolved)
  pooled_max = GlobalMaxPooling1D()(convolved)
  pooled = Concatenate(axis=-1)([pooled_avg, pooled_max])

  # Classification head; one independent sigmoid per output label.
  hidden = Dense(128, activation='relu')(pooled)
  hidden = Dropout(0.2)(hidden)
  for width in (64, 32):
    hidden = Dense(width, activation='relu')(hidden)
  aspect_probs = Dense(8, activation='sigmoid')(hidden)

  aspect_model = tf.keras.Model(inputs=token_ids, outputs=aspect_probs)
  aspect_model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['acc'])

  # Stop once validation loss has not improved for 3 consecutive epochs.
  early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  aspect_model.fit(
      x=padded_aspect_train,
      y=label_aspect_train.to_numpy(),
      validation_data=(padded_aspect_val, label_aspect_val.to_numpy()),
      batch_size=128,
      epochs=epochs,
      callbacks=[early_stop],
      verbose=1,
  )

  return aspect_model

def bigru_conv_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val, epochs=70):
  """Build, train and return the BiGRU+Conv1D polarity classifier for one aspect.

  Architecture: trainable embedding initialised from
  `embedding_sentiment_matrix[aspect]` -> spatial dropout -> bidirectional GRU
  (200 units per direction) -> Conv1D (128 filters, kernel size 2) ->
  concatenated global average/max pooling -> dense head (128 -> dropout -> 64
  -> 32) -> 3-way softmax.

  The model is compiled with categorical cross-entropy, which is the loss that
  matches a single softmax distribution over mutually exclusive classes. With
  this loss the 'acc' metric reports categorical accuracy rather than
  element-wise binary accuracy.

  The model is held in a local variable, so the function does not depend on a
  notebook-level `sentiment_model` dict existing; callers store the return value.

  Args:
    aspect: key selecting this aspect's entry in every dict argument.
    vocab_sentiment_size: dict aspect -> number of rows of the embedding layer.
    embedding_dim: width of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_sentiment_matrix: dict aspect -> initial embedding weights.
    padded_sentiment_train, padded_sentiment_val: dict aspect -> padded token-id inputs.
    label_sentiment_train, label_sentiment_val: dict aspect -> label array;
      assumed to be one-hot rows over the 3 polarity classes (the evaluation
      cells take argmax over axis 1) -- TODO confirm against the label builder.
    epochs: maximum number of epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model` for `aspect`.
  """
  token_ids = Input(shape=(max_len,))
  embedded = Embedding(
      input_dim=vocab_sentiment_size[aspect],
      output_dim=embedding_dim,
      embeddings_initializer=Constant(embedding_sentiment_matrix[aspect]),
      input_length=max_len,
      trainable=True,
  )(token_ids)

  # Sequence encoder: dropout over whole embedding channels, BiGRU, then a width-2 convolution.
  dropped = SpatialDropout1D(0.2)(embedded)
  encoded = Bidirectional(GRU(units=200, activation='tanh', return_sequences=True))(dropped)
  convolved = Conv1D(128, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(encoded)

  # Collapse the time axis two ways and join the results.
  pooled_avg = GlobalAveragePooling1D()(convolved)
  pooled_max = GlobalMaxPooling1D()(convolved)
  pooled = Concatenate(axis=-1)([pooled_avg, pooled_max])

  # Classification head ending in a softmax over the 3 polarity classes.
  hidden = Dense(128, activation='relu')(pooled)
  hidden = Dropout(0.2)(hidden)
  hidden = Dense(64, activation='relu')(hidden)
  hidden = Dense(32, activation='relu')(hidden)
  out_sentiment = Dense(units=3, activation='softmax')(hidden)

  polarity_model = tf.keras.Model(inputs=token_ids, outputs=out_sentiment)
  # Softmax output + one-hot targets -> categorical (not binary) cross-entropy.
  polarity_model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['acc'])

  # Stop once validation loss has not improved for 3 consecutive epochs.
  early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  polarity_model.fit(
      x=padded_sentiment_train[aspect],
      y=label_sentiment_train[aspect],
      validation_data=(padded_sentiment_val[aspect], label_sentiment_val[aspect]),
      batch_size=128,
      epochs=epochs,
      callbacks=[early_stop],
      verbose=1,
  )

  return polarity_model

#### execution

In [None]:
# Train the BiGRU+Conv aspect model and save *that* model to Drive.
# The trainer must be bigru_conv_aspect (the function defined in this section), and the
# result must be bound to `aspect_model` because that is the object saved below;
# otherwise the model left over from the previous section is written to this path.
max_len = padded_aspect_train.shape[1]
aspect_model = bigru_conv_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val)

# Path kept unchanged: the following "load model" cell reads from this exact location.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_aspect'
aspect_model.save(save_path)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70




INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_aspect/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_aspect/assets


In [None]:
# Load the aspect model back from the Drive path the training cell of this section saved to;
# this rebinds `aspect_model`, so evaluation can run without retraining.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_aspect'
aspect_model = keras.models.load_model(save_path)

In [None]:
# Score every split with the aspect model.
pred_train = aspect_model.predict(padded_aspect_train)
pred_val = aspect_model.predict(padded_aspect_val)
pred_test = aspect_model.predict(padded_aspect_test)

# Round the scores to whole numbers and give the prediction frames the same
# column labels as the ground-truth frames so they can be compared per column.
df_train_true_ = label_aspect_train
df_train_pred = pd.DataFrame(pred_train).round(0)
df_train_pred.columns = df_train_true_.columns

df_val_true_ = label_aspect_val
df_val_pred = pd.DataFrame(pred_val).round(0)
df_val_pred.columns = df_val_true_.columns

df_test_true_ = label_aspect_test
df_test_pred = pd.DataFrame(pred_test).round(0)
df_test_pred.columns = df_test_true_.columns

# One classification report per aspect column, on the test split.
for col in label_aspect_train.columns:
  print(col)
  aspect_result_bigru_conv = classification_report(df_test_true_[col], df_test_pred[col])
  print(aspect_result_bigru_conv)

In [None]:
# Summarise the BiGRU+Conv aspect predictions on the test split with get_pd_report
# (defined earlier in the notebook) and write the result to Drive as CSV.
save_path  = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/aspect_bigru_conv_report.csv'
output_aspect_report = get_pd_report(label_aspect_train, df_test_true_, df_test_pred)
output_aspect_report.to_csv(save_path)

- Stage 2 : Polarity

In [None]:
# Aspect names for stage 2: the second '_'-separated token of each label column
# (e.g. 'is_smell' -> 'smell'), leaving out the 'is_others' column.
lst_aspect = [
    col.split('_')[1]
    for col in label_aspect_train.columns.values
    if col != 'is_others'
]

In [None]:
# Stage 2 preprocessing: each aspect gets its own tokenizer, padded sequences
# and embedding matrix, all stored in dicts keyed by aspect name.

# tokenize stage 2
tokenizer_sentiment = {}
tokenized_sentiment_train, tokenized_sentiment_val, tokenized_sentiment_test = {}, {}, {}
for aspect in lst_aspect:
  (tokenizer_sentiment[aspect],
   tokenized_sentiment_train[aspect],
   tokenized_sentiment_val[aspect],
   tokenized_sentiment_test[aspect]) = get_tokenize(cmt_train[aspect], cmt_val[aspect], cmt_test[aspect])

# padding stage 2 (every split is padded to the same max_len)
padded_sentiment_train, padded_sentiment_val, padded_sentiment_test = {}, {}, {}
for aspect in lst_aspect:
  (padded_sentiment_train[aspect],
   padded_sentiment_val[aspect],
   padded_sentiment_test[aspect]) = get_padded(max_len, tokenized_sentiment_train[aspect], tokenized_sentiment_val[aspect], tokenized_sentiment_test[aspect])

# embedding matrix stage 2 (+1 on the vocabulary size because word_index starts at 1)
vocab_sentiment_size = {
    aspect: len(tokenizer_sentiment[aspect].word_index)+1
    for aspect in lst_aspect
}
embedding_sentiment_matrix = {
    aspect: get_embedding_matrix(embedding_dim, vocab_sentiment_size[aspect], tokenizer_sentiment[aspect])
    for aspect in lst_aspect
}

In [None]:
# Train one BiGRU+Conv polarity model per aspect and save each to its own Drive folder.
# The registry is reset first so it only ever holds the models trained by this cell.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = bigru_conv_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val)
  sentiment_model[aspect].save('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_CONV_polarity_{}'.format(aspect))

In [None]:
# Reload the saved BiGRU+Conv polarity models from Drive, one per aspect, keyed by aspect name.
sentiment_model = {
    aspect: keras.models.load_model('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/stl_BiGRU_CONV_polarity_{}'.format(aspect))
    for aspect in lst_aspect
}

In [None]:
# Predict polarity per aspect on every split and reduce both the label rows and
# the model outputs to class indices (argmax over axis 1).
df_sentiment_train_true_, df_sentiment_train_pred = {}, {}
df_sentiment_val_true_, df_sentiment_val_pred = {}, {}
df_sentiment_test_true_, df_sentiment_test_pred = {}, {}

for aspect in lst_aspect:
  # Raw model outputs for each split.
  pred_sentiment_train = sentiment_model[aspect].predict(padded_sentiment_train[aspect])
  pred_sentiment_val = sentiment_model[aspect].predict(padded_sentiment_val[aspect])
  pred_sentiment_test = sentiment_model[aspect].predict(padded_sentiment_test[aspect])

  # Ground-truth class index per row.
  df_sentiment_train_true_[aspect] = np.argmax(label_sentiment_train[aspect], axis=1)
  df_sentiment_val_true_[aspect] = np.argmax(label_sentiment_val[aspect], axis=1)
  df_sentiment_test_true_[aspect] = np.argmax(label_sentiment_test[aspect], axis=1)

  # Predicted class index per row.
  df_sentiment_train_pred[aspect] = np.argmax(pred_sentiment_train, axis=1)
  df_sentiment_val_pred[aspect] = np.argmax(pred_sentiment_val, axis=1)
  df_sentiment_test_pred[aspect] = np.argmax(pred_sentiment_test, axis=1)

# Text classification report for each aspect, computed on the test split.
sentiment_result_bigru_conv = {
    aspect: classification_report(df_sentiment_test_true_[aspect].tolist(), df_sentiment_test_pred[aspect].tolist())
    for aspect in lst_aspect
}

In [None]:
# Summarise the BiGRU+Conv polarity predictions on the test split with get_pd_report_sentiment
# (defined earlier in the notebook) and write the result to Drive as CSV.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiGRU_CONV/sentiment_bigru_conv_report.csv'
output_sentiment_report = get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred)
output_sentiment_report.to_csv(save_path)

### BiLSTM+BiGRU+CONV

#### def func

In [None]:
def bilstm_bigru_conv_aspect(vocab_aspect_size, embedding_dim, max_len, embedding_aspect_matrix, padded_aspect_train, padded_aspect_val, label_aspect_train, label_aspect_val, epochs=70):
  """Build, train and return the stage-1 (aspect detection) BiLSTM+BiGRU+CONV model.

  Architecture: a trainable embedding initialised from `embedding_aspect_matrix`
  feeds two parallel branches (BiLSTM -> Conv1D and BiGRU -> Conv1D). Each branch
  is summarised with global average and global max pooling, the four pooled
  vectors are concatenated and passed through a dense head that ends in one
  sigmoid unit per label column (multi-label output, binary cross-entropy loss).

  Args:
    vocab_aspect_size: number of rows of the embedding table (`input_dim`).
    embedding_dim: size of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_aspect_matrix: initial embedding weights
      (presumably shaped (vocab_aspect_size, embedding_dim); confirm against caller).
    padded_aspect_train: padded token-id inputs used for training.
    padded_aspect_val: padded token-id inputs used for validation.
    label_aspect_train: DataFrame of training labels, one column per aspect.
    label_aspect_val: DataFrame of validation labels, same columns.
    epochs: upper bound on training epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model`, carrying the weights of the epoch with the
    lowest validation loss.
  """
  # One output unit per label column. This used to be hard-coded to 8, which only
  # works while the label frame has exactly 8 columns.
  n_aspects = label_aspect_train.shape[1]

  # Named `token_ids` rather than `input` so the Python builtin is not shadowed.
  token_ids = Input(shape=(max_len,))
  embed = Embedding(input_dim=vocab_aspect_size,
                  output_dim=embedding_dim,
                  embeddings_initializer=Constant(embedding_aspect_matrix),
                  input_length=max_len,
                  trainable=True)(token_ids)

  dropout1 = SpatialDropout1D(0.2)(embed)

  # Two recurrent branches read the same dropped-out embeddings.
  lstm = Bidirectional(LSTM(units = 200, activation = 'tanh', return_sequences = True))(dropout1)
  conv_lstm = Conv1D(128, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(lstm)

  gru = Bidirectional(GRU(units = 200, activation = 'tanh', return_sequences = True))(dropout1)
  conv_gru = Conv1D(128, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(gru)

  # Collapse the time axis of each branch with both average and max pooling.
  avg_pool1 = GlobalAveragePooling1D()(conv_lstm)
  max_pool1 = GlobalMaxPooling1D()(conv_lstm)

  avg_pool2 = GlobalAveragePooling1D()(conv_gru)
  max_pool2 = GlobalMaxPooling1D()(conv_gru)

  concat = Concatenate(axis=-1)([avg_pool1, max_pool1, avg_pool2, max_pool2])

  # aspect head
  aspect_dense1 = Dense(units = 256, activation = 'relu')(concat)
  aspect_dense2 = Dense(128, activation='relu')(aspect_dense1)
  aspect_dropout1 = Dropout(0.2)(aspect_dense2)
  aspect_dense3 = Dense(64, activation='relu')(aspect_dropout1)
  aspect_dense4 = Dense(32, activation='relu')(aspect_dense3)
  aspect_dense5 = Dense(n_aspects, activation='sigmoid')(aspect_dense4)

  aspect_model = tf.keras.Model(inputs = token_ids, outputs = aspect_dense5)
  aspect_model.compile(optimizer=Adam(learning_rate = 0.0001), loss='binary_crossentropy', metrics=['acc'])

  # Without restore_best_weights the model keeps the weights of the last epoch
  # run, i.e. `patience` epochs after the best validation loss.
  callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

  # Fit model
  aspect_model.fit(x=padded_aspect_train,
    y=label_aspect_train.to_numpy(),
    validation_data=(padded_aspect_val,
      label_aspect_val.to_numpy()),
    batch_size = 128,
    epochs=epochs,
    callbacks = [callback],
    verbose=1)

  return aspect_model

def bilstm_bigru_conv_polarity(aspect, vocab_sentiment_size, embedding_dim, max_len, embedding_sentiment_matrix, padded_sentiment_train, padded_sentiment_val, label_sentiment_train, label_sentiment_val, epochs=70):
  """Build, train and return the stage-2 (polarity) BiLSTM+BiGRU+CONV model for one aspect.

  Architecture: a trainable embedding initialised from
  `embedding_sentiment_matrix[aspect]` feeds two parallel branches
  (BiLSTM -> Conv1D and BiGRU -> Conv1D). Each branch is summarised with global
  average and global max pooling, the four pooled vectors are concatenated and
  passed through a dense head that ends in a 3-way softmax.

  Args:
    aspect: key selecting this aspect's entry in every dict argument below.
    vocab_sentiment_size: dict aspect -> number of rows of the embedding table.
    embedding_dim: size of each embedding vector.
    max_len: length of the padded input sequences.
    embedding_sentiment_matrix: dict aspect -> initial embedding weights.
    padded_sentiment_train: dict aspect -> padded token-id inputs for training.
    padded_sentiment_val: dict aspect -> padded token-id inputs for validation.
    label_sentiment_train: dict aspect -> training targets; must match the
      3-unit softmax output (i.e. one-hot over 3 classes).
    label_sentiment_val: dict aspect -> validation targets, same encoding.
    epochs: upper bound on training epochs; early stopping may end training sooner.

  Returns:
    The fitted `tf.keras.Model`, carrying the weights of the epoch with the
    lowest validation loss.
  """
  # Named `token_ids` rather than `input` so the Python builtin is not shadowed.
  token_ids = Input(shape=(max_len,))
  embed = Embedding(input_dim=vocab_sentiment_size[aspect],
                  output_dim=embedding_dim,
                  embeddings_initializer=Constant(embedding_sentiment_matrix[aspect]),
                  input_length=max_len,
                  trainable=True)(token_ids)

  dropout1 = SpatialDropout1D(0.2)(embed)

  # Two recurrent branches read the same dropped-out embeddings.
  lstm = Bidirectional(LSTM(units = 200, activation = 'tanh', return_sequences = True))(dropout1)
  conv_lstm = Conv1D(128, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(lstm)

  gru = Bidirectional(GRU(units = 200, activation = 'tanh', return_sequences = True))(dropout1)
  conv_gru = Conv1D(128, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(gru)

  # Collapse the time axis of each branch with both average and max pooling.
  avg_pool1 = GlobalAveragePooling1D()(conv_lstm)
  max_pool1 = GlobalMaxPooling1D()(conv_lstm)

  avg_pool2 = GlobalAveragePooling1D()(conv_gru)
  max_pool2 = GlobalMaxPooling1D()(conv_gru)

  concat = Concatenate(axis=-1)([avg_pool1, max_pool1, avg_pool2, max_pool2])

  # polarity head
  sentiment_dense1 = Dense(units = 256, activation = 'relu')(concat)
  sentiment_dense2 = Dense(128, activation='relu')(sentiment_dense1)
  sentiment_dropout1 = Dropout(0.2)(sentiment_dense2)
  sentiment_dense3 = Dense(64, activation='relu')(sentiment_dropout1)
  sentiment_dense4 = Dense(32, activation='relu')(sentiment_dense3)
  out_sentiment = Dense(units = 3, activation = 'softmax')(sentiment_dense4)

  # The model is kept in a local variable: the function no longer needs a global
  # `sentiment_model` dict to exist. Callers store the returned model themselves.
  polarity_model = tf.keras.Model(inputs = token_ids, outputs = out_sentiment)

  # A softmax over mutually exclusive classes is trained with categorical
  # cross-entropy. The previous 'binary_crossentropy' scored the 3 outputs as
  # independent yes/no decisions and also made 'acc' report binary accuracy.
  polarity_model.compile(optimizer=Adam(learning_rate = 0.0001), loss='categorical_crossentropy', metrics=['acc'])

  # Without restore_best_weights the model keeps the weights of the last epoch
  # run, i.e. `patience` epochs after the best validation loss.
  callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

  # Fit model
  polarity_model.fit(x=padded_sentiment_train[aspect],
    y=label_sentiment_train[aspect],
    validation_data=(padded_sentiment_val[aspect],
      label_sentiment_val[aspect]),
    batch_size = 128,
    epochs=epochs,
    callbacks = [callback],
    verbose=1)

  return polarity_model

#### execution

In [None]:
# Sequence length comes from the already padded stage-1 training matrix.
max_len = padded_aspect_train.shape[1]

# Train the stage-1 (aspect) model with the default epoch budget.
aspect_model = bilstm_bigru_conv_aspect(
    vocab_aspect_size=vocab_aspect_size,
    embedding_dim=embedding_dim,
    max_len=max_len,
    embedding_aspect_matrix=embedding_aspect_matrix,
    padded_aspect_train=padded_aspect_train,
    padded_aspect_val=padded_aspect_val,
    label_aspect_train=label_aspect_train,
    label_aspect_val=label_aspect_val,
)

# Persist the trained model to Drive so it can be reloaded without retraining.
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/stl_BiLSTM_BiGRU_CONV_aspect'
aspect_model.save(save_path)

In [None]:
# Restore the trained stage-1 aspect model from Drive
# (lets the evaluation cells run without retraining).
save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/stl_BiLSTM_BiGRU_CONV_aspect'
aspect_model = keras.models.load_model(filepath=save_path)

In [None]:
# Predict aspect scores for every split.
pred_train = aspect_model.predict(padded_aspect_train)
pred_val = aspect_model.predict(padded_aspect_val)
pred_test = aspect_model.predict(padded_aspect_test)

# Round the scores to 0 decimals and label the columns like the ground-truth
# frames, so predictions and truth can be compared column by column.
df_train_true_ = label_aspect_train
df_train_pred = pd.DataFrame(pred_train, columns=df_train_true_.columns).round(0)

df_val_true_ = label_aspect_val
df_val_pred = pd.DataFrame(pred_val, columns=df_val_true_.columns).round(0)

df_test_true_ = label_aspect_test
df_test_pred = pd.DataFrame(pred_test, columns=df_test_true_.columns).round(0)

# Print one classification report per aspect column, evaluated on the test split.
for col in label_aspect_train.columns:
  print(col)
  aspect_result_bilstm_bigru_conv = classification_report(df_test_true_[col], df_test_pred[col])
  print(aspect_result_bilstm_bigru_conv)

In [None]:
# Build the aspect report from the test-split truth/prediction frames
# and write it to Drive as CSV.
output_aspect_report = get_pd_report(label_aspect_train, df_test_true_, df_test_pred)

save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/aspect_bilstm_bigru_conv_report.csv'
output_aspect_report.to_csv(save_path)

- Stage 2 : Polarity

In [None]:
# Aspect names for stage 2: take the token after the first underscore of every
# label column, skipping the 'is_others' column.
lst_aspect = [
  col.split('_')[1]
  for col in label_aspect_train.columns.values
  if col != 'is_others'
]

In [None]:
# Stage 2 preprocessing: every aspect gets its own tokenizer, padded sequences
# and embedding matrix, all stored in dicts keyed by aspect name.

# tokenize stage 2
tokenizer_sentiment = {}
tokenized_sentiment_train, tokenized_sentiment_val, tokenized_sentiment_test = {}, {}, {}

for aspect in lst_aspect:
  (tokenizer_sentiment[aspect],
   tokenized_sentiment_train[aspect],
   tokenized_sentiment_val[aspect],
   tokenized_sentiment_test[aspect]) = get_tokenize(cmt_train[aspect], cmt_val[aspect], cmt_test[aspect])

# padding stage 2 (reuses the max_len chosen for stage 1)
padded_sentiment_train, padded_sentiment_val, padded_sentiment_test = {}, {}, {}

for aspect in lst_aspect:
  (padded_sentiment_train[aspect],
   padded_sentiment_val[aspect],
   padded_sentiment_test[aspect]) = get_padded(max_len, tokenized_sentiment_train[aspect], tokenized_sentiment_val[aspect], tokenized_sentiment_test[aspect])

# embedding matrix stage 2
vocab_sentiment_size, embedding_sentiment_matrix = {}, {}

for aspect in lst_aspect:
  # +1 on top of the word_index size (presumably to leave room for the padding index 0; confirm)
  vocab_sentiment_size[aspect] = 1 + len(tokenizer_sentiment[aspect].word_index)
  embedding_sentiment_matrix[aspect] = get_embedding_matrix(embedding_dim, vocab_sentiment_size[aspect], tokenizer_sentiment[aspect])

In [None]:
# Train one polarity model per aspect and save each to Drive right after training.
# `sentiment_model` must exist before the first call to the training function.
sentiment_model = {}
for aspect in lst_aspect:
  sentiment_model[aspect] = bilstm_bigru_conv_polarity(
      aspect=aspect,
      vocab_sentiment_size=vocab_sentiment_size,
      embedding_dim=embedding_dim,
      max_len=max_len,
      embedding_sentiment_matrix=embedding_sentiment_matrix,
      padded_sentiment_train=padded_sentiment_train,
      padded_sentiment_val=padded_sentiment_val,
      label_sentiment_train=label_sentiment_train,
      label_sentiment_val=label_sentiment_val,
  )
  sentiment_model[aspect].save('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/stl_BiLSTM_BiGRU_CONV_polarity_{}'.format(aspect))

In [None]:
# Reload the saved BiLSTM+BiGRU+CONV polarity models from Drive, one per aspect,
# so the evaluation cells can run without retraining.
sentiment_model = {
  aspect: keras.models.load_model('/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/stl_BiLSTM_BiGRU_CONV_polarity_{}'.format(aspect))
  for aspect in lst_aspect
}

In [None]:
# Predict polarity with the BiLSTM+BiGRU+CONV models.
# For every aspect and every split we keep two arrays of class ids:
#   *_pred  -> argmax over axis 1 of the model output
#   *_true_ -> argmax over axis 1 of the label array (presumably one-hot labels; confirm)
df_sentiment_train_pred, df_sentiment_train_true_ = {}, {}
df_sentiment_val_pred, df_sentiment_val_true_ = {}, {}
df_sentiment_test_pred, df_sentiment_test_true_ = {}, {}

for aspect in lst_aspect:
  # train split
  pred_sentiment_train = sentiment_model[aspect].predict(padded_sentiment_train[aspect])
  df_sentiment_train_pred[aspect] = np.argmax(pred_sentiment_train, axis=1)
  df_sentiment_train_true_[aspect] = np.argmax(label_sentiment_train[aspect], axis=1)

  # validation split
  pred_sentiment_val = sentiment_model[aspect].predict(padded_sentiment_val[aspect])
  df_sentiment_val_pred[aspect] = np.argmax(pred_sentiment_val, axis=1)
  df_sentiment_val_true_[aspect] = np.argmax(label_sentiment_val[aspect], axis=1)

  # test split
  pred_sentiment_test = sentiment_model[aspect].predict(padded_sentiment_test[aspect])
  df_sentiment_test_pred[aspect] = np.argmax(pred_sentiment_test, axis=1)
  df_sentiment_test_true_[aspect] = np.argmax(label_sentiment_test[aspect], axis=1)

# Text classification report per aspect, computed on the test split only.
sentiment_result_bilstm_bigru_conv = {
    aspect: classification_report(df_sentiment_test_true_[aspect].tolist(), df_sentiment_test_pred[aspect].tolist())
    for aspect in lst_aspect
}

In [None]:
# Build the per-aspect polarity report from the test-split truth/prediction dicts
# and write it to Drive as CSV.
output_sentiment_report = get_pd_report_sentiment(df_sentiment_test_true_, df_sentiment_test_pred)

save_path = '/content/drive/MyDrive/Final Project - Real Time Aspect-based Sentiment Classification/Notebooks/Dat/BiLSTM_BiGRU_CONV/sentiment_bilstm_bigru_conv_report.csv'
output_sentiment_report.to_csv(save_path)