In [10]:
import pandas as pd
import numpy as np
import matplotlib as plt
import json
import jsonlines

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, LSTM, Embedding, Input, Bidirectional, Conv1D, MaxPool1D
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.losses import mse
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam

In [11]:
result = pd.read_pickle('Dataset/Cleanded.pkl')

In [12]:
tv        = result['TV'].tolist()
fintech   = result['FinTech'].tolist()
iot       = result['IoT'].tolist()
insurance = result['Insurance'].tolist()
mbb       = result['MBB'].tolist()
fbb       = result['FBB'].tolist()
e_sport   = result['E-Sport'].tolist()
other     = result['Other'].tolist()

In [13]:
import fasttext

# load saved into python
model = fasttext.load_model('cc.th.300.bin')

In [14]:
def text_reshaper(text_list, reshaped_text_list, MAX_LENGTH):
  for i in range(text_list.shape[0]-1):
     if len(text_list[i]) >= MAX_LENGTH:
         reshaped_text_list[i, :] = text_list[i][0:MAX_LENGTH]
     else:
         reshaped_text_list[i, 0:len(text_list[i])] = text_list[i]
         reshaped_text_list[i, len(text_list[i]):] = " "
         
  return reshaped_text_list

In [15]:
lines_of_file = []
lines = []
with open('Dataset/tokenize.txt', 'r', encoding="utf-8") as f:
  lines_of_file = f.readlines()
  for oneline in lines_of_file:
    lines.append(oneline.split(" "))

In [16]:
# Clear the space element out of token list
for item in lines:
  for element in item:
    if(element == ''):
      item.remove(element)

In [17]:
# RESHAPE THE DATA 
MAX_LENGTH = 300

y_train = np.array(mbb[:1362])
y_test = np.array(mbb[1362: 19450])

texts_token_list_reshaped = np.zeros([1945, MAX_LENGTH], dtype=np.object)
texts_token_list = np.array(lines, dtype=np.object)
texts_token_list_reshaped = text_reshaper(texts_token_list, texts_token_list_reshaped, MAX_LENGTH)

In [18]:
input = Input((MAX_LENGTH,model.get_dimension()))

x = Conv1D(32,3,padding='same',activation='relu')(input)
x = MaxPool1D()(x)
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Bidirectional(LSTM(64, return_sequences=False))(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)

out = Dense(2, activation='softmax')(x)

In [19]:
wn_sentiment_model = Model(input, out)
wn_sentiment_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 300, 300)]        0         
_________________________________________________________________
conv1d (Conv1D)              (None, 300, 32)           28832     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 150, 32)           0         
_________________________________________________________________
bidirectional (Bidirectional (None, 150, 256)          164864    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               164352    
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dropout (Dropout)            (None, 64)               

In [20]:
# Create embeded input vector (takes a while)
fasttext_vec = np.zeros((1945, MAX_LENGTH, 300), dtype=np.float_)

for text in range(fasttext_vec.shape[0]):
  for word in range(fasttext_vec.shape[1]):
      fasttext_vec[text][word] = model.get_word_vector(texts_token_list_reshaped[text][word])


TypeError: getWordVector(): incompatible function arguments. The following argument types are supported:
    1. (self: fasttext_pybind.fasttext, arg0: fasttext_pybind.Vector, arg1: str) -> None

Invoked with: <fasttext_pybind.fasttext object at 0x0000019B408B98B0>, <fasttext_pybind.Vector object at 0x0000019D0C731F30>, 0

In [21]:
x_train = fasttext_vec[:1362]
x_test = fasttext_vec[1362: 1945]

y_train_tv = tv[:1362]
y_test_tv = tv[1362: 1945]
y_train_fintech = fintech[:1362]
y_test_fintech = fintech[1362: 1945]
y_train_iot = iot[:1362]
y_test_iot = iot[1362: 1945]
y_train_insurance = insurance[:1362]
y_test_insurance = insurance[1362: 1945]
y_train_mbb = mbb[:1362]
y_test_mbb = mbb[1362: 1945]
y_train_fbb = fbb[:1362]
y_test_fbb = fbb[1362: 1945]
y_train_e_sport = e_sport[:1362]
y_test_e_sport = e_sport[1362: 1945]
y_train_other = other[:1362]
y_test_other = other[1362: 1945]

In [22]:
x_train = np.array(x_train)
x_test = np.array(x_test)

y_train_tv = np.array(y_train_tv)
y_test_tv = np.array(y_test_tv)
y_train_fintech = np.array(y_train_fintech)
y_test_fintech = np.array(y_test_fintech)
y_train_iot = np.array(y_train_iot)
y_test_iot = np.array(y_test_iot)
y_train_insurance = np.array(y_train_insurance)
y_test_insurance = np.array(y_test_insurance)
y_train_mbb = np.array(y_train_mbb)
y_test_mbb = np.array(y_test_mbb)
y_train_fbb = np.array(y_train_fbb)
y_test_fbb = np.array(y_test_fbb)
y_train_e_sport = np.array(y_train_e_sport)
y_test_e_sport = np.array(y_test_e_sport)
y_train_other = np.array(y_train_other)
y_test_other = np.array(y_test_other)

In [19]:
wn_sentiment_model.compile(
    optimizer=RMSprop(learning_rate=0.0005, momentum=0.9),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

In [20]:
model_checkpoint_callback_tv = tf.keras.callbacks.ModelCheckpoint(
    filepath="./checkpoint_tv",
    save_best_only=True)

In [21]:
model_tv = wn_sentiment_model.fit(x_train, y_train_tv, validation_data=(x_test,y_test_tv), epochs=10, callbacks=model_checkpoint_callback_tv)

Epoch 1/10
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: .\checkpoint_tv\assets
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
model_tv = keras.models.load_model("./checkpoint_tv")

In [23]:
model_tv.evaluate(x_test,  y_test_tv, verbose=2)

19/19 - 0s - loss: 0.2038 - accuracy: 0.9520


[0.20377884805202484, 0.9519725441932678]

In [24]:
# Fintech

model_checkpoint_callback_fintech = tf.keras.callbacks.ModelCheckpoint(
    filepath="./checkpoint_fintech",
    save_best_only=True)

In [25]:
wn_sentiment_model.fit(x_train, 
                      y_train_fintech, 
                      validation_data=(x_test,y_test_fintech), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_fintech)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e29d4cc208>

In [26]:
model_fintech = keras.models.load_model("./checkpoint_fintech")

model_fintech.evaluate(x_test,  y_test_fintech, verbose=2)

19/19 - 0s - loss: 0.2752 - accuracy: 1.0000


[0.2752346992492676, 1.0]

In [27]:
# IoT

model_checkpoint_callback_iot = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_iot",
                                                save_best_only=True)

In [28]:
# IoT

wn_sentiment_model.fit(x_train, 
                      y_train_iot, 
                      validation_data=(x_test,y_test_iot), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_iot)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e22c2d7248>

In [29]:
# IoT

model_iot = keras.models.load_model("./checkpoint_iot")

model_iot.evaluate(x_test,  y_test_iot, verbose=2)

19/19 - 1s - loss: 0.4475 - accuracy: 1.0000


[0.4474872648715973, 1.0]

In [30]:
# Insurance

model_checkpoint_callback_insurance = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_insurance",
                                                save_best_only=True)

In [31]:
# Insurance

wn_sentiment_model.fit(x_train, 
                      y_train_insurance, 
                      validation_data=(x_test,y_test_insurance), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_insurance)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e23047d5c8>

In [32]:
# Insurance

model_insurance = keras.models.load_model("./checkpoint_insurance")

model_insurance.evaluate(x_test,  y_test_insurance, verbose=2)

19/19 - 0s - loss: 0.1655 - accuracy: 1.0000


[0.1655135154724121, 1.0]

In [33]:
# MBB

model_checkpoint_callback_mbb = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_mbb",
                                                save_best_only=True)

In [34]:
# MBB

wn_sentiment_model.fit(x_train, 
                      y_train_mbb, 
                      validation_data=(x_test,y_test_mbb), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_mbb)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e301c75988>

In [35]:
# MBB

model_mbb = keras.models.load_model("./checkpoint_mbb")

model_mbb.evaluate(x_test,  y_test_mbb, verbose=2)

19/19 - 0s - loss: 0.5695 - accuracy: 0.5540


[0.5694996118545532, 0.5540308952331543]

In [36]:
# FBB

model_checkpoint_callback_fbb = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_fbb",
                                                save_best_only=True)

In [37]:
# FBB

wn_sentiment_model.fit(x_train, 
                      y_train_fbb, 
                      validation_data=(x_test,y_test_fbb), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_fbb)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e2fd85a7c8>

In [38]:
# FBB

model_fbb = keras.models.load_model("./checkpoint_fbb")

model_fbb.evaluate(x_test,  y_test_fbb, verbose=2)

19/19 - 0s - loss: 0.4468 - accuracy: 1.0000


[0.4467894732952118, 1.0]

In [39]:
# E-sport

model_checkpoint_callback_e_sport = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_e_sport",
                                                save_best_only=True)

In [40]:
# E-sport

wn_sentiment_model.fit(x_train, 
                      y_train_e_sport, 
                      validation_data=(x_test,y_test_e_sport), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_e_sport)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e22da612c8>

In [41]:
# E-sport

model_e_sport = keras.models.load_model("./checkpoint_e_sport")

model_e_sport.evaluate(x_test,  y_test_e_sport, verbose=2)

19/19 - 0s - loss: 0.0685 - accuracy: 1.0000


[0.06852228939533234, 1.0]

In [42]:
# Other

model_checkpoint_callback_other = tf.keras.callbacks.ModelCheckpoint(
                                                filepath="./checkpoint_other",
                                                save_best_only=True)

In [43]:
# Other

wn_sentiment_model.fit(x_train, 
                      y_train_other, 
                      validation_data=(x_test,y_test_other), 
                      epochs=10, 
                      callbacks=model_checkpoint_callback_other)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1e22e1c9508>

In [44]:
# Other

model_other = keras.models.load_model("./checkpoint_other")

model_other.evaluate(x_test,  y_test_other, verbose=2)

19/19 - 1s - loss: 0.5784 - accuracy: 0.5352


[0.5783796906471252, 0.5351629257202148]

# Model f1-score

In [None]:
x_train = np.array(x_train)
x_test = np.array(x_test)

y_train_tv = np.array(y_train_tv)
y_test_tv = np.array(y_test_tv)
y_train_fintech = np.array(y_train_fintech)
y_test_fintech = np.array(y_test_fintech)
y_train_iot = np.array(y_train_iot)
y_test_iot = np.array(y_test_iot)
y_train_insurance = np.array(y_train_insurance)
y_test_insurance = np.array(y_test_insurance)
y_train_mbb = np.array(y_train_mbb)
y_test_mbb = np.array(y_test_mbb)
y_train_fbb = np.array(y_train_fbb)
y_test_fbb = np.array(y_test_fbb)
y_train_e_sport = np.array(y_train_e_sport)
y_test_e_sport = np.array(y_test_e_sport)
y_train_other = np.array(y_train_other)
y_test_other = np.array(y_test_other)

In [4]:
model_tv = keras.models.load_model("./checkpoint_tv")
model_fintech = keras.models.load_model("./checkpoint_fintech")
model_iot = keras.models.load_model("./checkpoint_iot")
model_insurance = keras.models.load_model("./checkpoint_insurance")
model_mbb = keras.models.load_model("./checkpoint_mbb")
model_fbb = keras.models.load_model("./checkpoint_fbb")
model_e_sport = keras.models.load_model("./checkpoint_e_sport")
model_other = keras.models.load_model("./checkpoint_other")

In [5]:
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score

TV model

In [23]:
pred = np.argmax(model_tv.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_tv, pred, average='macro'))
print("precision :",average_precision_score(y_test_tv, pred))

f1-score : 0.6925461444465881
precision : 0.2349045206658195


Fintech model

In [24]:
pred = np.argmax(model_fintech.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_fintech, pred, average='macro'))
print("precision :",average_precision_score(y_test_fintech, pred))

f1-score : 0.47899910634495085
precision : 0.08061749571183534


IoT model

In [25]:
pred = np.argmax(model_iot.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_iot, pred, average='macro'))
print("precision :",average_precision_score(y_test_iot, pred))

f1-score : 0.45155221072436497
precision : 0.17667238421955403


Insurance Model

In [26]:
pred = np.argmax(model_insurance.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_insurance, pred, average='macro'))
print("precision :",average_precision_score(y_test_insurance, pred))

f1-score : 0.4908296943231441
precision : 0.036020583190394515


MBB Model

In [27]:
pred = np.argmax(model_mbb.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_mbb, pred, average='macro'))
print("precision :",average_precision_score(y_test_mbb, pred))

f1-score : 0.6877766007532957
precision : 0.7011584641773321


FBB Model

In [31]:
pred = np.argmax(model_other.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_other, pred, average='macro'))
print("precision :",average_precision_score(y_test_other, pred))

f1-score : 0.6586821126058794
precision : 0.4297931101830008


E-sport Model

In [30]:
pred = np.argmax(model_e_sport.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_e_sport, pred, average='macro'))
print("precision :",average_precision_score(y_test_e_sport, pred))

f1-score : 0.49611063094209157
precision : 0.015437392795883362


Others model

In [82]:
pred = np.argmax(model_other.predict(x_test), axis=1)  # from log probabilities to 0 or 1
print("f1-score :",f1_score(y_test_other, pred, average='macro'))
print("precision :",average_precision_score(y_test_other, pred))

f1-score : 0.6586821126058794
precision : 0.4297931101830008
