In [1]:
from model.doer import Coextractor
from model.feature_extractor import FeatureExtractor
from config import Config
from utils import load_data, prep_train_data, load_lexicon

from sklearn.model_selection import train_test_split

from tensorflow.keras.backend import clear_session
import argparse
import numpy as np
import time

from datetime import timedelta

In [2]:
# Reproducibility: seed NumPy and reset Keras' global state before the model
# is built, so re-running this cell in the same kernel starts clean.
np.random.seed(42)
clear_session()

# Input locations, relative to the notebook's working directory.
train_data = 'dataset/train_4k.txt'
test_data = 'dataset/test_1k.txt'
mpqa_lexicon_data = 'dataset/annotated/mpqa_lexicon.txt'
general_embedding_model = '../word_embedding/general_embedding/general_embedding_300.model'
domain_embedding_model = '../word_embedding/domain_embedding/domain_embedding_100.model'

# The lexicon is attached to the config object so downstream helpers that
# receive `config` can reach it.
config = Config()
config.mpqa_lexicon = load_lexicon(mpqa_lexicon_data)

X, y = load_data(train_data)
X_test, y_test = load_data(test_data)

# Hold out 25% of the training file for validation; the fixed random_state
# keeps the split identical across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=42)
# Keep a handle on the validation inputs as returned by the split: X_val is
# rebound to the output of prep_train_data further down.
sentences = X_val

feature_extractor = FeatureExtractor(
    general_embedding_model,
    domain_embedding_model,
    general_dim=config.dim_general,
    domain_dim=config.dim_domain,
)

X_train, y_train = prep_train_data(X_train, y_train, feature_extractor, feature='double_embedding', config=config)

X_test = feature_extractor.get_features(X_test, max_len=config.max_sentence_size)
# The prepared validation labels go to y_val2, leaving y_val as produced by
# train_test_split.
X_val, y_val2 = prep_train_data(X_val, y_val, feature_extractor, feature='double_embedding', config=config)

# Restore previously saved weights instead of training in this notebook.
coextractor = Coextractor(config)
coextractor.load("saved_models/P3_Diff/P3_300_1_0.5_weights")
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only appended a stray "None" line after the summary.
coextractor.model.summary()

# NOTE(review): evaluate() is given the prepared X_val together with the
# un-prepared y_val (not y_val2) -- confirm this is what Coextractor.evaluate
# expects.
# Left as the last expression so the returned scores are shown as cell output.
coextractor.evaluate(X_val, y_val)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, None, 400)]  0                                            
__________________________________________________________________________________________________
first_ate_rnn (Bidirectional)   (None, None, 600)    1321800     input[0][0]                      
__________________________________________________________________________________________________
first_asc_rnn (Bidirectional)   (None, None, 600)    1321800     input[0][0]                      
__________________________________________________________________________________________________
first_ate_dropout (Dropout)     (None, None, 600)    0           first_ate_rnn[0][0]              
______________________________________________________________________________________________

[0.9180616525324259, 0.9239847970848517]

In [3]:
# Two hand-written hotel-review sentences used as a quick smoke test of the
# loaded model.
sample_sentence = 'ac nya tidak dingin. kamar sempit tapi wajar untuk harga segitu'
sample_sentence2 = 'pelayanan ramah, kamar bersih, kasur empuk. tetapi kamar mandi nya kotor'

# Run each sample through predict_one, in order, and print the result for each
# on its own line.
for review_text in (sample_sentence, sample_sentence2):
    print(coextractor.predict_one(review_text, feature_extractor))

[[('ac nya', 'tidak dingin.', 'NG'), ('kamar', 'sempit', 'NG')]]
[[('pelayanan', 'ramah,', 'PO'), ('kamar', 'bersih,', 'PO'), ('kasur', 'empuk.', 'PO'), ('kamar mandi nya', 'empuk.', 'NG')]]
