In [None]:
from model.doer import Coextractor
from model.feature_extractor import FeatureExtractor
from config import Config
from utils import load_data, prep_train_data, load_lexicon

from sklearn.model_selection import train_test_split

from tensorflow.keras.backend import clear_session
import argparse
import numpy as np
import time

from datetime import timedelta
import csv

In [None]:
# Seed NumPy and clear any Keras state left over from earlier runs in this
# kernel so the cell behaves the same on every execution.
np.random.seed(42)
clear_session()

# Input files and pre-trained embedding models.
train_data = 'dataset/train_4k.txt'
test_data = 'dataset/test_1k.txt'
mpqa_lexicon_data = 'dataset/annotated/mpqa_lexicon.txt'
general_embedding_model = '../word_embedding/general_embedding/general_embedding_300.model'
domain_embedding_model = '../word_embedding/domain_embedding/domain_embedding_100.model'
config = Config()
config.mpqa_lexicon = load_lexicon(mpqa_lexicon_data)

X, y = load_data(train_data)
X_test, y_test = load_data(test_data)

# Hold out 25% of the training file for validation (fixed seed for a stable split).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=42)
# Alias to the validation inputs taken before X_val is rebound to prepared features below.
# NOTE(review): `sentences` is not used anywhere in the visible cells.
sentences = X_val

feature_extractor = FeatureExtractor(
    general_embedding_model,
    domain_embedding_model,
    general_dim=config.dim_general,
    domain_dim=config.dim_domain,
)

X_train, y_train = prep_train_data(X_train, y_train, feature_extractor, feature='double_embedding', config=config)

X_test = feature_extractor.get_features(X_test, max_len=config.max_sentence_size)
# NOTE(review): the prepared labels land in `y_val2`, which is never used in the
# visible cells; `evaluate` below receives the un-prepared `y_val`. Confirm
# that this is what `Coextractor.evaluate` expects.
X_val, y_val2 = prep_train_data(X_val, y_val, feature_extractor, feature='double_embedding', config=config)

coextractor = Coextractor(config)
coextractor.load("saved_models/P3_Diff/P3_300_1_0.5_weights")
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only appended a stray "None" line to the output.
# NOTE(review): assumes `coextractor.model` is a Keras model - confirm.
coextractor.model.summary()

coextractor.evaluate(X_val, y_val)

In [None]:
# Two hand-written review sentences used as a quick smoke test of the loaded model.
sample_sentence, sample_sentence2 = (
    'ac nya tidak dingin. kamar sempit tapi wajar untuk harga segitu',
    'pelayanan ramah, kamar bersih, kasur empuk. tetapi kamar mandi nya kotor',
)
for sample in (sample_sentence, sample_sentence2):
    print(coextractor.predict_one(sample, feature_extractor))

In [None]:
def load_data(filename):
    """Read a UTF-8 text file and return its non-empty lines.

    Trailing whitespace (including the newline) is stripped from every line;
    lines that are empty after stripping are dropped. Leading whitespace is
    kept as-is.

    NOTE(review): this definition shadows the ``load_data`` imported from
    ``utils`` in the first cell, so re-running the setup cell after this one
    will call this version instead - consider renaming.

    Args:
        filename: Path of the text file to read.

    Returns:
        list[str]: The remaining lines, in file order.
    """
    with open(filename, encoding='utf-8') as f:
        stripped_lines = (raw_line.rstrip() for raw_line in f)
        return [text for text in stripped_lines if text]


def write_data_and_predictions(data, predictions, out_path='output_pred.tsv'):
    """Write sentence/prediction pairs to a tab-separated file.

    Each output row has two columns: the sentence and its prediction. Rows are
    paired positionally with ``zip``, so if the inputs differ in length the
    surplus items of the longer one are not written.

    Args:
        data: Iterable of input sentences.
        predictions: Iterable of predictions, aligned with ``data``.
        out_path: Destination file. Defaults to ``output_pred.tsv`` in the
            current working directory; an existing file is overwritten.
    """
    # newline='' lets the csv writer control line endings itself (without it,
    # rows end in "\r\r\n" on Windows). UTF-8 matches the encoding load_data
    # uses when reading the sentences, instead of the platform default.
    with open(out_path, 'w', newline='', encoding='utf-8') as out_file:
        tsv_writer = csv.writer(out_file, delimiter='\t')
        tsv_writer.writerows(zip(data, predictions))

In [None]:
# Run the model over every sentence in the sample file and save the
# sentence/prediction pairs as TSV.
data = load_data('reviews_sample.txt')
predictions = [coextractor.predict_one(sentence, feature_extractor) for sentence in data]

write_data_and_predictions(data, predictions)