In [1]:
from model.bert import bert_ATE, bert_ABSA
from transformers import BertTokenizer
import torch
import pandas as pd
import numpy as np

In [2]:
DEVICE = torch.device("mps" if torch.has_mps else "cpu")
pretrain_model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(pretrain_model_name)
lr = 2e-5
model_ATE = bert_ATE(pretrain_model_name).to(DEVICE)
optimizer_ATE = torch.optim.Adam(model_ATE.parameters(), lr=lr)
model_ABSA = bert_ABSA(pretrain_model_name).to(DEVICE)
optimizer_ABSA = torch.optim.Adam(model_ABSA.parameters(), lr=lr)

In [3]:
def load_model(model, path):
    model.load_state_dict(torch.load(path), strict=False)
    return model

In [4]:
model_ATE = load_model(model_ATE, 'bert_ATE.pkl')
model_ABSA = load_model(model_ABSA, 'bert_ABSA.pkl')

In [5]:
polarities = {0:"Negative", 1:"Neutral", 2:"Positive"}

In [7]:
def predict_model_ATE(sentence, tokenizer):
    word_pieces = []
    tokens = tokenizer.tokenize(sentence)
    word_pieces += tokens

    ids = tokenizer.convert_tokens_to_ids(word_pieces)
    input_tensor = torch.tensor([ids]).to(DEVICE)

    with torch.no_grad():
        outputs = model_ATE(input_tensor, None, None)
        _, predictions = torch.max(outputs, dim=2)
    predictions = predictions[0].tolist()

    return word_pieces, predictions, outputs

In [8]:
def predict_model_ABSA(sentence, aspect, tokenizer):
    t1 = tokenizer.tokenize(sentence)
    t2 = tokenizer.tokenize(aspect)

    word_pieces = ['[cls]']
    word_pieces += t1
    word_pieces += ['[sep]']
    word_pieces += t2

    segment_tensor = [0] + [0]*len(t1) + [0] + [1]*len(t2)

    ids = tokenizer.convert_tokens_to_ids(word_pieces)
    input_tensor = torch.tensor([ids]).to(DEVICE)
    segment_tensor = torch.tensor(segment_tensor).to(DEVICE)

    with torch.no_grad():
        outputs = model_ABSA(input_tensor, None, None, segments_tensors=segment_tensor)
        outputs = torch.softmax(outputs, dim=1)
        _,predictions = torch.max(outputs,dim=1)
    
    return word_pieces, predictions, outputs

In [16]:
def ATE_ABSA(text):
    terms = []
    word = ""
    x, y, z = predict_model_ATE(text, tokenizer)
    for i in range(len(y)):
        if y[i] == 1:
            if len(word) != 0:
                terms.append(word.replace(" ##",""))
            word = x[i]
        if y[i] == 2:
            word += (" " + x[i])
            
    
    if len(word) != 0:
            terms.append(word.replace(" ##",""))
            
    print("Aspects:", terms)
    
    if len(terms) != 0:
        for i in terms:
            _, c, p = predict_model_ABSA(text, i, tokenizer)
            print("Term:", i, ", Class:", polarities[int(c)], ", Probability:", round(float(p[0][int(c)]),3))


In [17]:
text = "For the price you pay, this product is very good. However, battery life is a little lack-luster coming from a MacBook Pro."
ATE_ABSA(text)

Aspects: ['price', 'battery life']
Term: price , Class: Positive , Probability: 0.914
Term: battery life , Class: Negative , Probability: 0.999


In [36]:
text = "Speakers are great but screen colors are dull."
ATE_ABSA(text)

Aspects: ['speakers', 'screen colors']
Term: ['speakers'] , Class: Positive , Probability: 0.999
Term: ['screen colors'] , Class: Negative , Probability: 1.0
