In [29]:
import json
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import json

In [None]:
# Relative paths to the saved model directories; the NER path is passed to
# `pipeline(...)` and the ABSA path to the `from_pretrained(...)` loaders below.
SAVED_MODEL_NER = "../model/NER/deberta_ner_saved"
SAVED_MODEL_ABSA = "../model/absa/deberta_absa_saved"
# Token length used as `max_length` (truncation and padding target) when
# encoding ABSA inputs in `predict_absa`.
MAX_LENGTH = 256

In [13]:
# Build the token-classification (NER) pipeline; both the model weights and the
# tokenizer are loaded from SAVED_MODEL_NER.
# aggregation_strategy="simple" makes the pipeline return grouped entities
# (dicts exposing "entity_group" and "word", which `extract_actors` reads)
# instead of one prediction per token.
ner_pipeline = pipeline(
    "token-classification",
    model=SAVED_MODEL_NER,
    tokenizer=SAVED_MODEL_NER,
    aggregation_strategy="simple"
)

Device set to use cpu


In [14]:
# Load the ABSA tokenizer and sequence-classification model from the same
# saved directory.
absa_tokenizer = AutoTokenizer.from_pretrained(SAVED_MODEL_ABSA)
absa_model = AutoModelForSequenceClassification.from_pretrained(SAVED_MODEL_ABSA)
# Switch the model to evaluation mode for inference.
# NOTE(review): eval() returns the module itself, so as the last expression of
# the cell it makes the notebook print the full model repr; appending ";" or
# assigning the result would suppress that output.
absa_model.eval()


DebertaV2ForSequenceClassification(
  (deberta): DebertaV2Model(
    (embeddings): DebertaV2Embeddings(
      (word_embeddings): Embedding(128100, 768, padding_idx=0)
      (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): DebertaV2Encoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaV2Layer(
          (attention): DebertaV2Attention(
            (self): DisentangledSelfAttention(
              (query_proj): Linear(in_features=768, out_features=768, bias=True)
              (key_proj): Linear(in_features=768, out_features=768, bias=True)
              (value_proj): Linear(in_features=768, out_features=768, bias=True)
              (pos_dropout): Dropout(p=0.1, inplace=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): DebertaV2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): Layer

In [None]:
def extract_actors(ner_results):
    """Collect the unique ACTOR entities from aggregated NER output.

    Args:
        ner_results: Iterable of entity dicts. Each dict must provide an
            "entity_group" label (str) and a "word" value.

    Returns:
        list: The "word" of every entity whose "entity_group" equals
        "ACTOR" (compared case-insensitively), de-duplicated and kept in
        order of first appearance. Empty list when nothing matches.

    Raises:
        KeyError: If an entity dict lacks "entity_group" or (for ACTOR
            entities) "word".
    """
    actors = [ent["word"] for ent in ner_results if ent["entity_group"].upper() == "ACTOR"]
    # dict.fromkeys drops duplicates while preserving first-seen order.
    # list(set(...)) would yield an order that changes with string hash
    # randomisation between kernel runs, making the output non-reproducible.
    return list(dict.fromkeys(actors))


def predict_absa(text, aspect):
    """Predict the sentiment label for a (text, aspect) pair.

    The aspect and the text are encoded together as a sequence pair (aspect
    first, text second), truncated and padded to MAX_LENGTH tokens, and fed
    to the ABSA model with gradient tracking disabled.

    Args:
        text: The review text.
        aspect: The aspect term to score (here, an actor name).

    Returns:
        The entry of ``absa_model.config.id2label`` for the class id with
        the highest logit.
    """
    model_inputs = absa_tokenizer(
        aspect,
        text,
        max_length=MAX_LENGTH,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = absa_model(**model_inputs).logits

    best_class_id = logits.argmax(dim=-1).item()
    return absa_model.config.id2label[best_class_id]


def run_ner_absa(text):
    """Run the full flow: NER -> extract actors -> ABSA for each actor.

    Args:
        text: The review text to analyse.

    Returns:
        dict: ``{"text": text, "actors": [...]}`` where each list item is
        ``{"actor": <name>, "sentiment": <label>}`` for one actor returned
        by ``extract_actors``.
    """
    entities = ner_pipeline(text)

    per_actor = []
    for name in extract_actors(entities):
        label = predict_absa(text, name)
        per_actor.append({"actor": name, "sentiment": label})

    return {"text": text, "actors": per_actor}

In [30]:
def load_test_data(text_file):
    """Load the test-set JSON file and index its review texts by id.

    The file is read as UTF-8 JSON and must contain a top-level "reviews"
    list whose items each carry an "id" and a "text".

    Args:
        text_file: Path to the JSON file.

    Returns:
        dict: Mapping of review id -> review text, in file order. A later
        item with a repeated id overwrites the earlier one.
    """
    with open(text_file, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    texts_by_id = {}
    for review in payload["reviews"]:
        review_id = review["id"]
        texts_by_id[review_id] = review["text"]
    return texts_by_id

In [31]:
# Driver cell: run the combined NER + ABSA flow over every review in the test file.
# NOTE: in a notebook kernel __name__ is "__main__", so this guard is always
# true there and the loop runs whenever the cell is executed.
if __name__ == "__main__":
    TEST_TEXT_PATH = "../data/test_text.json"

    # Mapping of review id -> review text.
    test_texts = load_test_data(TEST_TEXT_PATH)

    print("===== Combined NER + ABSA (Test Set) =====\n")

    # Reviews are processed one at a time; each printed result dict includes
    # the full review text, so the cell output grows with the test set.
    for review_id, text in test_texts.items():
        result = run_ner_absa(text)
        print(f"ID: {review_id}")
        print(result)
        print("-" * 60)

===== Combined NER + ABSA (Test Set) =====

ID: pos_601
{'text': 'I was surprised at the low rating this film got from viewers. I saw it one late night on TV and it hit the spot - I actually think it was back in 1989 when it first appeared. Yet I remember it pretty well, with a nice twist or two, and an interesting ambiance on a windmill farm. Michael Pollard looks suitably seedy for his role which pretty much sums up the unfulfilled early promise of his career, and everyone else plays it pretty straight ahead. I definitely recommend it as a rental, although some of the themes, which might have seemed a bit edgy in 1989, now may seem tame, which is a shame, considering that contemporary "edginess" is often just used as a necessary marketing tool, sort of like clamoring just to get noticed.', 'actors': [{'actor': 'Michael Pollard', 'sentiment': 'positive'}]}
------------------------------------------------------------
ID: pos_602
{'text': "I have never danced flamenco before, but someho

KeyboardInterrupt: 