In [4]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import json, torch
from typing import Union, List  # Add this import


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_model_classify(checkpoint_path, tokenizer_name="distilbert-base-multilingual-cased"):
    """Load the sequence-classification model, its tokenizer and label mapping.

    Args:
        checkpoint_path: Directory holding the fine-tuned checkpoint. It must
            contain a ``config.json`` file, which is read for the label mapping.
        tokenizer_name: Name or path handed to ``AutoTokenizer.from_pretrained``.
            Defaults to the multilingual DistilBERT base model.

    Returns:
        A ``(model, tokenizer, id2label, device)`` tuple. ``id2label`` is the
        ``"id2label"`` entry of ``config.json`` exactly as stored there (JSON
        object keys are strings), or an empty dict when the entry is absent.
        ``model`` has already been moved to ``device``.
    """
    # The tokenizer comes from the base model, not from the checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    # The fine-tuned weights come from the checkpoint directory.
    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint_path,
        use_safetensors=True
    )

    # Read the label mapping. The encoding is pinned so that decoding does not
    # depend on the platform's locale default.
    with open(f"{checkpoint_path}/config.json", encoding="utf-8") as f:
        config = json.load(f)
    id2label = config.get("id2label", {})

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    return model, tokenizer, id2label, device

In [5]:


def classify_intent(texts: Union[str, List[str]]):
    """Classify the intent of a single text or of a batch of texts.

    Uses the notebook-level globals ``tokenizer``, ``model``, ``device`` and
    ``id2label`` (the values returned by ``load_model_classify``).

    Args:
        texts: One string, or a list of strings classified together as a batch.

    Returns:
        For a ``str`` input, a single result dict. For a list input, a list
        with one result dict per text, in input order; an empty list yields an
        empty list. Each result dict contains:

        - ``"text"``: the input text,
        - ``"intent"``: the label with the highest probability
          (``"unknown"`` if the index is missing from ``id2label``),
        - ``"confidence"``: that label's softmax probability,
        - ``"details"``: a mapping of every label to its probability.
    """
    # Decide the return shape from the input type, not from the batch size, so
    # a one-element list still comes back as a list.
    single_input = isinstance(texts, str)
    batch = [texts] if single_input else list(texts)

    # Nothing to classify: skip the tokenizer/model round trip entirely.
    if not batch:
        return []

    def label_for(index, default):
        # Accept both string keys (mapping read from JSON) and integer keys.
        if str(index) in id2label:
            return id2label[str(index)]
        return id2label.get(index, default)

    # Tokenize the whole batch at once, padded to the longest text.
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
        return_attention_mask=True
    ).to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    # Turn logits into per-class probabilities and pick the best class per row.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    confidences, pred_indices = torch.max(probs, dim=1)

    # Convert to plain Python values once instead of calling .item() per element.
    prob_rows = probs.tolist()
    confidence_values = confidences.tolist()
    predicted_ids = pred_indices.tolist()

    results = [
        {
            "text": text,
            "intent": label_for(predicted_id, "unknown"),
            "confidence": confidence,
            "details": {
                label_for(class_id, str(class_id)): prob
                for class_id, prob in enumerate(row)
            },
        }
        for text, predicted_id, confidence, row in zip(
            batch, predicted_ids, confidence_values, prob_rows
        )
    ]

    return results[0] if single_input else results


In [11]:
# Load the fine-tuned classifier once and put it in inference mode.
CHECKPOINT_PATH = "models/classification"
model, tokenizer, id2label, device = load_model_classify(CHECKPOINT_PATH)
model.eval()

# Sample queries used to smoke-test the intent classifier.
test_texts = [
    "Kuliah sistem operasi s1 ilkom?",
    "Seminar Usul bersama Rico Andrian yang dijadwalkan pada tanggal 23 Mei 2025, kapan tepatnya?",
    "Seminar salma dara carita kapan ya",
]

results = classify_intent(test_texts)

# Print one report per text: the winning intent, then every label's probability.
for position, prediction in enumerate(results, start=1):
    confidence_pct = prediction["confidence"] * 100
    report_lines = [
        f"\n=== Hasil {position} ===",
        f"Input: {prediction['text']}",
        f"Intent: {prediction['intent']} ({confidence_pct:.2f}%)",
        "Detail Probabilitas:",
    ]
    report_lines.extend(
        f"  - {label_name}: {probability*100:.2f}%"
        for label_name, probability in prediction["details"].items()
    )
    print("\n".join(report_lines))



=== Hasil 1 ===
Input: Kuliah sistem operasi s1 ilkom?
Intent: seminar (50.89%)
Detail Probabilitas:
  - kuliah: 49.11%
  - seminar: 50.89%

=== Hasil 2 ===
Input: Seminar Usul bersama Rico Andrian yang dijadwalkan pada tanggal 23 Mei 2025, kapan tepatnya?
Intent: seminar (94.31%)
Detail Probabilitas:
  - kuliah: 5.69%
  - seminar: 94.31%

=== Hasil 3 ===
Input: Seminar salma dara carita kapan ya
Intent: kuliah (59.85%)
Detail Probabilitas:
  - kuliah: 59.85%
  - seminar: 40.15%
