In [4]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

# BIO label set that predicted class ids are mapped onto. It is defined before
# the model is loaded so the classification head can be sized to match it.
label_list = ["O", "B-LOC", "I-LOC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-MISC", "I-MISC"]

# Load the Indonesian language model and its tokenizer.
# NOTE(review): "indobenchmark/indobert-base-p1" looks like a pretrained base
# checkpoint rather than one fine-tuned for NER. If so, the token-classification
# head is randomly initialised and the predictions below are arbitrary until
# the model is fine-tuned (or a NER-specific checkpoint is used) — confirm.
model_name = "indobenchmark/indobert-base-p1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels keeps the head's output size in step with label_list, so every
# predicted class id is a valid index into label_list.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label_list),
)

# Text to analyse
text = "Pulau Bali terletak di Indonesia. Presiden Joko Widodo akan berkunjung ke Bali besok."

# Encode once; the same ids feed both the model and the token listing.
input_ids = tokenizer.encode(text, return_tensors="pt")

# Read the tokens straight from the ids given to the model so that `tokens`
# and `predicted_labels` are aligned one-to-one (special tokens included).
# An encode -> decode -> re-tokenize round trip does not guarantee that.
tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())

# Run inference without tracking gradients
with torch.no_grad():
    outputs = model(input_ids)

# Most likely class id per token for the single sequence in the batch
predicted_labels = torch.argmax(outputs.logits, dim=2)[0].tolist()

def _group_entities(tokens, label_ids, labels, special_tokens=()):
    """Merge per-token BIO predictions into a list of entity spans.

    Args:
        tokens: token strings, one per prediction.
        label_ids: predicted class ids, parallel to ``tokens``.
        labels: sequence mapping a class id to its BIO label string
            (``"O"``, ``"B-XXX"`` or ``"I-XXX"``).
        special_tokens: token strings that are never part of an entity
            (e.g. the tokenizer's ``[CLS]`` / ``[SEP]`` markers).

    Returns:
        A list of ``{"text": str, "label": str}`` dicts in order of appearance.
    """
    entities = []
    current = None  # entity being built, or None when outside any entity

    def close_current():
        nonlocal current
        if current is not None:
            entities.append(current)
            current = None

    for token, label_id in zip(tokens, label_ids):
        if token in special_tokens:
            # Sequence markers are not text; they also terminate any open span.
            close_current()
            continue

        if token.startswith("##"):
            # WordPiece continuation: it belongs to the previous word, so glue
            # it on without a space. If that word was not an entity, drop it.
            if current is not None:
                current["text"] += token[2:]
            continue

        prefix, _, entity_type = labels[label_id].partition("-")
        continues_current = (
            prefix == "I" and current is not None and current["label"] == entity_type
        )
        if continues_current:
            current["text"] += " " + token
        elif prefix in ("B", "I"):
            # "B-" always opens a new span. An "I-" with no matching open span
            # is treated as an opening tag instead of producing an entity with
            # an empty label.
            close_current()
            current = {"text": token, "label": entity_type}
        else:
            # "O" (outside): end the span so a later "I-" cannot be appended
            # to an unrelated earlier entity.
            close_current()

    # Flush the entity still open at the end of the sequence, if any
    close_current()
    return entities


# Collect the entities found in the text
entities = _group_entities(tokens, predicted_labels, label_list, tokenizer.all_special_tokens)

# Print each entity together with its label
for entity in entities:
    print(f"Entitas: {entity['text']}, Label: {entity['label']}")

# Illustrative output (what a model fine-tuned for NER would be expected to give):
# Entitas: Bali, Label: LOC
# Entitas: Indonesia, Label: LOC
# Entitas: Joko Widodo, Label: PER
# Entitas: Bali, Label: LOC


Downloading (…)okenizer_config.json: 100%|██████████| 2.00/2.00 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)lve/main/config.json: 100%|██████████| 1.53k/1.53k [00:00<?, ?B/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 229k/229k [00:00<00:00, 382kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 112kB/s]


ImportError: 
AutoModelForTokenClassification requires the PyTorch library but it was not found in your environment.
However, we were able to find a TensorFlow installation. TensorFlow classes begin
with "TF", but are otherwise identically named to our PyTorch classes. This
means that the TF equivalent of the class you tried to import would be "TFAutoModelForTokenClassification".
If you want to use TensorFlow, please use TF classes instead!

If you really do want to use PyTorch please go to
https://pytorch.org/get-started/locally/ and follow the instructions that
match your environment.


In [7]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Sentiment names indexed by predicted class id.
# NOTE(review): this id -> name order is carried over from the original
# if/elif chain; it must match the label order used when the classification
# head is fine-tuned — confirm.
SENTIMENT_LABELS = ["Negatif", "Netral", "Positif"]

# Load the Indonesian language model and a sequence-classification head.
# The load log for this checkpoint reports the 'classifier' layer as newly
# initialised, i.e. the head is untrained: predictions are arbitrary until the
# model is fine-tuned on a sentiment dataset.
model_name = "indobenchmark/indobert-base-p1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels sizes the head to the three sentiment classes, so every predicted
# id maps to exactly one entry of SENTIMENT_LABELS instead of all ids >= 2
# collapsing into "Positif".
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(SENTIMENT_LABELS),
)

# Texts to analyse
texts = [
    "Ini adalah produk yang sangat bagus. Saya sangat puas.",
    "Produk ini sangat buruk. Saya tidak puas sama sekali.",
    "Produknya biasa saja, tidak terlalu bagus dan tidak terlalu buruk."
]

# Tokenize each text and classify its sentiment
sentiments = []
for text in texts:
    input_ids = tokenizer.encode(text, return_tensors="tf")
    outputs = model(input_ids)

    # Index of the highest-scoring class for the single sequence in the batch
    predicted_class = int(tf.argmax(outputs.logits, axis=1).numpy()[0])
    sentiments.append(SENTIMENT_LABELS[predicted_class])

# Print each text with its predicted sentiment
for text, sentiment in zip(texts, sentiments):
    print(f"Teks: {text}")
    print(f"Sentimen: {sentiment}")
    print()


Downloading tf_model.h5: 100%|██████████| 656M/656M [02:50<00:00, 3.85MB/s] 
All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Teks: Ini adalah produk yang sangat bagus. Saya sangat puas.
Sentimen: Positif

Teks: Produk ini sangat buruk. Saya tidak puas sama sekali.
Sentimen: Positif

Teks: Produknya biasa saja, tidak terlalu bagus dan tidak terlalu buruk.
Sentimen: Positif



In [6]:
import torch
# Smoke test that PyTorch is importable and usable: build a 5x3 tensor of
# random values and display it.
x = torch.rand((5, 3))
print(x)

tensor([[0.4490, 0.9854, 0.5977],
        [0.3534, 0.8899, 0.1698],
        [0.2985, 0.7845, 0.9545],
        [0.0732, 0.1769, 0.6122],
        [0.5387, 0.2050, 0.5387]])
