In [10]:
import os
import re
import pandas as pd
import numpy as np
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support

# Setup paths
BASE_PATH = r'C:\Users\muham\Project\nlp-ki'
MODEL_SAVE_PATH = os.path.join(BASE_PATH, 'saved_model_id')

print(f"Base Path: {BASE_PATH}")
print(f"Model Save Path: {MODEL_SAVE_PATH}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

Base Path: C:\Users\muham\Project\nlp-ki
Model Save Path: C:\Users\muham\Project\nlp-ki\saved_model_id
PyTorch Version: 2.7.1+cu118
CUDA Available: False


In [12]:
# Load Indonesian sentiment dataset langsung dari GitHub (bypass HF dataset script)
print("Loading Indonesian SMSA sentiment dataset from GitHub...")

# URL dataset dari IndoNLU GitHub repository
train_url = "https://raw.githubusercontent.com/IndoNLP/indonlu/master/dataset/smsa_doc-sentiment-prosa/train_preprocess.tsv"
valid_url = "https://raw.githubusercontent.com/IndoNLP/indonlu/master/dataset/smsa_doc-sentiment-prosa/valid_preprocess.tsv"
test_url = "https://raw.githubusercontent.com/IndoNLP/indonlu/master/dataset/smsa_doc-sentiment-prosa/test_preprocess.tsv"

# Load dari TSV files (no header in file)
train_df = pd.read_csv(train_url, sep='\t', header=None, names=['text', 'label'])
valid_df = pd.read_csv(valid_url, sep='\t', header=None, names=['text', 'label'])
test_df = pd.read_csv(test_url, sep='\t', header=None, names=['text', 'label'])

print(f"\n‚úÖ Dataset loaded successfully!")
print(f"Train samples: {len(train_df)}")
print(f"Validation samples: {len(valid_df)}")
print(f"Test samples: {len(test_df)}")

# Lihat struktur data
print(f"\nColumns: {train_df.columns.tolist()}")
print(f"\nFirst few rows:")
print(train_df.head())

# Cek label distribution
print(f"\nLabel distribution in training set:")
print(train_df['label'].value_counts())

# Map string labels ke numeric
label_map = {'positive': 2, 'neutral': 1, 'negative': 0}
print("\n‚ö†Ô∏è Converting string labels to numeric (positive=2, neutral=1, negative=0)")
train_df['label'] = train_df['label'].map(label_map)
valid_df['label'] = valid_df['label'].map(label_map)
test_df['label'] = test_df['label'].map(label_map)
print(f"\nNumeric label distribution:")
print(train_df['label'].value_counts().sort_index())

# Konversi ke HuggingFace Dataset format
from datasets import Dataset, DatasetDict

dataset = DatasetDict({
    'train': Dataset.from_pandas(train_df),
    'validation': Dataset.from_pandas(valid_df),
    'test': Dataset.from_pandas(test_df)
})

print(f"\n‚úÖ Dataset converted to HuggingFace format!")
print(f"Dataset structure: {dataset}")

Loading Indonesian SMSA sentiment dataset from GitHub...

‚úÖ Dataset loaded successfully!
Train samples: 11000
Validation samples: 1260
Test samples: 500

Columns: ['text', 'label']

First few rows:
                                                text     label
0  warung ini dimiliki oleh pengusaha pabrik tahu...  positive
1  mohon ulama lurus dan k212 mmbri hujjah partai...   neutral
2  lokasi strategis di jalan sumatera bandung . t...  positive
3  betapa bahagia nya diri ini saat unboxing pake...  positive
4  duh . jadi mahasiswa jangan sombong dong . kas...  negative

Label distribution in training set:
label
positive    6416
negative    3436
neutral     1148
Name: count, dtype: int64

‚ö†Ô∏è Converting string labels to numeric (positive=2, neutral=1, negative=0)

Numeric label distribution:
label
0    3436
1    1148
2    6416
Name: count, dtype: int64

‚úÖ Dataset converted to HuggingFace format!
Dataset structure: DatasetDict({
    train: Dataset({
        features: ['text', 'lab

In [13]:
# Dictionary untuk normalisasi kata slang/informal Bahasa Indonesia
slang_dict = {
    'gak': 'tidak',
    'ga': 'tidak',
    'gk': 'tidak',
    'ngga': 'tidak',
    'nggak': 'tidak',
    'tdk': 'tidak',
    'yg': 'yang',
    'dgn': 'dengan',
    'utk': 'untuk',
    'dg': 'dengan',
    'krn': 'karena',
    'bgt': 'banget',
    'bgt': 'banget',
    'bgt': 'banget',
    'bgd': 'banget',
    'bngt': 'banget',
    'bener': 'benar',
    'bner': 'benar',
    'kalo': 'kalau',
    'klo': 'kalau',
    'udah': 'sudah',
    'udh': 'sudah',
    'dah': 'sudah',
    'emang': 'memang',
    'emg': 'memang',
    'hrs': 'harus',
    'gue': 'saya',
    'gw': 'saya',
    'ane': 'saya',
    'aku': 'saya',
    'lu': 'kamu',
    'lo': 'kamu',
    'ente': 'kamu',
    'jd': 'jadi',
    'jdi': 'jadi',
    'kyk': 'seperti',
    'kayak': 'seperti',
    'gmn': 'bagaimana',
    'gimana': 'bagaimana',
    'gmana': 'bagaimana',
    'knp': 'kenapa',
    'knapa': 'kenapa',
    'td': 'tadi',
    'skrg': 'sekarang',
    'skrng': 'sekarang',
    'ajah': 'saja',
    'aja': 'saja',
    'aj': 'saja',
    'gitu': 'begitu',
    'gt': 'begitu',
    'bkin': 'bikin',
    'bikin': 'membuat',
    'sampe': 'sampai',
    'smp': 'sampai',
    'tp': 'tetapi',
    'tapi': 'tetapi',
    'org': 'orang',
    'orng': 'orang',
    'blm': 'belum',
    'blom': 'belum',
    'kpn': 'kapan',
    'kapan': 'kapan',
    'pgn': 'ingin',
    'pgen': 'ingin',
    'pengen': 'ingin',
    'mo': 'mau',
    'mau': 'ingin',
    'ada': 'ada',
    'adanya': 'ada',
    'nyebelin': 'menyebalkan',
    'sebel': 'kesal',
    'bete': 'kesal',
    'males': 'malas',
    'cape': 'capek',
    'capek': 'lelah',
    'mantap': 'bagus',
    'mantep': 'bagus',
    'keren': 'bagus',
    'jelek': 'buruk',
    'ancur': 'hancur',
    'parah': 'buruk',
    'lemot': 'lambat',
    'lelet': 'lambat',
    'eror': 'error',
    'error': 'kesalahan',
    'crash': 'rusak',
    'ngecrash': 'rusak',
    'ngelag': 'lambat',
    'lag': 'lambat',
    'lemot': 'lambat',
    'loadingnya': 'loading'
}

print(f"Slang dictionary loaded: {len(slang_dict)} entries")
print("\nContoh slang normalization:")
for key, value in list(slang_dict.items())[:10]:
    print(f"  {key} -> {value}")

Slang dictionary loaded: 88 entries

Contoh slang normalization:
  gak -> tidak
  ga -> tidak
  gk -> tidak
  ngga -> tidak
  nggak -> tidak
  tdk -> tidak
  yg -> yang
  dgn -> dengan
  utk -> untuk
  dg -> dengan


In [14]:
def preprocess_text(text):
    """
    Preprocessing text untuk Bahasa Indonesia
    
    Args:
        text (str): Raw text input
        
    Returns:
        str: Cleaned and preprocessed text
    """
    if not isinstance(text, str):
        return ""
    
    # 1. Case folding - convert to lowercase
    text = text.lower()
    
    # 2. Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    
    # 3. Remove mentions (@username)
    text = re.sub(r'@\w+', '', text)
    
    # 4. Remove hashtags (#hashtag)
    text = re.sub(r'#\w+', '', text)
    
    # 5. Remove numbers
    text = re.sub(r'\d+', '', text)
    
    # 6. Remove excessive punctuation (keep single punctuation)
    text = re.sub(r'([!?.,])\1+', r'\1', text)
    
    # 7. Remove special characters (keep letters and basic punctuation)
    text = re.sub(r'[^a-zA-Z\s!?.,]', '', text)
    
    # 8. Normalize slang words
    words = text.split()
    normalized_words = [slang_dict.get(word, word) for word in words]
    text = ' '.join(normalized_words)
    
    # 9. Remove extra whitespaces
    text = re.sub(r'\s+', ' ', text).strip()
    
    return text

# Test preprocessing function
test_texts = [
    "Aplikasi ini bgtttt keren!!! üëçüëçüëç",
    "Gak bisa dibuka, crash terus @developer #disappointed üò°",
    "Biasa aja sih, gak ada yg spesial www.example.com"
]

print("Testing preprocessing function:")
print("="*70)
for text in test_texts:
    cleaned = preprocess_text(text)
    print(f"Original: {text}")
    print(f"Cleaned:  {cleaned}")
    print("-"*70)

Testing preprocessing function:
Original: Aplikasi ini bgtttt keren!!! üëçüëçüëç
Cleaned:  aplikasi ini bgtttt keren!
----------------------------------------------------------------------
Original: Gak bisa dibuka, crash terus @developer #disappointed üò°
Cleaned:  tidak bisa dibuka, rusak terus
----------------------------------------------------------------------
Original: Biasa aja sih, gak ada yg spesial www.example.com
Cleaned:  biasa saja sih, tidak ada yang spesial
----------------------------------------------------------------------


In [15]:
def preprocess_dataset(examples):
    """
    Apply preprocessing to dataset examples
    """
    examples['text'] = [preprocess_text(text) for text in examples['text']]
    return examples

# Apply preprocessing
print("Applying preprocessing to all splits...")

dataset = dataset.map(preprocess_dataset, batched=True)

print("\nPreprocessing complete!")
print("\nContoh hasil preprocessing:")
for i in range(3):
    print(f"\nSample {i+1}:")
    print(f"Text: {dataset['train'][i]['text']}")
    print(f"Label: {dataset['train'][i]['label']}")

Applying preprocessing to all splits...


Map:   0%|          | 0/11000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1260 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]


Preprocessing complete!

Contoh hasil preprocessing:

Sample 1:
Text: warung ini dimiliki oleh pengusaha pabrik tahu yang sudah puluhan tahun terkenal membuat tahu putih di bandung . tahu berkualitas , dipadu keahlian memasak , dipadu kretivitas , jadilah warung yang menyajikan menu utama berbahan tahu , ditambah menu umum lain seperti ayam . semuanya selera indonesia . harga cukup terjangkau . jangan lewatkan tahu bletoka nya , tidak kalah dengan yang asli dari tegal !
Label: 2

Sample 2:
Text: mohon ulama lurus dan k mmbri hujjah partai apa yang harus diwlh agar suara islam tidak pecahpecah
Label: 1

Sample 3:
Text: lokasi strategis di jalan sumatera bandung . tempat nya nyaman terutama sofa di lantai . paella nya enak , sangat pas dimakan dengan minum bir dingin . appetiser nya juga enakenak .
Label: 2


In [16]:
# Model name
model_name = 'indobenchmark/indobert-base-p1'

print(f"Loading tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)

print(f"Loading model: {model_name}")
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3  # positive, neutral, negative
)

# Check device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")

if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

Loading tokenizer: indobenchmark/indobert-base-p1


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Loading model: indobenchmark/indobert-base-p1


pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Using device: cpu


In [17]:
def tokenize_function(examples):
    """
    Tokenize text examples
    """
    return tokenizer(
        examples['text'],
        padding='max_length',
        truncation=True,
        max_length=128  # Maksimal panjang sequence
    )

# Tokenize all splits
print("Tokenizing datasets...")

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
tokenized_dataset.set_format(
    'torch', 
    columns=['input_ids', 'attention_mask', 'label']
)

print("\nTokenization complete!")
print(f"Train dataset: {len(tokenized_dataset['train'])} samples")
print(f"Validation dataset: {len(tokenized_dataset['validation'])} samples")
print(f"Test dataset: {len(tokenized_dataset['test'])} samples")

Tokenizing datasets...


Map:   0%|          | 0/11000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1260 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]


Tokenization complete!
Train dataset: 11000 samples
Validation dataset: 1260 samples
Test dataset: 500 samples


In [18]:
def compute_metrics(eval_pred):
    """
    Compute accuracy and F1-score for evaluation
    
    Args:
        eval_pred: Tuple of (predictions, labels)
        
    Returns:
        dict: Dictionary containing accuracy and f1 scores
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    # Calculate metrics
    accuracy = accuracy_score(labels, predictions)
    f1_weighted = f1_score(labels, predictions, average='weighted')
    f1_macro = f1_score(labels, predictions, average='macro')
    
    # Detailed metrics per class
    precision, recall, f1_per_class, _ = precision_recall_fscore_support(
        labels, predictions, average=None
    )
    
    return {
        'accuracy': accuracy,
        'f1_weighted': f1_weighted,
        'f1_macro': f1_macro,
        'f1_negative': f1_per_class[0],
        'f1_neutral': f1_per_class[1],
        'f1_positive': f1_per_class[2],
    }

print("Metrics function defined!")

Metrics function defined!


In [21]:
# Create output directory
output_dir = os.path.join(BASE_PATH, 'training_output_id')
os.makedirs(output_dir, exist_ok=True)

# Training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir=os.path.join(BASE_PATH, 'logs_id'),
    logging_steps=100,
    eval_strategy='epoch',  # Changed from evaluation_strategy
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='f1_weighted',
    greater_is_better=True,
    save_total_limit=2,
    use_cpu=not torch.cuda.is_available(),  # Changed from fp16
    report_to='none',
    seed=42
)

print("Training configuration:")
print(f"  - Epochs: {training_args.num_train_epochs}")
print(f"  - Batch size: {training_args.per_device_train_batch_size}")
print(f"  - Learning rate: {training_args.learning_rate}")
print(f"  - Weight decay: {training_args.weight_decay}")
print(f"  - Device: {'CPU' if training_args.use_cpu else 'GPU'}")

Training configuration:
  - Epochs: 3
  - Batch size: 16
  - Learning rate: 2e-05
  - Weight decay: 0.01
  - Device: CPU


In [22]:
# Data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

print("Trainer initialized successfully!")
print(f"\nTotal training steps: {len(tokenized_dataset['train']) // training_args.per_device_train_batch_size * training_args.num_train_epochs}")

Trainer initialized successfully!

Total training steps: 2061


  trainer = Trainer(


In [23]:
# Start training
print("\n" + "="*70)
print("üöÄ STARTING MODEL TRAINING...")
print("="*70 + "\n")

# Train the model
train_result = trainer.train()

# Print training summary
print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETE!")
print("="*70)

print("\nTraining metrics:")
for key, value in train_result.metrics.items():
    print(f"  {key}: {value:.4f}")


üöÄ STARTING MODEL TRAINING...



Epoch,Training Loss,Validation Loss,Accuracy,F1 Weighted,F1 Macro,F1 Negative,F1 Neutral,F1 Positive
1,0.2239,0.234812,0.927778,0.926387,0.893464,0.91,0.815451,0.954943
2,0.1629,0.192293,0.940476,0.940232,0.915575,0.929936,0.856031,0.960758
3,0.0862,0.250183,0.938889,0.938511,0.91415,0.929193,0.854839,0.958419



‚úÖ TRAINING COMPLETE!

Training metrics:
  train_runtime: 4760.0105
  train_samples_per_second: 6.9330
  train_steps_per_second: 0.4340
  total_flos: 2170685696256000.0000
  train_loss: 0.1790
  epoch: 3.0000


In [24]:
# Evaluate on validation set
print("\n" + "="*70)
print("üìä EVALUATING ON VALIDATION SET...")
print("="*70 + "\n")

eval_results = trainer.evaluate()

print("\nValidation Results:")
for key, value in eval_results.items():
    print(f"  {key}: {value:.4f}")


üìä EVALUATING ON VALIDATION SET...




Validation Results:
  eval_loss: 0.1923
  eval_accuracy: 0.9405
  eval_f1_weighted: 0.9402
  eval_f1_macro: 0.9156
  eval_f1_negative: 0.9299
  eval_f1_neutral: 0.8560
  eval_f1_positive: 0.9608
  eval_runtime: 35.4697
  eval_samples_per_second: 35.5230
  eval_steps_per_second: 1.1280
  epoch: 3.0000


In [25]:
# Evaluate on test set
print("\n" + "="*70)
print("üîç EVALUATING ON TEST SET...")
print("="*70 + "\n")

test_results = trainer.evaluate(tokenized_dataset['test'])

print("\nTest Results:")
for key, value in test_results.items():
    print(f"  {key}: {value:.4f}")


üîç EVALUATING ON TEST SET...


Test Results:
  eval_loss: 0.2921
  eval_accuracy: 0.9080
  eval_f1_weighted: 0.9050
  eval_f1_macro: 0.8825
  eval_f1_negative: 0.9381
  eval_f1_neutral: 0.7871
  eval_f1_positive: 0.9224
  eval_runtime: 13.7832
  eval_samples_per_second: 36.2760
  eval_steps_per_second: 1.1610
  epoch: 3.0000

Test Results:
  eval_loss: 0.2921
  eval_accuracy: 0.9080
  eval_f1_weighted: 0.9050
  eval_f1_macro: 0.8825
  eval_f1_negative: 0.9381
  eval_f1_neutral: 0.7871
  eval_f1_positive: 0.9224
  eval_runtime: 13.7832
  eval_samples_per_second: 36.2760
  eval_steps_per_second: 1.1610
  epoch: 3.0000


In [26]:
# CRITICAL: Save model to specific path
print("\n" + "="*70)
print("üíæ SAVING MODEL AND TOKENIZER...")
print("="*70)

# Create directory if it doesn't exist
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

# Save model
print(f"\nSaving model to: {MODEL_SAVE_PATH}")
trainer.save_model(MODEL_SAVE_PATH)

# Save tokenizer
tokenizer.save_pretrained(MODEL_SAVE_PATH)

print("\n" + "="*70)
print("‚úÖ MODEL AND TOKENIZER SAVED SUCCESSFULLY!")
print("="*70)

# Verify saved files
saved_files = os.listdir(MODEL_SAVE_PATH)
print(f"\nSaved files ({len(saved_files)} files):")
for file in saved_files[:10]:  # Show first 10 files
    print(f"  - {file}")
if len(saved_files) > 10:
    print(f"  ... and {len(saved_files) - 10} more files")


üíæ SAVING MODEL AND TOKENIZER...

Saving model to: C:\Users\muham\Project\nlp-ki\saved_model_id

‚úÖ MODEL AND TOKENIZER SAVED SUCCESSFULLY!

Saved files (7 files):
  - config.json
  - model.safetensors
  - special_tokens_map.json
  - tokenizer.json
  - tokenizer_config.json
  - training_args.bin
  - vocab.txt

‚úÖ MODEL AND TOKENIZER SAVED SUCCESSFULLY!

Saved files (7 files):
  - config.json
  - model.safetensors
  - special_tokens_map.json
  - tokenizer.json
  - tokenizer_config.json
  - training_args.bin
  - vocab.txt


In [27]:
# Load model from saved path
print("\n" + "="*70)
print("üîÑ LOADING MODEL FROM SAVED PATH FOR TESTING...")
print("="*70)

# Load tokenizer and model
loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
loaded_model = AutoModelForSequenceClassification.from_pretrained(MODEL_SAVE_PATH)

# Move to device
loaded_model = loaded_model.to(device)
loaded_model.eval()

print(f"\n‚úÖ Model loaded successfully from: {MODEL_SAVE_PATH}")


üîÑ LOADING MODEL FROM SAVED PATH FOR TESTING...

‚úÖ Model loaded successfully from: C:\Users\muham\Project\nlp-ki\saved_model_id


In [28]:
# Test sentences
test_sentences = [
    "Aplikasi ini sangat membantu, fiturnya lengkap.",  # Expected: Positive
    "Sering crash pas dibuka, nyesel download.",        # Expected: Negative
    "Biasa aja sih, standar."                           # Expected: Neutral
]

# Label mapping
label_map = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

print("\n" + "="*70)
print("üß™ TESTING INFERENCE WITH MANUAL SENTENCES")
print("="*70 + "\n")

for i, sentence in enumerate(test_sentences, 1):
    print(f"Test {i}:")
    print(f"  Original: {sentence}")
    
    # Preprocess
    cleaned_sentence = preprocess_text(sentence)
    print(f"  Cleaned:  {cleaned_sentence}")
    
    # Tokenize
    inputs = loaded_tokenizer(
        cleaned_sentence,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128
    )
    
    # Move to device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    # Predict
    with torch.no_grad():
        outputs = loaded_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=-1)
        prediction = torch.argmax(logits, dim=-1).item()
    
    # Get probabilities for each class
    probs = probabilities[0].cpu().numpy()
    
    print(f"  Predicted Sentiment: {label_map[prediction]}")
    print(f"  Confidence Scores:")
    print(f"    - Negative: {probs[0]:.4f} ({probs[0]*100:.2f}%)")
    print(f"    - Neutral:  {probs[1]:.4f} ({probs[1]*100:.2f}%)")
    print(f"    - Positive: {probs[2]:.4f} ({probs[2]*100:.2f}%)")
    print("\n" + "-"*70 + "\n")

print("="*70)
print("‚úÖ INFERENCE TESTING COMPLETE!")
print("="*70)


üß™ TESTING INFERENCE WITH MANUAL SENTENCES

Test 1:
  Original: Aplikasi ini sangat membantu, fiturnya lengkap.
  Cleaned:  aplikasi ini sangat membantu, fiturnya lengkap.
  Predicted Sentiment: Positive
  Confidence Scores:
    - Negative: 0.0006 (0.06%)
    - Neutral:  0.0013 (0.13%)
    - Positive: 0.9980 (99.80%)

----------------------------------------------------------------------

Test 2:
  Original: Sering crash pas dibuka, nyesel download.
  Cleaned:  sering rusak pas dibuka, nyesel download.
  Predicted Sentiment: Negative
  Confidence Scores:
    - Negative: 0.9956 (99.56%)
    - Neutral:  0.0009 (0.09%)
    - Positive: 0.0034 (0.34%)

----------------------------------------------------------------------

Test 3:
  Original: Biasa aja sih, standar.
  Cleaned:  biasa saja sih, standar.
  Predicted Sentiment: Negative
  Confidence Scores:
    - Negative: 0.9931 (99.31%)
    - Neutral:  0.0024 (0.24%)
    - Positive: 0.0045 (0.45%)

----------------------------------------

In [29]:
# Test with random samples from test dataset
print("\n" + "="*70)
print("üé≤ TESTING WITH RANDOM SAMPLES FROM TEST DATASET")
print("="*70 + "\n")

import random

# Get 5 random samples
random_indices = random.sample(range(len(dataset['test'])), 5)

for idx in random_indices:
    sample = dataset['test'][idx]
    text = sample['text']
    true_label = sample['label']
    
    print(f"Sample:")
    print(f"  Text: {text[:100]}{'...' if len(text) > 100 else ''}")
    print(f"  True Label: {label_map[true_label]}")
    
    # Tokenize
    inputs = loaded_tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128
    )
    
    # Move to device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    # Predict
    with torch.no_grad():
        outputs = loaded_model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()
    
    print(f"  Predicted Label: {label_map[prediction]}")
    print(f"  ‚úÖ Correct!" if prediction == true_label else "  ‚ùå Incorrect!")
    print("\n" + "-"*70 + "\n")


üé≤ TESTING WITH RANDOM SAMPLES FROM TEST DATASET

Sample:
  Text: menyesal saya beli vivo , kamera nya tidak bagus .
  True Label: Negative
  Predicted Label: Negative
  ‚úÖ Correct!

----------------------------------------------------------------------

Sample:
  Text: bicara kemiskinan tetapi hidup dengan kemewahan . bicara kesenjangan tetapi harta triliunan . bicara...
  True Label: Negative
  Predicted Label: Negative
  ‚úÖ Correct!

----------------------------------------------------------------------

Sample:
  Text: saya sudah bayar tetapi etiket tidak dikirim malah kadaluwarsa , diminta struk pembayaran saya sudah...
  True Label: Negative
  Predicted Label: Negative
  ‚úÖ Correct!

----------------------------------------------------------------------

Sample:
  Text: saya bersyukur jokowi bakal jadi presiden selama sepuluh tahun .
  True Label: Positive
  Predicted Label: Positive
  ‚úÖ Correct!

----------------------------------------------------------------------

Sam

In [None]:
# ============================================
# üöÄ UPLOAD MODEL TO HUGGINGFACE HUB
# ============================================

from huggingface_hub import HfApi, login
import os

# Configuration
USERNAME = "rkkzone"  # Change this to your HuggingFace username
REPO_NAME = "indobert-sentiment-indonesian-playstore"  # FIXED: IndoBERT not RoBERTa
MODEL_FOLDER = "saved_model_id"

print("=" * 70)
print("üöÄ UPLOADING INDONESIAN SENTIMENT MODEL TO HUGGINGFACE HUB")
print("=" * 70)

# Get token
print("\nüìù Get your token from: https://huggingface.co/settings/tokens")
TOKEN = input("Paste your HuggingFace token: ").strip()

# Login
try:
    login(token=TOKEN)
    print("‚úÖ Login successful!")
except Exception as e:
    print(f"‚ùå Login failed: {e}")
    raise

# Prepare upload
api = HfApi()
repo_id = f"{USERNAME}/{REPO_NAME}"

print(f"\nüì¶ Repository: {repo_id}")
print(f"üìÅ Model folder: {MODEL_FOLDER} (~475MB)")
print("\nFiles to upload:")
for file in os.listdir(MODEL_FOLDER):
    size = os.path.getsize(os.path.join(MODEL_FOLDER, file)) / (1024**2)
    print(f"  - {file} ({size:.2f} MB)")

confirm = input("\n‚ö†Ô∏è  Proceed with upload? (y/n): ").strip().lower()
if confirm != 'y':
    print("‚ùå Upload cancelled")
else:
    # Create repo
    print(f"\nüì¶ Creating repository: {repo_id}")
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
    
    # Upload folder
    print(f"‚¨ÜÔ∏è  Uploading files from {MODEL_FOLDER}/...")
    print("‚è≥ This may take 5-10 minutes for ~475MB...")
    
    api.upload_folder(
        folder_path=MODEL_FOLDER,
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload Indonesian sentiment model (IndoBERT) trained on SMSA dataset (11K reviews)"
    )
    
    print(f"\n‚úÖ Upload successful!")
    print(f"üîó Model available at: https://huggingface.co/{repo_id}")

üöÄ UPLOADING INDONESIAN SENTIMENT MODEL TO HUGGINGFACE HUB

üìù Get your token from: https://huggingface.co/settings/tokens
‚úÖ Login successful!

üì¶ Repository: rkkzone/roberta-sentiment-indonesian-playstore
üìÅ Model folder: saved_model_id (~475MB)

Files to upload:
  - config.json (0.00 MB)
  - model.safetensors (474.74 MB)
  - special_tokens_map.json (0.00 MB)
  - tokenizer.json (0.68 MB)
  - tokenizer_config.json (0.00 MB)
  - training_args.bin (0.01 MB)
  - vocab.txt (0.22 MB)
‚úÖ Login successful!

üì¶ Repository: rkkzone/roberta-sentiment-indonesian-playstore
üìÅ Model folder: saved_model_id (~475MB)

Files to upload:
  - config.json (0.00 MB)
  - model.safetensors (474.74 MB)
  - special_tokens_map.json (0.00 MB)
  - tokenizer.json (0.68 MB)
  - tokenizer_config.json (0.00 MB)
  - training_args.bin (0.01 MB)
  - vocab.txt (0.22 MB)

üì¶ Creating repository: rkkzone/roberta-sentiment-indonesian-playstore

üì¶ Creating repository: rkkzone/roberta-sentiment-indonesian-p

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            


‚úÖ Upload successful!
üîó Model available at: https://huggingface.co/rkkzone/roberta-sentiment-indonesian-playstore


In [None]:
# ============================================
# üìù CREATE README FOR HUGGINGFACE MODEL
# ============================================

readme_content = f"""---
language: id
license: mit
tags:
- sentiment-analysis
- indobert
- indonesian
- google-play-reviews
- text-classification
datasets:
- smsa
metrics:
- accuracy
- f1
base_model: indobenchmark/indobert-base-p1
model_type: bert
---

# {REPO_NAME}

Fine-tuned **IndoBERT** model for Indonesian sentiment analysis on Google Play Store reviews.

## Model Description

This model is based on **IndoBERT** (`indobenchmark/indobert-base-p1`) and performs 3-class sentiment classification:
- **Positive** (label: 2) üòä
- **Neutral** (label: 1) üòê
- **Negative** (label: 0) üòû

## Training Data

- **Dataset**: SMSA (Sentiment Analysis on Indonesian Movie Reviews)
- **Language**: Indonesian (Bahasa Indonesia)
- **Size**: 11,000 reviews
  - Positive: 6,416 reviews (58.3%)
  - Negative: 3,436 reviews (31.2%)
  - Neutral: 1,148 reviews (10.4%)
- **Domain**: App reviews (Google Play Store)

## Usage

```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "{repo_id}"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Example Indonesian review
text = "Aplikasi bagus sekali! Sangat direkomendasikan."

inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = torch.argmax(predictions, dim=-1).item()

sentiment_labels = {{0: "Negative", 1: "Neutral", 2: "Positive"}}
print(f"Sentiment: {{sentiment_labels[sentiment]}}")
print(f"Confidence: {{predictions[0][sentiment].item():.4f}}")
```

### Batch Processing

```python
reviews = [
    "Aplikasi bagus, mudah digunakan!",
    "Tidak bisa login, aplikasi error terus",
    "Biasa aja sih"
]

inputs = tokenizer(reviews, return_tensors="pt", padding=True, truncation=True, max_length=128)

with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiments = torch.argmax(predictions, dim=-1)

for review, sentiment, probs in zip(reviews, sentiments, predictions):
    label = sentiment_labels[sentiment.item()]
    conf = probs[sentiment].item()
    print(f"Review: {{review}}")
    print(f"Sentiment: {{label}} ({{conf:.2%}} confident)\\n")
```

## Model Performance

**Training Configuration:**
- **Base Model**: IndoBERT (`indobenchmark/indobert-base-p1`)
- **Architecture**: BERT-based encoder
- **Training Epochs**: 3
- **Batch Size**: 16
- **Learning Rate**: 2e-5
- **Max Length**: 128 tokens
- **Optimizer**: AdamW
- **Weight Decay**: 0.01

**Evaluation Results:**
- Check model card for detailed metrics on test set
- Includes per-class precision, recall, and F1 scores

## Dataset Preprocessing

The model was trained with the following preprocessing:
1. **Indonesian Slang Normalization**: Common abbreviations expanded (gak‚Üítidak, bgd‚Üíbanget, etc.)
2. **Text Cleaning**: Remove special characters, URLs, excessive whitespace
3. **Tokenization**: IndoBERT tokenizer with max 128 tokens
4. **Label Mapping**: 
   - `negative` ‚Üí 0
   - `neutral` ‚Üí 1
   - `positive` ‚Üí 2

## Intended Use

This model is optimized for:
- ‚úÖ Indonesian language app reviews
- ‚úÖ Google Play Store sentiment analysis
- ‚úÖ Customer feedback classification
- ‚úÖ Review monitoring and analytics
- ‚úÖ E-commerce product reviews (Indonesian)

## Limitations

- Optimized for **informal Indonesian** text (app reviews)
- May not generalize well to formal Indonesian literature
- Best performance on **short texts** (< 128 tokens)
- Sensitive to **slang variations** across Indonesian regions
- Trained primarily on app domain data

## Applications

### 1. **App Analytics Dashboard**
Analyze user sentiment in Indonesian app reviews to track satisfaction over time.

### 2. **Customer Support Prioritization**
Automatically flag negative reviews for urgent response.

### 3. **Market Research**
Understand Indonesian user preferences and pain points.

### 4. **Review Filtering**
Filter out spam or irrelevant reviews based on sentiment patterns.

## Example Predictions

| Review (Indonesian) | Predicted Sentiment | Confidence |
|---------------------|---------------------|------------|
| "Aplikasi keren banget! Fiturnya lengkap!" | Positive | 95.3% |
| "Aplikasi sering crash, mohon diperbaiki" | Negative | 87.2% |
| "Lumayan lah, standar aja" | Neutral | 72.8% |
| "Mantap jiwa! Recommended!" | Positive | 91.5% |
| "Mengecewakan, buang-buang waktu" | Negative | 89.6% |

## Citation

```bibtex
@misc{{indobert_sentiment_indonesian_playstore,
  author = {{{USERNAME}}},
  title = {{Indonesian Sentiment Analysis for Google Play Reviews (IndoBERT)}},
  year = {{2025}},
  publisher = {{Hugging Face}},
  url = {{https://huggingface.co/{repo_id}}},
  note = {{Fine-tuned IndoBERT for Indonesian app review sentiment analysis}}
}}
```

## License

MIT License - Free for commercial and non-commercial use.

## Related Resources

- **Base Model**: [IndoBERT](https://huggingface.co/indobenchmark/indobert-base-p1)
- **Dataset**: [SMSA Indonesian Sentiment](https://github.com/IndoNLP/indonlu)
- **Live Demo**: [Google Play Review Analyzer](https://google-play-review-analyzer.streamlit.app)

## Acknowledgments

- Dataset: Indonesian NLP research community (SMSA dataset)
- Base Model: IndoNLP team for IndoBERT (`indobenchmark/indobert-base-p1`)
- Framework: Hugging Face Transformers

---

**Built with ‚ù§Ô∏è for Indonesian app developers**

*For issues or questions, please open an issue on the model repository.*
"""

# Upload README
try:
    api.upload_file(
        path_or_fileobj=readme_content.encode(),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="model",
        commit_message="Add comprehensive README with usage examples and model details (IndoBERT-based)"
    )
    print("\n‚úÖ README created successfully!")
    print(f"\nüéâ All done! Visit your model at:")
    print(f"   https://huggingface.co/{repo_id}")
except Exception as e:
    print(f"‚ùå README upload failed: {e}")


‚úÖ README created successfully!

üéâ All done! Visit your model at:
   https://huggingface.co/rkkzone/roberta-sentiment-indonesian-playstore
