In [1]:
import pandas as pd
import torch
from transformers import BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback, DataCollatorWithPadding, BertTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from imblearn.over_sampling import RandomOverSampler
import json

In [8]:
# Load data
data_path = r"C:\Users\ASUS\AppData\Roaming\Python\Python313\site-packages\pandas\io\excel\SampleTraining_1559.xlsx"
data = pd.read_excel(data_path)
 
# token_ids dalam integers (parse if stored as strings)
data['token_ids'] = data['token_ids'].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
 
# Over-sample kelas minoritas pakai 'token_ids' 
ros = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = ros.fit_resample(data[['token_ids']], data['label'])
data_resampled = pd.DataFrame(X_resampled, columns=['token_ids'])
data_resampled['label'] = y_resampled
 
# Distribusi kelas baru
print("Class distribution after over-sampling:")
print(data_resampled['label'].value_counts())
 
# Split data into training (60%), validation (20%), and test (20%) 
train_data, temp_data = train_test_split(data_resampled, test_size=0.4, stratify=data_resampled['label'], random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, stratify=temp_data['label'], random_state=42)

Class distribution after over-sampling:
label
0    633
1    633
2    633
Name: count, dtype: int64


In [9]:
# Initialize tokenizer dengan IndoBERT
tokenizer = BertTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
 
# Buat torch dataset pakai 'token_ids' 
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, token_ids, labels):
        self.token_ids = token_ids
        self.labels = labels
 
    def __getitem__(self, idx):
        item = {
            "input_ids": torch.tensor(self.token_ids[idx], dtype=torch.long),  # Ensuring long tensor type
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }
        return item
 
    def __len__(self):
        return len(self.labels)

# Prepare datasets dengan 'token_ids'
train_dataset = SentimentDataset(train_data['token_ids'].tolist(), train_data['label'].tolist())
val_dataset = SentimentDataset(val_data['token_ids'].tolist(), val_data['label'].tolist())
test_dataset = SentimentDataset(test_data['token_ids'].tolist(), test_data['label'].tolist())

# Pasang dropout 0.9 supaya tidak overfitting
class CustomBertForSequenceClassification(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.dropout = torch.nn.Dropout(0.9) 
 
model = CustomBertForSequenceClassification.from_pretrained("indobenchmark/indobert-base-p2", num_labels=3)
 
# Evaluation metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}
 
# Padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Training arguments pakai early stopping dan dropout
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=1e-5,  
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,  
    weight_decay=0.03,
    logging_dir='./logs',
    logging_steps=10,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True
)

# Trainer with early stopping callback & data collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]  # Early stopping kalua ga ada improvement sehabis 2 epochs
)
 
# Train and evaluate model
trainer.train()
 
# Evaluate on the test set
test_results = trainer.evaluate(test_dataset)
print("Test set results:", test_results)
 
# Prediksi sentiment untuk keseluruhan data
dataset_full = SentimentDataset(data['token_ids'].tolist(), data['label'].tolist())
predictions = trainer.predict(dataset_full).predictions.argmax(-1)
 
# Buat dataframe untuk prediction
data['bert_sentiment'] = predictions  # new column for BERT predictions

# Save 
output_path = "C:\\Users\\ASUS\\AppData\\Roaming\\Python\\Python313\\site-packages\\pandas\\io\\excel\\BERT_ModellingKontan_Training1560.xlsx"
data.to_excel(output_path, index=False)
 
print("Model training, evaluation, and sentiment prediction are complete! Results saved as 'BERT_ModellingKontan_Training1560.xlsx'.")

Some weights of CustomBertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9885,0.769853,0.628947,0.620036,0.628947,0.613932
2,0.6821,0.523891,0.757895,0.794696,0.757895,0.753225
3,0.3627,0.348994,0.865789,0.87293,0.865789,0.865061
4,0.228,0.323061,0.910526,0.910808,0.910526,0.910629
5,0.1357,0.32553,0.913158,0.913558,0.913158,0.91332


Test set results: {'eval_loss': 0.31098154187202454, 'eval_accuracy': 0.9, 'eval_precision': 0.9023117201426025, 'eval_recall': 0.9, 'eval_f1': 0.8994443264942247, 'eval_runtime': 20.7155, 'eval_samples_per_second': 18.344, 'eval_steps_per_second': 2.317, 'epoch': 5.0}
Model training, evaluation, and sentiment prediction are complete! Results saved as 'BERT_ModellingKontan_Training1560.xlsx'.
