In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from transformers import TFAutoModel, AutoTokenizer
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import tensorflow.data as tf_data
from tensorflow.keras import layers
import re
import pickle
import json

# Mount Google Drive so the dataset and model files under /content/drive are accessible
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
class IndoBERTHoaxClassifier:
    def __init__(self, model_name='indobenchmark/indobert-base-p1', max_length=128):
        """
        Set up the fine-tuning classifier and load its tokenizer.

        Args:
            model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
                (and later to ``TFAutoModel.from_pretrained`` by build_model).
            max_length: Sequence length used for padding/truncation.
        """
        self.model_name, self.max_length = model_name, max_length

        # The Keras model is created lazily by build_model() / load_model().
        self.bert_model = None
        self.model = None

        # The tokenizer is needed for both training and inference, so load it now.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def preprocess_text(self, text, minimal_preprocessing=True):
        # Text preprocessing
        if pd.isna(text) or not text:
            return ""

        text = str(text).strip()

        if minimal_preprocessing:
            # for already cleaned data
            text = re.sub(r'\s+', ' ', text).strip()
            return text if len(text) > 3 else ""
        else:
            # Additional cleaning if needed
            text = text.lower()
            text = re.sub(r'http\S+|www\S+|https\S+', '', text)
            text = re.sub(r'@\w+|#\w+', '', text)
            text = re.sub(r'\s+', ' ', text).strip()
            return text

    def balance_dataset(self, df, text_column='text_akhir', label_column='label'):
        # Balance the dataset
        print("Dataset distribution before balancing:")
        print(df[label_column].value_counts())

        class_0 = df[df[label_column] == 0]
        class_1 = df[df[label_column] == 1]

        min_size = min(len(class_0), len(class_1))

        # Check if min_size is zero, if so, return an empty DataFrame
        if min_size == 0:
            print("Warning: One of the classes is empty, returning empty DataFrame after balancing.")
            return pd.DataFrame(columns=df.columns)

        class_0_balanced = class_0.sample(n=min_size, random_state=42)
        class_1_balanced = class_1.sample(n=min_size, random_state=42)

        balanced_df = pd.concat([class_0_balanced, class_1_balanced])
        balanced_df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)
        print("Dataset distribution after balancing:")
        print(balanced_df[label_column].value_counts())

        return balanced_df

    def tokenize_texts(self, texts):
        """Tokenize texts using IndoBERT tokenizer"""
        # Check if texts is empty before calling tolist()
        if texts.empty:
            print("Warning: Input texts are empty during tokenization.")
            return {'input_ids': tf.constant([], dtype=tf.int32),
                    'attention_mask': tf.constant([], dtype=tf.int32)}

        encoded = self.tokenizer(
            texts.tolist(),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='tf',
            return_attention_mask=True,
            verbose=False  # Changed to False to reduce output
        )

        return {
            'input_ids': encoded['input_ids'],
            'attention_mask': encoded['attention_mask']
        }

    def build_model(self):
        """
        Build the IndoBERT classifier as a subclassed Keras model.

        Creates the model from ``self.model_name``, stores it in
        ``self.model``, runs one forward pass on an all-zero dummy batch of
        shape ``(1, self.max_length)`` so the layers create their weights,
        and returns the model.

        NOTE(review): IndoBERTInference.load_model re-declares this same
        architecture; the two definitions must be kept in sync for saved
        weights to load.
        """

        # Subclassed model: BERT encoder followed by a small dense head.
        class IndoBERTClassifier(tf.keras.Model):
            def __init__(self, model_name, max_length, **kwargs):
                super(IndoBERTClassifier, self).__init__(**kwargs)
                self.max_length = max_length
                # Pretrained encoder loaded via transformers
                self.bert_model = TFAutoModel.from_pretrained(model_name)
                # Head: dropout -> Dense(256) -> dropout -> Dense(128) -> dropout -> sigmoid unit
                self.dropout1 = layers.Dropout(0.3)
                self.dense1 = layers.Dense(256, activation='relu', name='dense_1')
                self.dropout2 = layers.Dropout(0.2)
                self.dense2 = layers.Dense(128, activation='relu', name='dense_2')
                self.dropout3 = layers.Dropout(0.1)
                self.classifier = layers.Dense(1, activation='sigmoid', name='classifier')

            def call(self, inputs, training=None):
                # `inputs` is a dict carrying 'input_ids' and 'attention_mask'
                input_ids = inputs['input_ids']
                attention_mask = inputs['attention_mask']

                # Pass through BERT
                bert_output = self.bert_model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    training=training
                )

                # Use the hidden state at position 0 (the [CLS] slot) as the
                # sequence representation for classification
                pooled_output = bert_output.last_hidden_state[:, 0, :]

                # Classification layers; `training` is forwarded to the dropout layers
                x = self.dropout1(pooled_output, training=training)
                x = self.dense1(x)
                x = self.dropout2(x, training=training)
                x = self.dense2(x)
                x = self.dropout3(x, training=training)
                outputs = self.classifier(x)

                return outputs

        # Create the model
        self.model = IndoBERTClassifier(
            model_name=self.model_name,
            max_length=self.max_length,
            name='IndoBERT_Classifier'
        )

        # Build the model by calling it once with a dummy input, so that the
        # weights exist before summary()/load_weights() are used
        dummy_input = {
            'input_ids': tf.zeros((1, self.max_length), dtype=tf.int32),
            'attention_mask': tf.zeros((1, self.max_length), dtype=tf.int32)
        }
        _ = self.model(dummy_input)

        return self.model

    def prepare_data(self, csv_path, text_column='text_akhir', label_column='label'):
        """Prepare data for training"""
        # Load data
        print(f"Loading data from {csv_path}")
        try:
            df = pd.read_csv(csv_path)
        except FileNotFoundError:
            print(f"Error: File not found at {csv_path}")
            return pd.Series(dtype=str), pd.Series(dtype=int)  # Return empty Series

        print(f"Original dataset shape: {df.shape}")

        # Remove missing values
        df = df.dropna(subset=[text_column, label_column])
        print(f"Shape after dropping NA: {df.shape}")

        # Balance dataset
        df = self.balance_dataset(df, text_column, label_column)
        print(f"Shape after balancing: {df.shape}")

        # Preprocess text (minimal since already preprocessed)
        df[text_column] = df[text_column].apply(lambda x: self.preprocess_text(x, True))
        print(f"Shape after preprocessing text: {df.shape}")

        # Remove empty texts
        df = df[df[text_column].str.len() > 0]
        print(f"Shape after removing empty texts: {df.shape}")

        print(f"Final dataset shape: {df.shape}")

        if df.empty:
            print("Warning: Final dataset is empty.")
            return pd.Series(dtype=str), pd.Series(dtype=int)  # Return empty Series

        return df[text_column], df[label_column]

    def train(self, csv_path, validation_split=0.2, epochs=5, batch_size=16, learning_rate=2e-5):
        """Fine-tune IndoBERT for hoax classification"""

        # Prepare data
        print("Preparing data")
        texts, labels = self.prepare_data(csv_path)

        # Check if data is empty
        if texts.empty or labels.empty:
            print("Error: No data available for training after preparation.")
            return None

        # Split data
        if len(texts) < 2 or len(labels) < 2:
            print("Error: Not enough data samples to perform train-validation split.")
            return None

        # Ensure stratify is possible if classes are present
        if len(np.unique(labels)) > 1:
            X_train, X_val, y_train, y_val = train_test_split(
                texts,
                labels,
                test_size=validation_split,
                random_state=42,
                stratify=labels
            )
        else:
            print("Warning: Only one class present in the dataset. Skipping stratification.")
            X_train, X_val, y_train, y_val = train_test_split(
                texts,
                labels,
                test_size=validation_split,
                random_state=42,
            )

        print(f"Training samples: {len(X_train)}")
        print(f"Validation samples: {len(X_val)}")

        # Tokenize data
        print("Tokenizing texts...")
        train_encodings = self.tokenize_texts(X_train)
        val_encodings = self.tokenize_texts(X_val)

        # Build model
        print("Building model...")
        self.build_model()

        # Set BERT layers to trainable (fine-tuning)
        if hasattr(self.model, 'bert_model'):
            for layer in self.model.bert_model.layers:
                layer.trainable = True

        # Compile model
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(
            optimizer=optimizer,
            loss='binary_crossentropy',
            metrics=['accuracy', 'precision', 'recall']
        )

        # Print model summary
        print("\nModel Architecture:")
        self.model.summary()

        # Calculate class weights
        if y_train.empty or len(np.unique(y_train)) < 1:
            class_weights_dict = {}
            print("Warning: y_train is empty or has no unique classes, cannot compute class weights.")
        else:
            class_weights = compute_class_weight(
                'balanced',
                classes=np.unique(y_train),
                y=y_train
            )
            class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

        # Callbacks
        callbacks = [
            EarlyStopping(
                monitor='val_loss',
                patience=3,
                restore_best_weights=True,
                verbose=1
            ),
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=2,
                min_lr=1e-7,
                verbose=1
            )
        ]

        # Prepare training data as tf.data.Dataset
        def create_dataset(encodings, labels, batch_size):
            def gen():
                for i in range(len(labels)):
                    yield {
                        'input_ids': encodings['input_ids'][i],
                        'attention_mask': encodings['attention_mask'][i]
                    }, labels.iloc[i]

            dataset = tf.data.Dataset.from_generator(
                gen,
                output_signature=(
                    {
                        'input_ids': tf.TensorSpec(shape=(self.max_length,), dtype=tf.int32),
                        'attention_mask': tf.TensorSpec(shape=(self.max_length,), dtype=tf.int32)
                    },
                    tf.TensorSpec(shape=(), dtype=tf.int64)
                )
            )
            return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

        # Create datasets
        train_dataset = create_dataset(train_encodings, y_train, batch_size)
        val_dataset = create_dataset(val_encodings, y_val, batch_size) if not y_val.empty else None

        # Train model
        print(f"\nStarting fine-tuning with {epochs} epochs...")
        history = self.model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=epochs,
            callbacks=callbacks,
            class_weight=class_weights_dict,
            verbose=1
        )

        # Evaluate
        print("\nEvaluating model...")
        if val_dataset and not y_val.empty:
            val_predictions = self.model.predict(val_dataset)
            val_pred_binary = (val_predictions > 0.5).astype(int).flatten()

            print("\nValidation Results:")
            print(classification_report(y_val, val_pred_binary))
            print("\nConfusion Matrix:")
            print(confusion_matrix(y_val, val_pred_binary))

        return history

    def predict(self, texts):
        """Make predictions on new texts"""
        if self.model is None or self.tokenizer is None:
            print("Error: Model or tokenizer not loaded. Cannot make predictions.")
            return []

        if isinstance(texts, str):
            texts = [texts]

        # Preprocess
        processed_texts = [self.preprocess_text(text, True) for text in texts]
        processed_texts_series = pd.Series(processed_texts)

        # Tokenize
        if processed_texts_series.empty or all(p == "" for p in processed_texts):
            print("Warning: Input texts are empty or become empty after preprocessing.")
            return []

        encodings = self.tokenize_texts(processed_texts_series)

        if encodings['input_ids'].shape[0] == 0:
            print("Warning: Tokenization resulted in empty inputs.")
            return []

        # Create prediction dataset
        def gen():
            for i in range(len(texts)):
                yield {
                    'input_ids': encodings['input_ids'][i],
                    'attention_mask': encodings['attention_mask'][i]
                }

        predict_dataset = tf.data.Dataset.from_generator(
            gen,
            output_signature={
                'input_ids': tf.TensorSpec(shape=(self.max_length,), dtype=tf.int32),
                'attention_mask': tf.TensorSpec(shape=(self.max_length,), dtype=tf.int32)
            }
        ).batch(len(texts))

        # Predict
        predictions = self.model.predict(predict_dataset)

        # Format results
        results = []
        for i, text in enumerate(texts):
            if i < len(predictions):
                confidence = float(predictions[i][0])
                is_hoax = confidence > 0.5

                results.append({
                    'text': text,
                    'prediction': 'HOAX' if is_hoax else 'NOT HOAX',
                    'confidence': confidence,
                    'is_hoax': is_hoax
                })
            else:
                results.append({
                    'text': text,
                    'prediction': 'Error',
                    'confidence': 0.0,
                    'is_hoax': False
                })

        return results

    def save_model(self, model_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/indobert_hoax_model', tokenizer_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/tokenizer_config.json'):
        """Save fine tuned model"""
        if self.model:
          #Ensure the model_path ends with .weights.h5
          if not model_path.endswith('.weights.h5'):
            model_path += '.weights.h5'

          self.model.save_weights(model_path)

          # Save tokenizer configuration
          tokenizer_config = {
             'model_name': self.model_name,
             'max_length': self.max_length
            }

          with open(tokenizer_path, 'w') as f:
              json.dump(tokenizer_config, f)

          print(f"Model weights saved to {model_path}")
          print(f"Tokenizer config saved to {tokenizer_path}")
        else:
            print("Error: No model to save.")

    def load_model(self, model_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/indobert_hoax_model', tokenizer_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/tokenizer_config.json'):
        """Load fine tuned model"""
        try:
            # Load tokenizer config
            with open(tokenizer_path, 'r') as f:
                tokenizer_config = json.load(f)

            self.model_name = tokenizer_config['model_name']
            self.max_length = tokenizer_config['max_length']

            # Reinitialize tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            # Build model architecture
            self.build_model()

            # Load weights
            self.model.load_weights(model_path)

            print("Model and tokenizer loaded successfully!")
        except FileNotFoundError as e:
            print(f"Error loading model/tokenizer: {e}")
            self.model = None
            self.tokenizer = None
        except Exception as e:
            print(f"An unexpected error occurred while loading the model: {e}")
            self.model = None
            self.tokenizer = None

In [3]:
# Simplified Inference Class for IndoBERT
class IndoBERTInference:
    def __init__(self, model_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/indobert_hoax_model', tokenizer_path='/content/drive/MyDrive/PengolahanData/Hasil_Modelling/tokenizer_config.json'):
        self.model = None
        self.tokenizer = None
        self.max_length = None
        self.load_model(model_path, tokenizer_path)

    def load_model(self, model_path, tokenizer_path):
        """
        Load the trained model and tokenizer for inference.

        Reads 'model_name' and 'max_length' from the JSON file at
        ``tokenizer_path``, rebuilds the classifier architecture, and loads
        the weights from ``model_path`` ('.weights.h5' is appended when
        missing). On any failure an error is printed and ``self.model`` and
        ``self.tokenizer`` are set to ``None``.
        """
        try:
            # Load config
            with open(tokenizer_path, 'r') as f:
                config = json.load(f)

            self.max_length = config['max_length']

            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(config['model_name'])

            # Recreate model architecture (same as in main class).
            # NOTE(review): this duplicates the class defined in
            # IndoBERTHoaxClassifier.build_model; the two must stay in sync
            # for the saved weights to load.
            class IndoBERTClassifier(tf.keras.Model):
                def __init__(self, model_name, max_length, **kwargs):
                    super(IndoBERTClassifier, self).__init__(**kwargs)
                    self.max_length = max_length
                    # Pretrained encoder loaded via transformers
                    self.bert_model = TFAutoModel.from_pretrained(model_name)
                    # Head: dropout -> Dense(256) -> dropout -> Dense(128) -> dropout -> sigmoid unit
                    self.dropout1 = layers.Dropout(0.3)
                    self.dense1 = layers.Dense(256, activation='relu', name='dense_1')
                    self.dropout2 = layers.Dropout(0.2)
                    self.dense2 = layers.Dense(128, activation='relu', name='dense_2')
                    self.dropout3 = layers.Dropout(0.1)
                    self.classifier = layers.Dense(1, activation='sigmoid', name='classifier')

                def call(self, inputs, training=None):
                    # `inputs` is a dict carrying 'input_ids' and 'attention_mask'
                    input_ids = inputs['input_ids']
                    attention_mask = inputs['attention_mask']

                    bert_output = self.bert_model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        training=training
                    )

                    # Hidden state at position 0 is used as the sequence representation
                    pooled_output = bert_output.last_hidden_state[:, 0, :]

                    x = self.dropout1(pooled_output, training=training)
                    x = self.dense1(x)
                    x = self.dropout2(x, training=training)
                    x = self.dense2(x)
                    x = self.dropout3(x, training=training)
                    outputs = self.classifier(x)

                    return outputs

            # Create and build model
            self.model = IndoBERTClassifier(
                model_name=config['model_name'],
                max_length=self.max_length,
                name='IndoBERT_Classifier'
            )

            # Build with dummy input so the weights exist before load_weights
            dummy_input = {
                'input_ids': tf.zeros((1, self.max_length), dtype=tf.int32),
                'attention_mask': tf.zeros((1, self.max_length), dtype=tf.int32)
            }
            _ = self.model(dummy_input)

            # Apply the same '.weights.h5' suffix rule used when saving
            if not model_path.endswith('.weights.h5'):
                model_path += '.weights.h5'

            # Load weights
            self.model.load_weights(model_path)

            print("IndoBERT model loaded successfully ✓")
        except FileNotFoundError as e:
            print(f"❌ Error loading model: {e}")
            self.model = None
            self.tokenizer = None
        except Exception as e:
            # Any other failure also leaves the object in the "not loaded" state
            print(f"❌ An unexpected error occurred while loading model: {str(e)}")
            self.model = None
            self.tokenizer = None

    def predict(self, text):
        """Simple prediction function"""
        if self.model is None or self.tokenizer is None:
            print("Error: Model or tokenizer not loaded. Cannot make predictions.")
            return {'text': text, 'prediction': 'Error', 'confidence': 0.0, 'is_hoax': False}

        # Tokenize
        encoded = self.tokenizer(
            [text],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='tf',
            return_attention_mask=True
        )

        # Predict
        predictions = self.model({
            'input_ids': encoded['input_ids'],
            'attention_mask': encoded['attention_mask']
        })

        if predictions.shape[0] > 0:
            confidence = float(predictions[0][0])
            is_hoax = confidence > 0.5
        else:
            confidence = 0.0
            is_hoax = False

        return {
            'text': text,
            'prediction': 'HOAX' if is_hoax else 'NOT HOAX',
            'confidence': confidence,
            'is_hoax': is_hoax
        }

In [4]:
# Main execution
if __name__ == "__main__":
    # Set up the classifier around the pretrained IndoBERT checkpoint
    classifier = IndoBERTHoaxClassifier(model_name='indobenchmark/indobert-base-p1', max_length=128)

    # Fine-tune on the preprocessed, labelled dataset
    print("Fine-tuning IndoBERT for hoax classification...")
    history = classifier.train(
        csv_path='/content/drive/MyDrive/PengolahanData/Data_Fix/preprocessed_text_label.csv',
        validation_split=0.2,
        epochs=3,
        batch_size=16,
        learning_rate=2e-5,
    )

    # Persist the model only when training actually produced one
    if history is None or classifier.model is None:
        print("Skipping model save due to training failure or empty dataset.")
    else:
        classifier.save_model()

    # Smoke-test inputs, including an empty string and a very short text
    test_texts = [
        "Pemerintah akan memberikan bantuan langsung tunai kepada seluruh rakyat Indonesia",
        "Menteri Kesehatan mengumumkan kebijakan baru terkait protokol kesehatan",
        "Vaksin COVID-19 berbahaya dan dapat mengubah DNA manusia secara permanen",
        "",
        "Pendek text",
    ]

    print("\nTesting IndoBERT predictions:")
    if classifier.model is None:
        print("Cannot make predictions: Model is not available.")
    else:
        predictions = classifier.predict(test_texts)
        for pred in predictions:
            # Entries for empty input texts are not worth printing
            if not pred['text']:
                continue
            print(f"\nText: {pred['text'][:80]}...")
            print(f"Prediction: {pred['prediction']}")
            print(f"Confidence: {pred['confidence']:.3f}")
            print(f"Is Hoax: {pred['is_hoax']}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Fine-tuning IndoBERT for hoax classification...
Preparing data
Loading data from /content/drive/MyDrive/PengolahanData/Data_Fix/preprocessed_text_label.csv
Original dataset shape: (31678, 2)
Shape after dropping NA: (31660, 2)
Dataset distribution before balancing:
label
0    21710
1     9950
Name: count, dtype: int64
Dataset distribution after balancing:
label
0    9950
1    9950
Name: count, dtype: int64
Shape after balancing: (19900, 2)
Shape after preprocessing text: (19900, 2)
Shape after removing empty texts: (19895, 2)
Final dataset shape: (19895, 2)
Training samples: 15916
Validation samples: 3979
Tokenizing texts...
Building model...


tf_model.h5:   0%|          | 0.00/656M [00:00<?, ?B/s]

Some layers from the model checkpoint at indobenchmark/indobert-base-p1 were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at indobenchmark/indobert-base-p1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.



Model Architecture:



Starting fine-tuning with 3 epochs...
Epoch 1/3
    995/Unknown [1m161s[0m 132ms/step - accuracy: 0.7375 - loss: 0.5254 - precision: 0.7513 - recall: 0.6912



[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 166ms/step - accuracy: 0.7376 - loss: 0.5253 - precision: 0.7514 - recall: 0.6913 - val_accuracy: 0.9193 - val_loss: 0.2349 - val_precision: 0.9108 - val_recall: 0.9296 - learning_rate: 2.0000e-05
Epoch 2/3
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 140ms/step - accuracy: 0.8898 - loss: 0.2877 - precision: 0.8824 - recall: 0.8972 - val_accuracy: 0.9264 - val_loss: 0.2060 - val_precision: 0.9240 - val_recall: 0.9291 - learning_rate: 2.0000e-05
Epoch 3/3
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 139ms/step - accuracy: 0.9065 - loss: 0.2504 - precision: 0.8985 - recall: 0.9147 - val_accuracy: 0.9294 - val_loss: 0.1928 - val_precision: 0.9257 - val_recall: 0.9336 - learning_rate: 2.0000e-05
Restoring model weights from the end of the best epoch: 3.

Evaluating model...
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 130ms/step

Validation Results:
      



In [5]:
texts = [
    "Jokowi akan digantikan oleh alien dalam upacara kenegaraan minggu depan.",
    "Presiden mengunjungi nelayan di Bitung untuk menyerahkan bantuan alat tangkap.",
    "juru bicara klaim prabowo percaya maju pilpres restu jokowi juru bicara menteri pertahanan prabowo subianto dahnil anzar simanjuntak prabowo percaya maju calon presiden capres pilpres restu presiden joko widodo jokowi dahnil restu suara dukungan prabowo bertambah restu restu jokowi prabowo semangat pemilih prabowo bertambah dahnil acara political show cnn indonesia tv senin malam dahnil prabowo salah tokoh memiliki adab capres prabowo izin jokowi melenggang kontestasi politik prabowo agenda politik lakukan mengganggu kinerja tugastugasnya menteri pertahanan kepemimpinan jokowi prabowo beliau jokowi silahkan mengizinkan prabowo proses kontestasi dahnil gerindra mendukung sepenuhnya pencapresan prabowo suara grass root gerindra sambungnya jokowi memperkenalkan tokoh berpotensi capres cawapres pilpres peringatan ulang partai persatuan pembangunan ppp jumat salah tokoh jokowi prabowo jokowi menyinggung kans prabowo calon presiden pilpres mengungkit reputasi kemenangan prabowo pilpres kali pilpres menang mohon maaf prabowo jatahnya prabowo jokowi puncak peringatan ulang perindo jakarta senin lnadal"
]

# Score each sample separately. classifier.predict wraps a single string in a
# list and returns a list of result dicts, so `result` is a one-element list.
for text in texts:
    result = classifier.predict(text)
    print(f"Teks: {text}\nHasil: {result}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Teks: Jokowi akan digantikan oleh alien dalam upacara kenegaraan minggu depan.
Hasil: [{'text': 'Jokowi akan digantikan oleh alien dalam upacara kenegaraan minggu depan.', 'prediction': 'HOAX', 'confidence': 0.7233927845954895, 'is_hoax': True}]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Teks: Presiden mengunjungi nelayan di Bitung untuk menyerahkan bantuan alat tangkap.
Hasil: [{'text': 'Presiden mengunjungi nelayan di Bitung untuk menyerahkan bantuan alat tangkap.', 'prediction': 'HOAX', 'confidence': 0.918043315410614, 'is_hoax': True}]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Teks: juru bicara klaim prabowo percaya maju pilpres restu jokowi juru bicara menteri pertahanan prabowo subianto dahnil anzar simanjuntak prabowo percaya maju calon presiden capres pilpres restu presiden joko widodo jokowi dahnil restu suara dukungan prabowo bertambah restu restu