In [6]:
# ==========================================
# 1. PERSIAPAN & IMPORT LIBRARY
# ==========================================
import tensorflow as tf
import pandas as pd
import numpy as np
import os

from google.colab import drive
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Report the TensorFlow build and the GPUs it can see. This is informational
# only: nothing here forces or verifies GPU placement.
print("TensorFlow Version:", tf.version.VERSION)
print("GPU Available:", tf.config.list_physical_devices(device_type='GPU'))

# ==========================================
# 2. LOAD DATASET (Google Drive)
# ==========================================
# Mount Drive first: both CSV files are read from a folder inside it.
drive.mount('/content/drive')

BASE_PATH = '/content/drive/MyDrive/DEEP LEARNING/dataset'

# Build both CSV locations from the same dataset folder.
train_path, test_path = (
    os.path.join(BASE_PATH, csv_name)
    for csv_name in ('dataset_sentimen_train.csv', 'dataset_sentimen_test.csv')
)

print(f"Loading Train Data from: {train_path}")
df_train = pd.read_csv(train_path)

print(f"Loading Test Data from: {test_path}")
df_test = pd.read_csv(test_path)

# Show the first rows of the training data as a quick sanity check.
print("\nContoh Data Train:")
print(df_train.head(n=5))

# ==========================================
# 3. PRE-PROCESSING
# ==========================================
# A. Label encoding: map each label string to an integer class id.
le = LabelEncoder()
train_labels = le.fit_transform(df_train['label'])
# The fitted encoder already holds the distinct classes, so there is no need
# to re-scan the encoded labels.
num_classes = len(le.classes_)

# B. Tokenization & padding
vocab_size = 5000      # passed to the tokenizer as num_words
max_len = 50           # every sequence is padded/truncated to this length
embedding_dim = 64

# pandas reads empty CSV cells as NaN (a float), which the tokenizer cannot
# process as text. Replace missing values with an empty string and make sure
# every entry is a str before tokenizing.
train_texts = df_train['text'].fillna('').astype(str)
test_texts = df_test['text'].fillna('').astype(str)

# The vocabulary is fitted on the training texts only; words not kept in it
# are mapped to the "<OOV>" token.
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

train_seq = tokenizer.texts_to_sequences(train_texts)
test_seq  = tokenizer.texts_to_sequences(test_texts)

# Pad (and, if needed, cut) at the end of each sequence.
x_train = pad_sequences(train_seq, maxlen=max_len, padding='post', truncating='post')
x_test  = pad_sequences(test_seq,  maxlen=max_len, padding='post', truncating='post')

# ==========================================
# 4. KOMPONEN TRANSFORMER
# ==========================================
class PositionalEmbedding(tf.keras.layers.Layer):
    """Token embedding scaled by sqrt(d_model) plus a learned position embedding.

    Args:
        vocab_size: Number of rows in the token embedding table.
        d_model: Width of both embedding tables.
        max_len: Number of rows in the position embedding table.
    """

    def __init__(self, vocab_size, d_model, max_len):
        super().__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(
            input_dim=vocab_size, output_dim=d_model
        )
        self.pos_embedding = tf.keras.layers.Embedding(
            input_dim=max_len, output_dim=d_model
        )

    def call(self, x):
        """Embed token ids `x` and add position information.

        Assumes `x` is (batch, seq_len) integer ids with seq_len <= max_len
        -- TODO confirm against callers.
        """
        # One position id per step along axis 1 of the input.
        seq_len = tf.shape(x)[1]
        position_ids = tf.range(seq_len)

        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        token_vectors = self.embedding(x) * scale
        position_vectors = self.pos_embedding(position_ids)
        return token_vectors + position_vectors

class CausalSelfAttention(tf.keras.layers.Layer):
    """Multi-head self-attention with a causal mask.

    Args:
        d_model: Used as the per-head `key_dim` of the attention layer.
        num_heads: Number of attention heads.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(
            key_dim=d_model,
            num_heads=num_heads,
        )

    def call(self, x):
        """Attend from `x` to itself with the causal mask enabled."""
        attended = self.mha(query=x, value=x, use_causal_mask=True)
        return attended

def feed_forward(d_model, dff):
    """Build a two-layer feed-forward network.

    Args:
        d_model: Number of units in the final (linear) Dense layer.
        dff: Number of units in the first (ReLU) Dense layer.

    Returns:
        A `tf.keras.Sequential` of the two Dense layers.
    """
    hidden = tf.keras.layers.Dense(units=dff, activation='relu')
    projection = tf.keras.layers.Dense(units=d_model)
    return tf.keras.Sequential(layers=[hidden, projection])

class DecoderLayer(tf.keras.layers.Layer):
    """One Transformer block: causal self-attention followed by a feed-forward net.

    The output of each sub-layer goes through dropout, is added back to the
    sub-layer's input (residual connection) and is then layer-normalized.

    Args:
        d_model: Forwarded to `CausalSelfAttention` and `feed_forward`.
        num_heads: Number of attention heads, forwarded to `CausalSelfAttention`.
        dff: Hidden size forwarded to `feed_forward`.
        rate: Dropout rate used after both sub-layers.
    """

    def __init__(self, d_model, num_heads, dff, rate):
        super().__init__()
        self.att = CausalSelfAttention(d_model, num_heads)
        self.ffn = feed_forward(d_model, dff)
        self.norm1 = tf.keras.layers.LayerNormalization()
        self.norm2 = tf.keras.layers.LayerNormalization()
        self.drop1 = tf.keras.layers.Dropout(rate)
        self.drop2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training=None):
        """Run the block on `x`.

        Args:
            x: Input tensor; the residual additions require the sub-layer
                outputs to have the same shape as `x`.
            training: Forwarded to the dropout layers. Defaults to None so the
                layer can also be called directly as `layer(x)`; without a
                default, such a call fails because `training` is missing.

        Returns:
            The transformed tensor after the second layer normalization.
        """
        # Sub-layer 1: causal self-attention + dropout + residual + norm.
        attn = self.att(x)
        x = self.norm1(x + self.drop1(attn, training=training))

        # Sub-layer 2: feed-forward + dropout + residual + norm.
        ffn = self.ffn(x)
        return self.norm2(x + self.drop2(ffn, training=training))

class SentimentTransformer(tf.keras.Model):
    """Text classifier: embedding -> stacked DecoderLayer blocks -> pooling -> softmax.

    Args:
        vocab_size: Forwarded to `PositionalEmbedding`.
        d_model: Model width, forwarded to the embedding and every block.
        num_layers: Number of `DecoderLayer` blocks to stack.
        num_heads: Attention heads per block.
        dff: Feed-forward hidden size per block.
        num_classes: Number of units in the softmax output layer.
        max_len: Forwarded to `PositionalEmbedding`.
        rate: Dropout rate used inside every block.
    """

    def __init__(self, vocab_size, d_model, num_layers, num_heads, dff, num_classes, max_len, rate):
        super().__init__()
        self.embed = PositionalEmbedding(vocab_size, d_model, max_len)
        # Named 'transformer_layers' (not 'layers') to avoid a conflict with
        # the Keras Model's reserved 'layers' attribute.
        self.transformer_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.pool = tf.keras.layers.GlobalAveragePooling1D()
        self.out = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, x, training=None):
        """Compute class probabilities for a batch of token-id sequences.

        Args:
            x: Token ids; assumed to be (batch, seq_len) integers -- TODO confirm.
            training: Forwarded to every block (controls dropout). Defaults to
                None so the model can be called directly as `model(x)` for
                inference; without a default, such a call fails because
                `training` is missing.

        Returns:
            The softmax output of the final Dense layer, one row per input.
        """
        x = self.embed(x)
        for layer in self.transformer_layers:
            # Pass `training` by keyword so it is not mistaken for a tensor input.
            x = layer(x, training=training)
        x = self.pool(x)
        return self.out(x)

# ==========================================
# 5. MODEL TRAINING
# ==========================================
# Architecture: two decoder blocks with four attention heads each; the model
# width reuses `embedding_dim`.
model = SentimentTransformer(
    num_classes=num_classes,
    vocab_size=vocab_size,
    max_len=max_len,
    d_model=embedding_dim,
    num_heads=4,
    num_layers=2,
    dff=128,
    rate=0.1,
)

# `train_labels` holds integer class ids, hence the sparse variant of the loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("\nMulai Training...")
model.fit(x=x_train, y=train_labels, batch_size=16, epochs=20)

# ==========================================
# 6. PREDICT & SAVE RESULTS
# ==========================================
print("\nPrediksi Data Test...")
pred = model.predict(x_test)

# Index of the largest score in each row, mapped back to the label strings
# through the fitted encoder.
pred_idx = pred.argmax(axis=1)
df_test['prediksi_label'] = le.inverse_transform(pred_idx)

# Write the test data plus the new prediction column next to the input CSVs.
output_path = os.path.join(BASE_PATH, 'hasil_prediksi_sentimen_transformer.csv')
df_test.to_csv(path_or_buf=output_path, index=False)

print("\nSUKSES!")
print(f"Hasil tersimpan di: {output_path}")
print(df_test.head(n=5))

TensorFlow Version: 2.19.0
GPU Available: []
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading Train Data from: /content/drive/MyDrive/DEEP LEARNING/dataset/dataset_sentimen_train.csv
Loading Test Data from: /content/drive/MyDrive/DEEP LEARNING/dataset/dataset_sentimen_test.csv

Contoh Data Train:
                                         text    label
0    Fiturnya sangat rusak tidak recommended.  negatif
1  Kualitasnya cukup jelek tidak recommended.  negatif
2        Produk tersebut sangat ramah sekali!  positif
3                         Ini adalah plastik.   netral
4             Lokasinya ada di nasi dan ayam.   netral

Mulai Training...
Epoch 1/20
19/19 ━━━━━━━━━━━━━━━━━━━━ 10s 66ms/step - accuracy: 0.3705 - loss: 1.5083
Epoch 2/20
19/19 ━━━━━━━━━━━━━━━━━━━━ 1s 64ms/step - accuracy: 0.8688 - loss: 0.5633
Epoch 3/20
[1m19/19[0m 