In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
df = pd.read_csv('ajaib.co.id_reviews.csv')
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,1e9f1226-b2ce-4101-9cea-bd57262c0230,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Jelek sekali, pelayanan CS nya lama, kita mau ...",1,0,2.65.1,2025-07-16 11:28:49,"Hi Kak , mohon maaf atas ketidaknyamanannya da...",2025-07-16 14:53:20,2.65.1
1,08ad0973-0c57-48b4-909c-dc63717e18a9,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,apk nya sangat bagus dan sangat mudah untuk di...,5,0,2.71.0,2025-07-16 11:26:56,,,2.71.0
2,063fab80-9b48-4be5-b747-658544cb8909,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Saya pemula yg satu bulan belakangan coba trad...,5,0,2.71.0,2025-07-16 10:52:55,,,2.71.0
3,06009f20-f077-4d95-80e5-40d2c9fa96fe,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Pertama kali pake ajaib sangat gampang buat na...,5,0,,2025-07-16 10:52:12,,,
4,17bb8845-258f-472e-abe2-d278f4ba8275,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,aseet saya hilang di portofolio.. ada apa ini....,2,0,2.73.0,2025-07-16 10:36:53,"Hi kak. Mohon maaf atas ketidaknyamanannya, te...",2025-07-16 14:53:52,2.73.0


In [4]:
def label_sentiment(score):
    if score > 3:
        return 'positive'
    elif score == 3:
        return 'neutral'
    else:
        return 'negative'

df['sentiment'] = df['score'].apply(label_sentiment)
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,sentiment
0,1e9f1226-b2ce-4101-9cea-bd57262c0230,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Jelek sekali, pelayanan CS nya lama, kita mau ...",1,0,2.65.1,2025-07-16 11:28:49,"Hi Kak , mohon maaf atas ketidaknyamanannya da...",2025-07-16 14:53:20,2.65.1,negative
1,08ad0973-0c57-48b4-909c-dc63717e18a9,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,apk nya sangat bagus dan sangat mudah untuk di...,5,0,2.71.0,2025-07-16 11:26:56,,,2.71.0,positive
2,063fab80-9b48-4be5-b747-658544cb8909,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Saya pemula yg satu bulan belakangan coba trad...,5,0,2.71.0,2025-07-16 10:52:55,,,2.71.0,positive
3,06009f20-f077-4d95-80e5-40d2c9fa96fe,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Pertama kali pake ajaib sangat gampang buat na...,5,0,,2025-07-16 10:52:12,,,,positive
4,17bb8845-258f-472e-abe2-d278f4ba8275,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,aseet saya hilang di portofolio.. ada apa ini....,2,0,2.73.0,2025-07-16 10:36:53,"Hi kak. Mohon maaf atas ketidaknyamanannya, te...",2025-07-16 14:53:52,2.73.0,negative


In [5]:
reviews = df['content'].values
labels = pd.get_dummies(df['sentiment']).values

X_train, X_test, y_train, y_test = train_test_split(reviews, labels, test_size=0.2, random_state=42)

In [6]:
tokenizer = Tokenizer(num_words=5000, oov_token='<unk>')
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=120, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=120, padding='post', truncating='post')

In [7]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.GlobalMaxPool1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(3, activation='softmax')
])

In [8]:
model.compile(loss='categorical_crossentropy', optimizer='adamax', metrics=['accuracy'])

In [10]:
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

In [11]:
history = model.fit(X_train_padded, y_train, epochs=20, validation_data=(X_test_padded, y_test), callbacks=[early_stopping])

Epoch 1/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 136ms/step - accuracy: 0.7912 - loss: 0.5804 - val_accuracy: 0.8730 - val_loss: 0.3688
Epoch 2/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 134ms/step - accuracy: 0.8784 - loss: 0.3705 - val_accuracy: 0.8850 - val_loss: 0.3372
Epoch 3/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 133ms/step - accuracy: 0.8873 - loss: 0.3379 - val_accuracy: 0.8899 - val_loss: 0.3259
Epoch 4/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 131ms/step - accuracy: 0.8948 - loss: 0.3193 - val_accuracy: 0.8933 - val_loss: 0.3213
Epoch 5/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 134ms/step - accuracy: 0.8976 - loss: 0.3142 - val_accuracy: 0.8951 - val_loss: 0.3137
Epoch 6/20
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 135ms/step - accuracy: 0.9000 - loss: 0.2979 - val_accuracy: 0.8935 - val_loss:

In [12]:
loss, accuracy = model.evaluate(X_test_padded, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.8978 - loss: 0.3118
Test Accuracy: 89.81%
