In [31]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [32]:
df = pd.read_csv('ajaib.co.id_reviews.csv')
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,7eb5ebd4-1205-4764-82c2-70a1c9a03851,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,sangat bermanfaat,5,0,2.71.0,2025-07-16 06:39:22,,,2.71.0
1,e30627a1-a026-470e-a7d3-7ddae65f0697,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,keren,5,0,2.71.0,2025-07-16 06:12:23,,,2.71.0
2,00433893-322e-4240-9cc5-34a884f70e20,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Mudah dipahami oleh pemula,5,0,2.71.0,2025-07-16 05:05:58,,,2.71.0
3,58bf02b6-a385-490c-abcc-7a65162fc234,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,keren,5,0,2.71.0,2025-07-16 05:03:30,,,2.71.0
4,1e9f1226-b2ce-4101-9cea-bd57262c0230,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Jelek sekali, pelayanan CS nya lama, kita mau ...",1,0,2.65.1,2025-07-16 04:28:49,"Hi Kak , mohon maaf atas ketidaknyamanannya da...",2025-07-16 07:53:20,2.65.1


In [33]:
def label_sentiment(score):
    if score > 3:
        return 'positive'
    elif score == 3:
        return 'neutral'
    else:
        return 'negative'

df['sentiment'] = df['score'].apply(label_sentiment)
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,sentiment
0,7eb5ebd4-1205-4764-82c2-70a1c9a03851,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,sangat bermanfaat,5,0,2.71.0,2025-07-16 06:39:22,,,2.71.0,positive
1,e30627a1-a026-470e-a7d3-7ddae65f0697,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,keren,5,0,2.71.0,2025-07-16 06:12:23,,,2.71.0,positive
2,00433893-322e-4240-9cc5-34a884f70e20,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Mudah dipahami oleh pemula,5,0,2.71.0,2025-07-16 05:05:58,,,2.71.0,positive
3,58bf02b6-a385-490c-abcc-7a65162fc234,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,keren,5,0,2.71.0,2025-07-16 05:03:30,,,2.71.0,positive
4,1e9f1226-b2ce-4101-9cea-bd57262c0230,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Jelek sekali, pelayanan CS nya lama, kita mau ...",1,0,2.65.1,2025-07-16 04:28:49,"Hi Kak , mohon maaf atas ketidaknyamanannya da...",2025-07-16 07:53:20,2.65.1,negative


In [34]:
reviews = df['content'].values
labels = pd.get_dummies(df['sentiment']).values

X_train, X_test, y_train, y_test = train_test_split(reviews, labels, test_size=0.2, random_state=42)

In [35]:
tokenizer = Tokenizer(num_words=5000, oov_token='<unk>')
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=120, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=120, padding='post', truncating='post')

In [36]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=16),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

In [37]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [38]:
history = model.fit(X_train_padded, y_train, epochs=10, validation_data=(X_test_padded, y_test))

Epoch 1/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 75ms/step - accuracy: 0.7240 - loss: 0.7305 - val_accuracy: 0.7243 - val_loss: 0.7186
Epoch 2/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 75ms/step - accuracy: 0.7220 - loss: 0.7243 - val_accuracy: 0.7243 - val_loss: 0.7180
Epoch 3/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 74ms/step - accuracy: 0.7254 - loss: 0.7209 - val_accuracy: 0.7243 - val_loss: 0.7171
Epoch 4/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 74ms/step - accuracy: 0.7296 - loss: 0.7095 - val_accuracy: 0.7243 - val_loss: 0.7170
Epoch 5/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 74ms/step - accuracy: 0.7226 - loss: 0.7211 - val_accuracy: 0.7243 - val_loss: 0.7169
Epoch 6/10
[1m1830/1830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 75ms/step - accuracy: 0.7242 - loss: 0.7202 - val_accuracy: 0.7243 - val_loss: 0.717

In [39]:
loss, accuracy = model.evaluate(X_test_padded, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.7221 - loss: 0.7224
Test Accuracy: 72.43%


In [44]:
def predict_sentiment(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=120, padding='post', truncating='post')
    prediction = model.predict(padded_sequence)
    sentiment = ['negative', 'neutral', 'positive'][np.argmax(prediction)]
    return sentiment

# Example usage
new_review = "Aplikasi ini sangat membantu!"
print(f'The review "{new_review}" is {predict_sentiment(new_review)}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
The review "Aplikasi ini sangat membantu!" is positive
