In [42]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, confusion_matrix
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from function import text_preprocessing_process, sentiment_analysis_lexicon_indonesia

In [43]:
# Baca data dari file CSV
df = pd.read_csv('ML2.csv')

In [44]:
# Membuat peta untuk encoding
polarity_encode = {'negative': 0, 'neutral': 1, 'positive': 2}

# Mengonversi label 'polarity' ke nilai numerik
df['polarity_encoded'] = df['polarity'].map(polarity_encode)

In [45]:
# Pisahkan data menjadi data latih dan uji
X_train, X_test, y_train, y_test = train_test_split(df['clean_teks'], df['polarity_encoded'], test_size=0.2, random_state=42)


In [46]:
# Tokenisasi teks
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [47]:
# Padding sequence agar memiliki panjang yang sama
X_train_pad = pad_sequences(X_train_seq, maxlen=100, truncating='post', padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=100, truncating='post', padding='post')

In [48]:
df

Unnamed: 0.1,Unnamed: 0,userName,score,at,content,clean_teks,polarity_score,polarity,polarity_encoded
0,0,Kayna Adiva,1,2023-10-31T17:01:24.000,Saya rank legend 5 malah terus bertemu musuh y...,"peringkat,legend,temu,musuh,legend,kalah,tim,m...",-6,negative,0
1,1,kasfy nisya,4,2023-11-03T10:00:36.000,"Bagus, cuman kadang dlm pertandingan ga seimba...","bagus,cuman,kadang,dlm,tanding,tidak,imbang,so...",-15,negative,0
2,2,Sun thin Then,1,2023-10-29T15:10:42.000,"Gamenya sih udah bagus bgt, grafiknya mantap, ...","game,sih,sudah,bagus,banget,grafik,mantap,back...",12,positive,2
3,3,Star space,1,2023-10-29T11:50:33.000,Game nya sudah bagus tetapi ada beberapa masal...,"game,bagus,resah,dark,sistem,sudah,hapus,tu,na...",-18,negative,0
4,4,Abdul Ghani Rossyidi,3,2023-10-29T20:25:49.000,Untuk event2 sdah oke lah. Tapi tolong priorit...,"event,sdah,oke,tolong,prioritas,nyaman,main,ja...",-7,negative,0
...,...,...,...,...,...,...,...,...,...
95,95,Ardi Putra,2,2023-10-10T22:50:04.000,"Keluh kesah banget nih hari ini, Pliss lah dar...","keluh,kesah,banget,ini,tolong,moonton,tolong,k...",-21,negative,0
96,96,Gilang Gilang,1,2023-09-27T06:10:55.000,Aneh padahal penyimpanan masih banyak masa lag...,"aneh,simpan,lag,langsung,relog,gameplay,ku,rus...",-7,negative,0
97,97,Ktek alas,1,2023-10-03T09:08:58.000,Untuk Moonton tolong perbaiki masalah jaringan...,"moonton,tolong,baik,jaringan,bug,jaringan,alam...",-2,negative,0
98,98,Umrotull 029,1,2023-09-27T18:10:49.000,Kenapa ya sekarang kalau update lama banget pd...,"iya,terbaru,banget,padahal,pakai,wifi,pakai,da...",0,neutral,1


In [49]:
# Bangun model LSTM
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=16, input_length=100))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [50]:
# Latih model
model.fit(X_train_pad, y_train, epochs=5, validation_data=(X_test_pad, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1ddcb2489d0>

In [51]:
# Evaluasi model
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

Loss: 0.6966400146484375
Accuracy: 0.05000000074505806


In [52]:
# Prediksi pada data uji
predictions = model.predict(X_test_pad)

# Konversi nilai probabilitas menjadi kelas
predicted_labels = [1 if pred > 0.5 else 0 for pred in predictions]

# Evaluasi klasifikasi
print("Confusion Matrix:")
print(confusion_matrix(y_test, predicted_labels))

print("\nClassification Report:")
print(classification_report(y_test, predicted_labels))

Confusion Matrix:
[[ 0 15  0]
 [ 0  1  0]
 [ 0  4  0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.05      1.00      0.10         1
           2       0.00      0.00      0.00         4

    accuracy                           0.05        20
   macro avg       0.02      0.33      0.03        20
weighted avg       0.00      0.05      0.00        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [53]:
model.save('model.h5')