In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import classification_report, confusion_matrix
from keras.optimizers import Adam, RMSprop
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from timeout_decorator import timeout, TimeoutError
import multiprocessing
import functools


In [2]:
# Baca data dari file CSV
df = pd.read_csv('ML2.csv')

In [3]:
# Membuat peta untuk encoding
polarity_encode = {'negative': 0, 'neutral': 1, 'positive': 2}

# Mengonversi label 'polarity' ke nilai numerik
df['polarity_encoded'] = df['polarity'].map(polarity_encode)

In [4]:
# Pisahkan data menjadi data latih dan uji
X_train, X_test, y_train, y_test = train_test_split(df['clean_teks'], df['polarity_encoded'], test_size=0.2, random_state=42)


In [5]:
# Tokenisasi teks
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [6]:
# Padding sequence agar memiliki panjang yang sama
X_train_pad = pad_sequences(X_train_seq, maxlen=100, truncating='post', padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=100, truncating='post', padding='post')


In [7]:
df

Unnamed: 0.1,Unnamed: 0,userName,score,at,content,clean_teks,polarity_score,polarity,polarity_encoded
0,0,Kayna Adiva,1,2023-10-31T17:01:24.000,Saya rank legend 5 malah terus bertemu musuh y...,"peringkat,legend,temu,musuh,legend,kalah,tim,m...",-6,negative,0
1,1,kasfy nisya,4,2023-11-03T10:00:36.000,"Bagus, cuman kadang dlm pertandingan ga seimba...","bagus,cuman,kadang,dlm,tanding,tidak,imbang,so...",-15,negative,0
2,2,Sun thin Then,1,2023-10-29T15:10:42.000,"Gamenya sih udah bagus bgt, grafiknya mantap, ...","game,sih,sudah,bagus,banget,grafik,mantap,back...",12,positive,2
3,3,Star space,1,2023-10-29T11:50:33.000,Game nya sudah bagus tetapi ada beberapa masal...,"game,bagus,resah,dark,sistem,sudah,hapus,tu,na...",-18,negative,0
4,4,Abdul Ghani Rossyidi,3,2023-10-29T20:25:49.000,Untuk event2 sdah oke lah. Tapi tolong priorit...,"event,sdah,oke,tolong,prioritas,nyaman,main,ja...",-7,negative,0
...,...,...,...,...,...,...,...,...,...
95,95,Ardi Putra,2,2023-10-10T22:50:04.000,"Keluh kesah banget nih hari ini, Pliss lah dar...","keluh,kesah,banget,ini,tolong,moonton,tolong,k...",-21,negative,0
96,96,Gilang Gilang,1,2023-09-27T06:10:55.000,Aneh padahal penyimpanan masih banyak masa lag...,"aneh,simpan,lag,langsung,relog,gameplay,ku,rus...",-7,negative,0
97,97,Ktek alas,1,2023-10-03T09:08:58.000,Untuk Moonton tolong perbaiki masalah jaringan...,"moonton,tolong,baik,jaringan,bug,jaringan,alam...",-2,negative,0
98,98,Umrotull 029,1,2023-09-27T18:10:49.000,Kenapa ya sekarang kalau update lama banget pd...,"iya,terbaru,banget,padahal,pakai,wifi,pakai,da...",0,neutral,1


In [8]:
# Buat fungsi model dengan nilai hyperparameter default
def create_model(embed_dim=16, hidden_unit=16, dropout_rate=0.2, optimizers=Adam, learning_rate=0.001):
    model = Sequential()
    model.add(Embedding(input_dim=5000, output_dim=embed_dim, input_length=100))
    model.add(LSTM(units=hidden_unit, activation='tanh'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizers(lr=learning_rate), metrics=['accuracy'])
    print(model.summary())
    return model


In [9]:
# Model pembungkus dengan nilai hyperparameter terbaik
model = KerasClassifier(build_fn=create_model,
                        dropout_rate=0.2,
                        embed_dim=32,
                        hidden_unit=16,
                        optimizers=RMSprop,
                        learning_rate=0.001,
                        epochs=10,
                        batch_size=128,
                        validation_split=0.1)

  model = KerasClassifier(build_fn=create_model,


In [10]:
# Evaluasi model dengan GridSearchCV untuk mengetahui hyperparameter terbaik untuk model
param_grid = dict(embed_dim=[32, 64],
                  hidden_unit=[16, 32, 64],
                  dropout_rate=[0.2],
                  optimizers=[Adam, RMSprop],
                  learning_rate=[0.01, 0.001, 0.0001],
                  epochs=[10, 25, 50, 100],
                  batch_size=[128, 256])

In [11]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train_pad, y_train)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 32)           160000    
                                                                 
 lstm (LSTM)                 (None, 16)                3136      
                                                                 
 dropout (Dropout)           (None, 16)                0         
                                                                 
 dense (Dense)               (None, 1)                 17        
                                                                 
Total params: 163,153
Trainable params: 163,153
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 32)           160000    
                                                                 
 lstm_1 (LSTM)               (None, 16)                3136      
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 163,153
Trainable params: 163,153
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoc

  super().__init__(name, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 32)           160000    
                                                                 
 lstm_4 (LSTM)               (None, 16)                3136      
                                                                 
 dropout_4 (Dropout)         (None, 16)                0         
                                                                 
 dense_4 (Dense)             (None, 1)                 17        
                                                                 
Total params: 163,153
Trainable params: 163,153
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/1

KeyboardInterrupt: 

In [None]:
# Menampilkan hasil terbaik
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


In [None]:
# Membuat model dengan hyperparameter terbaik yang telah ditentukan
best_params = grid_result.best_params_
model = KerasClassifier(build_fn=create_model, **best_params)

model_prediction = model.fit(X_train_pad, y_train)

In [None]:
# Melanjutkan eksekusi kode setelah pencarian hiperparameter

# Evaluasi model
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

In [None]:
# Prediksi pada data uji
predictions = model.predict(X_test_pad)

# Konversi nilai probabilitas menjadi kelas
predicted_labels = [1 if pred > 0.5 else 0 for pred in predictions]

# Evaluasi klasifikasi
print("Confusion Matrix:")
print(confusion_matrix(y_test, predicted_labels))

print("\nClassification Report:")
print(classification_report(y_test, predicted_labels))

In [None]:
# model.save('model.h5')