# Analisis Sentimen pada Aplikasi My Telkomsel

## 1. Import Library dan Import Data

In [4]:
import pandas as pd  # Pandas untuk manipulasi dan analisis data
pd.options.mode.chained_assignment = None  # Menonaktifkan peringatan chaining
import numpy as np  # NumPy untuk komputasi numerik
seed = 0
np.random.seed(seed)  # Mengatur seed untuk reproduktibilitas
import matplotlib.pyplot as plt  # Matplotlib untuk visualisasi data
import seaborn as sns  # Seaborn untuk visualisasi data statistik, mengatur gaya visualisasi
from sklearn.metrics import accuracy_score

import re  # Modul untuk bekerja dengan ekspresi reguler
import string  # Berisi konstanta string, seperti tanda baca
import unicodedata # Modul untuk bekerja dengan Unicode
import requests # Modul yang memungkinkan untuk mengirim htttp request
import os # Modul untuk pekerja terkait os seperti manajemen file
from dotenv import load_dotenv
load_dotenv()

from nltk.tokenize import word_tokenize  # Tokenisasi teks
from nltk.corpus import stopwords  # Daftar kata-kata berhenti dalam teks

from Sastrawi.Stemmer import StemmerFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory  # Stemming (penghilangan imbuhan kata) dalam bahasa Indonesia
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory  # Menghapus kata-kata berhenti dalam bahasa Indonesia

from wordcloud import WordCloud  # Membuat visualisasi berbentuk awan kata (word cloud) dari teks

import nltk  # Import pustaka NLTK (Natural Language Toolkit).
nltk.download('punkt')  # Mengunduh dataset yang diperlukan untuk tokenisasi teks.
nltk.download('stopwords')  # Mengunduh dataset yang berisi daftar kata-kata berhenti (stopwords) dalam berbagai bahasa.
nltk.download('punkt_tab') # Download the punkt_tab data package
pd.options.mode.copy_on_write = True

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\khair\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\khair\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\khair\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.optimizers import RMSprop, AdamW, Adam
from sklearn.model_selection import train_test_split 

In [6]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from tenacity import retry, stop_after_attempt, wait_fixed, RetryCallState
from tqdm import tqdm
import time

In [7]:
app_reviews_df = pd.read_csv('app_reviews.csv')

## 2. Dataset Overview

In [8]:
app_reviews_df.head(3)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,bb582a94-9844-40aa-8c97-506e07c22fb7,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Jujur pake Shopee ini baguss banget sebenernya...,2,20,3.42.24,2025-01-21 20:19:08,"Hai kak, mohon maaf atas ketidaknyamanan nya. ...",2025-01-21 21:18:24,3.42.24
1,d8474dc1-da09-4605-9aab-de73f1d61dbe,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,Terus terang saya suka sekali belanja di shope...,3,17,3.42.24,2025-01-21 05:13:42,,,3.42.24
2,63dbc9c4-2da7-4825-b335-2a5a1e1575de,Pengguna Google,https://play-lh.googleusercontent.com/EGemoI2N...,"Aplikasinya bagus, mudah dipahami. Seeing bela...",5,309,3.42.24,2025-01-20 09:29:58,"Hi kak, maaf ya udh buat kamu ga nyaman. Terka...",2023-02-07 16:24:08,3.42.24


In [9]:
app_reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   reviewId              3000 non-null   object
 1   userName              3000 non-null   object
 2   userImage             3000 non-null   object
 3   content               3000 non-null   object
 4   score                 3000 non-null   int64 
 5   thumbsUpCount         3000 non-null   int64 
 6   reviewCreatedVersion  3000 non-null   object
 7   at                    3000 non-null   object
 8   replyContent          2808 non-null   object
 9   repliedAt             2808 non-null   object
 10  appVersion            3000 non-null   object
dtypes: int64(2), object(9)
memory usage: 257.9+ KB


In [10]:
df = app_reviews_df[['content','score']]

In [11]:
# Mengecek data yang kosong atau hilang
df.isnull().sum()

content    0
score      0
dtype: int64

In [12]:
# Menghapus baris yang memiliki duplikat
df.drop_duplicates(inplace=True)

## 3. Text Preprocessing

In [13]:
def cleaningText(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text) # menghapus mention
    text = re.sub(r'#[A-Za-z0-9]+', '', text) # menghapus hashtag
    text = re.sub(r'RT[\s]', '', text) # menghapus RT
    text = re.sub(r"http\S+", '', text) # menghapus link
    text = re.sub(r'[0-9]+', '', text) # menghapus angka
    text = re.sub(r'[^\w\s]', '', text) # menghapus karakter selain huruf dan angka

    text = text.replace('\n', ' ') # mengganti baris baru dengan spasi
    text = text.translate(str.maketrans('', '', string.punctuation)) # menghapus semua tanda baca
    text = text.strip(' ') # menghapus karakter spasi dari kiri dan kanan teks
    return text

def remove_superscripts_and_subscripts(text):
    # memfilter karakter yang bukan superscript atau subscript
    def is_not_super_or_sub(char):
        # Cek kategori Unicode karakter
        return not (unicodedata.name(char, "").startswith("SUPERSCRIPT") or
                    unicodedata.name(char, "").startswith("SUBSCRIPT"))

    # Filter karakter dari teks
    text = ''.join(filter(is_not_super_or_sub, text))
    return text

def casefoldingText(text): 
    # Mengubah semua karakter dalam teks menjadi huruf kecil
    text = text.lower()
    return text

def tokenizingText(text): 
    # Memecah atau membagi string, teks menjadi daftar token
    text = word_tokenize(text)
    return text

def filteringText(text): 
    # Menghapus stopwords dalam teks
    listStopwords = set(stopwords.words('indonesian'))
    listStopwords1 = set(stopwords.words('english'))
    listStopwords.update(listStopwords1)
    listStopwords.update(['iya','yaa','gak','nya','na','sih','ku',"di","ga","ya","gaa","loh","kah","woi","woii","woy"])
    filtered = []
    for txt in text:
        if txt not in listStopwords:
            filtered.append(txt)
    text = filtered
    return text

def stemmingText(text): 
    # Mengurangi kata ke bentuk dasarnya yang menghilangkan imbuhan awalan dan akhiran atau ke akar kata
    
    # Membuat objek stemmer
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()

    # Memecah teks menjadi daftar kata
    words = text.split()

    # Menerapkan stemming pada setiap kata dalam daftar
    stemmed_words = [stemmer.stem(word) for word in words]

    # Menggabungkan kata-kata yang telah distem
    stemmed_text = ' '.join(stemmed_words)

    return stemmed_text

def toSentence(list_words): # Mengubah daftar kata menjadi kalimat
    sentence = ' '.join(word for word in list_words)
    return sentence

In [14]:
# URL file slang words
url = "https://raw.githubusercontent.com/louisowen6/NLP_bahasa_resources/master/combined_slang_words.txt"

# Mengambil konten file dari URL
response = requests.get(url)
if response.status_code == 200:
    content = response.text
    # Memproses konten ke dalam dictionary
    slangwords = {}
    for line in content.strip().split(","):
        if ":" in line:
            key, value = line.split(":", 1)  # Memisahkan kata slang dan arti
            key = key.strip('{"}') # Menghilangkan tanda kurung kurawal pada key
            value = value.strip('{"}') # Menghilangkan tanda kurung kurawal pada value
            slangwords[key.strip(' "\'')] = value.strip(' "\'') # Menyimpan pasangan key-value pada dictionary slangwords
else:
    print(f"Gagal mengunduh file dari URL. Status code: {response.status_code}")


In [15]:
list(slangwords.items())[:5]

[('@', 'di'),
 ('abis', 'habis'),
 ('ad', 'ada'),
 ('adlh', 'adalah'),
 ('afaik', 'as far as i know')]

In [16]:
def fix_slangwords(text):
    # Memperbaiki kata-kata slang dalam teks
    words = text.split()
    fixed_words = []

    for word in words:
        if word.lower() in slangwords:
            fixed_words.append(slangwords[word.lower()])
        else:
            fixed_words.append(word)

    fixed_text = ' '.join(fixed_words)
    return fixed_text

In [17]:
# Membersihkan teks dan menyimpannya di kolom 'text_clean'
df['text_clean'] = df['content'].apply(cleaningText).apply(remove_superscripts_and_subscripts)

# Mengubah huruf dalam teks menjadi huruf kecil dan menyimpannya di 'text_casefoldingText'
df['text_casefoldingText'] = df['text_clean'].apply(casefoldingText)

# Mengganti kata-kata slang dengan kata-kata standar dan menyimpannya di 'text_slangwords'
df['text_slangwords'] = df['text_casefoldingText'].apply(fix_slangwords)

# Memecah teks menjadi token (kata-kata) dan menyimpannya di 'text_tokenizingText'
df['text_tokenizingText'] = df['text_slangwords'].apply(tokenizingText)

# Menghapus kata-kata stop (kata-kata umum) dan menyimpannya di 'text_stopword'
df['text_stopword'] = df['text_tokenizingText'].apply(filteringText)

# Menggabungkan token-token menjadi kalimat dan menyimpannya di 'text_akhir'
df['text_akhir'] = df['text_stopword'].apply(toSentence)

In [18]:
df.head()

Unnamed: 0,content,score,text_clean,text_casefoldingText,text_slangwords,text_tokenizingText,text_stopword,text_akhir
0,Jujur pake Shopee ini baguss banget sebenernya...,2,Jujur pake Shopee ini baguss banget sebenernya...,jujur pake shopee ini baguss banget sebenernya...,jujur pakai shopee ini baguss banget sebenerny...,"[jujur, pakai, shopee, ini, baguss, banget, se...","[jujur, pakai, shopee, baguss, banget, sebener...",jujur pakai shopee baguss banget sebenernya be...
1,Terus terang saya suka sekali belanja di shope...,3,Terus terang saya suka sekali belanja di shope...,terus terang saya suka sekali belanja di shope...,terus terang saya suka sekali belanja di shope...,"[terus, terang, saya, suka, sekali, belanja, d...","[terang, suka, belanja, shopee, kesini, shopee...",terang suka belanja shopee kesini shopee memba...
2,"Aplikasinya bagus, mudah dipahami. Seeing bela...",5,Aplikasinya bagus mudah dipahami Seeing belanj...,aplikasinya bagus mudah dipahami seeing belanj...,aplikasinya bagus mudah dipahami seeing belanj...,"[aplikasinya, bagus, mudah, dipahami, seeing, ...","[aplikasinya, bagus, mudah, dipahami, seeing, ...",aplikasinya bagus mudah dipahami seeing belanj...
3,1. Pesanan cod roksaya dibatalkan sendiri oleh...,3,Pesanan cod roksaya dibatalkan sendiri oleh si...,pesanan cod roksaya dibatalkan sendiri oleh si...,pesanan cod roksaya dibatalkan sendiri oleh si...,"[pesanan, cod, roksaya, dibatalkan, sendiri, o...","[pesanan, cod, roksaya, dibatalkan, sistem, me...",pesanan cod roksaya dibatalkan sistem melewati...
4,"Aplikasi elit benerin bug shopeepay sulit, tia...",2,Aplikasi elit benerin bug shopeepay sulit tiap...,aplikasi elit benerin bug shopeepay sulit tiap...,aplikasi elit benerin bug shopeepay sulit tiap...,"[aplikasi, elit, benerin, bug, shopeepay, suli...","[aplikasi, elit, benerin, bug, shopeepay, suli...",aplikasi elit benerin bug shopeepay sulit upda...


## 4. Labeling Kata

Analisis sentimen untuk menentukan polaritas setiap review pada data menggunakan model LLM. Model LLM yang digunakan dikonfigurasikan sedemikian rupa sehingga dapat memberikan analisis yang akurat untuk setiap data di dalam dataset. Model LLM disematkan dengan prompt khusus untuk melakukan tugas analisis sentimen.

### 4.1 Membangun Model LLM dengan Tugas Analisis Sentimen

In [16]:
# Mengekstrak kode API agar bisa menggunakan model LLM
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [19]:
from langchain_ollama import ChatOllama

In [20]:
llm = ChatOllama(model="deepseek-r1:1.5b")

In [38]:
# llm = ChatGroq(model='llama-3.3-70b-versatile')

In [21]:
system_template = """
APAPUN KUERI YANG DIBERIKAN CUKUP JAWAB DENGAN SATU KATA
Kategorikan KUERI menjadi negatif atau positif! CUKUP JAWAB DENGAN SATU KATA ANTARA 
\"positif\" DAN \"negatif\"
"""

prompt_template = ChatPromptTemplate([
    ("system", system_template),
    ("human", "{user_input}")
])

In [22]:
# Menyatukan template, model, dan ouput parser dalam satu pipeline
sentiment_analysis = prompt_template | llm | StrOutputParser()

In [23]:
# Fungsi untuk mem-parsing waktu tunggu dari pesan error
def get_wait_time_from_error(error_message):
    try:
        # Cari pola seperti "try again in 1m22.142s"
        match = re.search(r"try again in (\d+)m(\d+\.\d+)s", error_message['error']['message'])
        if match:
            minutes = int(match.group(1))  # Ambil bagian menit
            seconds = float(match.group(2))  # Ambil bagian detik
            return minutes * 60 + seconds  # Total waktu tunggu dalam detik
    except Exception:
        pass
    # Jika parsing gagal, gunakan waktu tunggu default
    return 120  # Default 120 detik

# Callback untuk menampilkan error dan waktu tunggu
def retry_callback(retry_state: RetryCallState):
    error = retry_state.outcome.exception()  # Ambil error dari percobaan terakhir
    if error:
        error_message = str(error)  # Ubah error menjadi string
        wait_time = get_wait_time_from_error(error_message)  # Dapatkan waktu tunggu
        print(f"Attempt {retry_state.attempt_number} failed. Error: {error_message}")
        print(f"Retrying in {wait_time:.2f} seconds...")
        time.sleep(wait_time)  # Tunggu sebelum mencoba lagi

# Dekorator retry dengan callback
@retry(
    stop=stop_after_attempt(10),  # Maksimal 10 percobaan
    wait=wait_fixed(1),  # Waktu tunggu fixed sementara, diabaikan karena manual di callback
    after=retry_callback  # Callback untuk menangani retry
)
def check_polarity(text_input):
    try:
        # Panggil model untuk analisis sentimen
        return sentiment_analysis.invoke({"user_input": text_input})
    except Exception as e:
        # Tangkap error dan lempar kembali untuk ditangani oleh retry
        raise e

In [24]:
test_input = "Sangat direkomendasikan!!! saya selalu beli produk ini setiap bulan karena performanya sangat bisa diandalkan"
check_polarity(test_input)

'<think>\nOkay, so I need to categorize the query "KUERI" as either positive or negative. First, I should understand what "KUERI" stands for. From what I know in English, QR codes are used in various applications like technology and media. They can contain data, images, videos, etc., but they don\'t have a traditional meaning of being something positive or negative.\n\nI\'m not entirely sure how to determine if the query itself is positive or negative. Maybe it\'s about whether there are any issues related to "KUERI." Since QR codes are commonly used and can be manipulated for different purposes, this might lead to some negative aspects. However, I also know that their security and purposefulness are important in many applications.\n\nI\'m leaning towards thinking that the presence of a QR code is neutral because it\'s versatile but not inherently positive or negative by itself. It depends on how it\'s used. So, when someone mentions "KUERI," it could be part of a positive use case whe

In [46]:
check_polarity('produk kau ga jelas, menyesal saya beli di toko penipu ini')

'Negatif'

In [None]:
# Pelabelan data berdasarkan model yang telah dibangun
tqdm.pandas()
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing Rows"):
    if pd.isna(row.get("polarity")):  # Skip jika sudah ada nilai
        try:
            df.at[idx, "polarity"] = check_polarity(row["text_akhir"])
        except Exception as e:
            print(f"Error at row {idx}: {e}")
            raise e

Processing Rows:   0%|          | 0/3000 [00:00<?, ?it/s]

Processing Rows:   6%|▌         | 180/3000 [07:21<6:47:43,  8.68s/it]

Attempt 1 failed. Error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jbefye1nf41bk4v2nwafmptv` service tier `on_demand` on : Limit 100000, Used 100055, Requested 94. Please try again in 2m9.168s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': '', 'code': 'rate_limit_exceeded'}}
Retrying in 120.00 seconds...


Processing Rows:   6%|▌         | 181/3000 [09:31<35:23:34, 45.20s/it]

Attempt 1 failed. Error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jbefye1nf41bk4v2nwafmptv` service tier `on_demand` on : Limit 100000, Used 100067, Requested 73. Please try again in 2m1.583s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': '', 'code': 'rate_limit_exceeded'}}
Retrying in 120.00 seconds...


Processing Rows:   6%|▌         | 182/3000 [11:34<53:40:58, 68.58s/it]

Attempt 1 failed. Error: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jbefye1nf41bk4v2nwafmptv` service tier `on_demand` on : Limit 100000, Used 100058, Requested 118. Please try again in 2m32.59s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': '', 'code': 'rate_limit_exceeded'}}
Retrying in 120.00 seconds...


In [32]:
# Mengecek keluaran hasil analisis sentimen
df['polarity'].value_counts()

polarity
negatif                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

In [45]:
# Menyeragamkan output
df['polarity'] = df['polarity'].apply(lambda x: x.lower())

In [None]:
df['polarity'].value_counts()

### 4.3 Visualisasi Analisis Sentimen

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))
sizes = [count for count in df["polarity"].value_counts()]
labels = list(df["polarity"].value_counts().index)
explode = (0.1, 0, 0)
ax.pie(
    x=sizes,
    labels=labels,
    autopct="%1.2f%%",
    textprops={"fontsize": 14},
)
ax.set_title(f"Sentiment Polarity on df Data \n (total = {len(df)} df)", fontsize=16, pad=20)
plt.show()

In [57]:
positive_reviews = df[['text_akhir','polarity']].loc[df['polarity'] == 'positif']
negative_reviews = df[['text_akhir','polarity']].loc[df['polarity'] == 'negatif']

In [None]:
df.head(3)

In [None]:
# Visualisasi Word Cloud
fig, ax = plt.subplots(1, 2, figsize=(12, 10))

# Word Cloud pada review positif
list_postive_reviews = ""
for row_word in positive_reviews.iloc[:, 0]:
    list_postive_reviews += " " + (row_word)
wordcloud_positive = WordCloud(
    width=800, height=600, background_color="black", colormap="Greens", min_font_size=10
).generate(list_postive_reviews)
ax[0].set_title(
    "Word Cloud of Positive Reviews",
    fontsize=14,
)
ax[0].grid(False)
ax[0].imshow((wordcloud_positive))
fig.tight_layout(pad=0)
ax[0].axis("off")

# Word Cloud pada review negatif
list_negative_reviews = ""
for row_word in negative_reviews.iloc[:, 0]:
    list_negative_reviews += " " + (row_word)
wordcloud_negative = WordCloud(
    width=800, height=600, background_color="black", colormap="Reds", min_font_size=10
).generate(list_negative_reviews)
ax[1].set_title(
    "Word Cloud of Negative Reviews",
    fontsize=14,
)
ax[1].grid(False)
ax[1].imshow((wordcloud_negative))
fig.tight_layout(pad=0)
ax[1].axis("off")

plt.show()

## 5. Ekstraksi Fitur

In [59]:
def makeReviewDict(series):
    words = dict()
    for row in series:
        texts = row.split()
        for text in texts:
            if text not in words:
                words[text] = 1
            else:
                words[text] += 1
    return words       

In [60]:
reviewDict = makeReviewDict(df['text_akhir'])
ln = df['text_akhir'].apply(lambda x: len(x)).quantile(0.8)

In [63]:
df = df[df['polarity']!='netral']

In [131]:
# Ekstraksi Fitur dengan embedding
num_words = 6000
tokenizer = Tokenizer(num_words=num_words) 
tokenizer.fit_on_texts(df['text_akhir'])
X = tokenizer.texts_to_sequences(df['text_akhir'])
X = pad_sequences(X, maxlen=150, padding='post')

In [None]:
# Mengubah tipe data string menjadi biner, 0 untuk negative dan 1 untuk positive
y = df['polarity'].apply(lambda x: 1 if x == 'negatif' else 0)

In [133]:
# X_temp, X_test, y_temp, y_test = train_test_split(X,y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)

In [96]:
from tensorflow.keras.layers import BatchNormalization

In [134]:
#Model Building
def create_model():
    model = Sequential()
    model.add(Embedding(num_words, X_train.shape[1])) # Mengubah indeks integer (kata) menjadi vektor dense
    model.add(SpatialDropout1D(0.2)) # Dropout khusus data sekuensial seperti teks
    # model.add(LSTM(256, dropout=0.3, recurrent_dropout=0.3, return_sequences=True))
    model.add(LSTM(256, dropout=0.2, recurrent_dropout=0.2))
    # model.add(Dense(128, activation='relu'))
    # model.add(BatchNormalization())
    model.add(Dense(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=RMSprop(learning_rate=0.001), loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

In [135]:
tf.keras.backend.clear_session()

In [136]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

In [None]:
model = create_model()
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, validation_data=(X_test, y_test), callbacks=[early_stopping])

In [None]:
model.summary()

In [None]:
#Model Testing
print(f'Akurasi test data\t: {model.evaluate(X_test,y_test)[1]*100:.2f} %')
print(f'Akurasi train data\t: {model.evaluate(X_train, y_train)[1]*100:.2f} %')

In [94]:
# # Fungsi untuk membuat model
# def build_model(hp):
#     # model = Sequential()
#     # # Hyperparameter jumlah neuron pada layer pertama
#     # model.add(Dense(units=hp.Int('units_1', min_value=32, max_value=512, step=32),
#     #                 activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid']),
#     #                 input_dim=20))  # Misalnya 20 fitur pada data input

#     # # Hyperparameter jumlah neuron pada layer kedua (opsional)
#     # model.add(Dense(units=hp.Int('units_2', min_value=32, max_value=256, step=32),
#     #                 activation=hp.Choice('activation_2', values=['relu', 'tanh', 'sigmoid'])))

#     # # Output layer
#     # model.add(Dense(1, activation='sigmoid'))

#     # # Compile model
#     # model.compile(optimizer=hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop']),
#     #               loss='binary_crossentropy',
#     #               metrics=['accuracy'])
    
#     model.add(Embedding(num_words, X_train.shape[1])) # Mengubah indeks integer (kata) menjadi vektor dense
#     model.add(SpatialDropout1D(0.3)) # Dropout khusus data sekuensial seperti teks
#     model.add(LSTM(units=hp.Int('units_1', min_value=32, max_value=64, step=16), 
#                    dropout=0.3, recurrent_dropout=0.3))
#     model.add(Dense(units=hp.Int('units_2', min_value=32, max_value=64, step=16),
#                     activation=hp.Choice('activation_1', values=['relu', 'tanh', 'sigmoid']),
#                     input_dim=20)) 
#     model.add(Dense(1, activation='sigmoid'))
#     model.compile(optimizer=RMSprop(learning_rate=0.01), loss = 'binary_crossentropy', metrics = ['accuracy'])
#     return model

# # Definisikan tuner
# tuner = Hyperband(
#     build_model,
#     objective='val_accuracy',  # Tujuan optimasi
#     max_epochs=10,             # Maksimal jumlah epoch
#     factor=3,                  # Faktor reduksi pada Hyperband
#     directory='my_dir',        # Direktori untuk menyimpan hasil tuning
#     project_name='hyperparameter_tuning'
# )

# # Callback untuk early stopping
# stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# # Lakukan pencarian hyperparameter
# tuner.search(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# # Hasil terbaik
# best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
# print(f"Best Hyperparameters: {best_hps.values}")

# # Buat model dengan hyperparameter terbaik
# model = tuner.hypermodel.build(best_hps)
# history = model.fit(X_train, y_train, epochs=50, validation_split=0.2)

# # Evaluasi model
# loss, accuracy = model.evaluate(X_train, y_train)
# print(f"Final Accuracy: {accuracy}")
