# Import Library

In [46]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
!pip install Sastrawi
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

# Preprocessing

In [47]:
indo_stopwords = set(stopwords.words('indonesian'))
factory = StemmerFactory()
stemmer = factory.create_stemmer()

def cleaning(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

def preprocess(text):
    text = cleaning(text)
    text = stemmer.stem(text)

    text = " ".join([w for w in text.split() if w not in indo_stopwords])
    return text

# Load Data

In [55]:
df = pd.read_csv("/content/data_labeled_250 (2).csv")
df['clean_text'] = df['content'].astype(str).apply(preprocess)
df.head()

Unnamed: 0,userName,content,score,reviewCreatedVersion,at,replyContent,repliedAt,content_casefold,content_tokenized,content_stemmed,content_filtered,label,clean_text
0,Nur kholifah,ya ampun sering banget ngga bisa dibuka,3,11.15.1,2025-10-07 15:09:44,,,ya ampun sering banget ngga bisa dibuka,"['iya', 'ampun', 'sering', 'banget', 'tidak', ...","['iya', 'ampun', 'sering', 'banget', 'tidak', ...",['ampun'],neutral,ya ampun banget ngga buka
1,Ridestiana,kok sekarang malah gak bisa offline sih padaha...,2,11.15.1,2025-10-07 13:34:44,,,kok sekarang malah gak bisa offline sih padaha...,"['kok', 'sekarang', 'malah', 'tidak', 'bisa', ...","['kok', 'sekarang', 'malah', 'tidak', 'bisa', ...","['offline', 'dimasukin', 'daftar', 'offline', ...",negative,gak offline sih udah dimasukin daftar offline ...
2,Gracesiella Amanda,Gw gak masalah sih ini mau di duitin terus tap...,1,11.15.1,2025-10-07 13:13:07,,,gw gak masalah sih ini mau di duitin terus tap...,"['aku', 'tidak', 'masalah', 'sih', 'ini', 'mau...","['aku', 'tidak', 'masalah', 'sih', 'ini', 'mau...","['masalah', 'duitin', 'bobrok', 'edit', 'cerit...",negative,gw gak sih duitin bikin bobrok edit cerita sus...
3,E L I O,ini kenapa sekarang gak bisa naruh link di inf...,1,11.12.2,2025-10-07 08:01:06,,,ini kenapa sekarang gak bisa naruh link di inf...,"['ini', 'kenapa', 'sekarang', 'tidak', 'bisa',...","['ini', 'kenapa', 'sekarang', 'tidak', 'bisa',...","['naruh', 'link', 'akun', 'update', 'naruh', '...",negative,gak naruh link info akun yah update kah plis w...
4,Dian a.l,Ni aplikasi ada masalah apa sih? sinyal gua ba...,1,11.15.1,2025-10-07 06:05:52,,,ni aplikasi ada masalah apa sih? sinyal gua ba...,"['ini', 'aplikasi', 'ada', 'masalah', 'apa', '...","['ini', 'aplikasi', 'ada', 'masalah', 'apa', '...","['masalah', 'sinyal', 'beranda', 'nunjukin', '...",negative,ni aplikasi sih sinyal gua bagus loh beranda n...


# TF-IDF

In [49]:
tfidf = TfidfVectorizer(ngram_range=(1,3))
X = tfidf.fit_transform(df['clean_text'])
y = df['label']

# Split Data

In [50]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Eksperimen

In [51]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
pred_dt = dt.predict(X_test)

acc_dt = accuracy_score(y_test, pred_dt)

print("\n=== Decision Tree Result ===")
print(f"Accuracy: {acc_dt*100:.2f}%\n")
print(classification_report(y_test, pred_dt))


=== Decision Tree Result ===
Accuracy: 46.00%

              precision    recall  f1-score   support

    negative       0.60      0.41      0.49        22
     neutral       0.09      0.14      0.11         7
    positive       0.54      0.62      0.58        21

    accuracy                           0.46        50
   macro avg       0.41      0.39      0.39        50
weighted avg       0.50      0.46      0.47        50



In [52]:
nb = MultinomialNB()
nb.fit(X_train, y_train)
pred_nb = nb.predict(X_test)

acc_nb = accuracy_score(y_test, pred_nb)

print("\n=== Naive Bayes Result ===")
print(f"Accuracy: {acc_nb*100:.2f}%\n")
print(classification_report(y_test, pred_nb))


=== Naive Bayes Result ===
Accuracy: 68.00%

              precision    recall  f1-score   support

    negative       0.76      0.73      0.74        22
     neutral       0.00      0.00      0.00         7
    positive       0.62      0.86      0.72        21

    accuracy                           0.68        50
   macro avg       0.46      0.53      0.49        50
weighted avg       0.60      0.68      0.63        50



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [53]:
svm = LinearSVC(C=1.5)
svm.fit(X_train, y_train)
pred_svm = svm.predict(X_test)

acc_svm = accuracy_score(y_test, pred_svm)

print("\n=== SVM (Optimized) ===")
print(f"Accuracy: {acc_svm*100:.2f}%\n")
print(classification_report(y_test, pred_svm))


=== SVM (Optimized) ===
Accuracy: 60.00%

              precision    recall  f1-score   support

    negative       0.75      0.55      0.63        22
     neutral       0.00      0.00      0.00         7
    positive       0.58      0.86      0.69        21

    accuracy                           0.60        50
   macro avg       0.44      0.47      0.44        50
weighted avg       0.57      0.60      0.57        50



In [54]:
lr = LogisticRegression(max_iter=2000)
lr.fit(X_train, y_train)
pred_lr = lr.predict(X_test)

acc_lr = accuracy_score(y_test, pred_lr)

print("\n=== Logistic Regression Result ===")
print(f"Accuracy: {acc_lr*100:.2f}%\n")
print(classification_report(y_test, pred_lr))


=== Logistic Regression Result ===
Accuracy: 66.00%

              precision    recall  f1-score   support

    negative       0.73      0.73      0.73        22
     neutral       0.00      0.00      0.00         7
    positive       0.61      0.81      0.69        21

    accuracy                           0.66        50
   macro avg       0.44      0.51      0.47        50
weighted avg       0.57      0.66      0.61        50



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
