In [5]:

!pip install imbalanced-learn



In [6]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE


In [7]:

# Load the preprocessed review dataset; the path is relative to the
# notebook's working directory.
dataset_path = 'tokopedia_prosesss.csv'
df = pd.read_csv(dataset_path)

In [8]:
# Show the loaded frame as the cell output to eyeball its columns
# (pandas elides the middle rows of a long frame).
df

Unnamed: 0.1,Unnamed: 0,content,score,text_clean,text_tokenizingText,text_stopword,content_clean,stemmingText,score_label,label
0,0,"Aneh sekali pihak tokopedia, bukannya diperbai...",1,Aneh sekali pihak tokopedia bukannya diperbaik...,"['Aneh', 'sekali', 'pihak', 'tokopedia', 'buka...","['Aneh', 'tokopedia', 'diperbaiki', 'aplikasi'...",Aneh tokopedia diperbaiki aplikasi tiktok Dan ...,aneh tokopedia diperbaiki aplikasi tiktok dan ...,Negative,neutral
1,1,"Kolaborasi & integrasi Tokopedia, Gopay dan Ba...",1,Kolaborasi integrasi Tokopedia Gopay dan Bank...,"['Kolaborasi', 'integrasi', 'Tokopedia', 'Gopa...","['Kolaborasi', 'integrasi', 'Tokopedia', 'Gopa...",Kolaborasi integrasi Tokopedia Gopay Bank Jago...,kolaborasi integrasi tokopedia gopay bank jago...,Negative,Positive
2,2,semakin di update UI/UXnya malah bikin bingung...,1,semakin di update UIUXnya malah bikin bingung ...,"['semakin', 'di', 'update', 'UIUXnya', 'malah'...","['update', 'UIUXnya', 'bikin', 'bingung', 'not...",update UIUXnya bikin bingung notifikasi cek pe...,updat uiuxnya bikin bingung notifikasi cek pes...,Negative,negative
3,3,Masukan untuk team Developer-nya. Tolong dilak...,1,Masukan untuk team Developernya Tolong dilakuk...,"['Masukan', 'untuk', 'team', 'Developernya', '...","['Masukan', 'team', 'Developernya', 'Tolong', ...",Masukan team Developernya Tolong optimasi apli...,masukan team developernya tolong optimasi apli...,Negative,Positive
4,4,tokopedia memang buruk !!! saya belanja order ...,1,tokopedia memang buruk saya belanja order sud...,"['tokopedia', 'memang', 'buruk', 'saya', 'bela...","['tokopedia', 'buruk', 'belanja', 'order', '3'...",tokopedia buruk belanja order 3 harisama penju...,tokopedia buruk belanja order 3 harisama penju...,Negative,negative
...,...,...,...,...,...,...,...,...,...,...
7080,9994,Banyak aplikasi belanja cuma satu yg paling gw...,2,Banyak aplikasi belanja cuma satu yg paling gw...,"['Banyak', 'aplikasi', 'belanja', 'cuma', 'sat...","['Banyak', 'aplikasi', 'belanja', 'yg', 'gw', ...",Banyak aplikasi belanja yg gw suka Tokopedia t...,banyak aplikasi belanja yg gw suka tokopedia t...,Negative,Positive
7081,9996,"Makin kesini ini app makin lemot aja, gk kaya ...",1,Makin kesini ini app makin lemot aja gk kaya s...,"['Makin', 'kesini', 'ini', 'app', 'makin', 'le...","['Makin', 'kesini', 'app', 'lemot', 'aja', 'gk...",Makin kesini app lemot aja gk kaya Ditambah ek...,makin kesini app lemot aja gk kaya ditambah ek...,Negative,negative
7082,9997,"Sdh bertahun2 pakai, Selalu jadi andalan kalo ...",1,Sdh bertahun2 pakai Selalu jadi andalan kalo b...,"['Sdh', 'bertahun2', 'pakai', 'Selalu', 'jadi'...","['Sdh', 'bertahun2', 'pakai', 'Selalu', 'andal...",Sdh bertahun2 pakai Selalu andalan kalo belanj...,sdh bertahun2 pakai selalu andalan kalo belanj...,Negative,negative
7083,9998,Kl tersedia voucher diskon atau lainnya harusn...,2,Kl tersedia voucher diskon atau lainnya harusn...,"['Kl', 'tersedia', 'voucher', 'diskon', 'atau'...","['Kl', 'tersedia', 'voucher', 'diskon', 'dipil...",Kl tersedia voucher diskon dipilih voucher ses...,kl tersedia voucher diskon dipilih voucher ses...,Negative,negative


In [9]:
# Features: the stemmed review text. Missing values are replaced with an empty
# string first; a bare astype(str) would turn NaN into the literal token "nan",
# which the TF-IDF vectorizer would then count as a real word.
X = df['stemmingText'].fillna('').astype(str)
# Target: the sentiment label column.
y = df['label']

In [10]:
# Fit the encoder on the string labels, then map them to integer class ids.
# The fitted encoder is kept so class names can be recovered via classes_.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [11]:
tfidf = TfidfVectorizer(max_features=5000)
X_tfidf = tfidf.fit_transform(X)

In [12]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_tfidf, y_encoded)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)


In [14]:
# Candidate classifiers, all trained and scored on the same train/test
# matrices so their metrics are directly comparable. Random Forest is seeded
# because it is stochastic; without a seed its scores change on every run.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(),
    "Multinomial Naive Bayes": MultinomialNB()
}

# LabelEncoder maps classes to 0..n_classes-1 in the order of classes_. Passing
# these ids explicitly keeps the report rows aligned with target_names even if
# some class never appears in y_test or y_pred.
class_ids = list(range(len(label_encoder.classes_)))

for name, model in models.items():
    print(f"Model: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Akurasi: {acc*100:.2f}%")
    print(
        classification_report(
            y_test,
            y_pred,
            labels=class_ids,
            target_names=label_encoder.classes_,
        )
    )
    print("-" * 60)

Model: Logistic Regression
Akurasi: 83.10%
              precision    recall  f1-score   support

    Positive       0.93      0.79      0.85       691
    negative       0.82      0.78      0.80       740
     neutral       0.77      0.92      0.84       699

    accuracy                           0.83      2130
   macro avg       0.84      0.83      0.83      2130
weighted avg       0.84      0.83      0.83      2130

------------------------------------------------------------
Model: Random Forest
Akurasi: 88.40%
              precision    recall  f1-score   support

    Positive       0.95      0.86      0.90       691
    negative       0.85      0.85      0.85       740
     neutral       0.86      0.95      0.90       699

    accuracy                           0.88      2130
   macro avg       0.89      0.88      0.88      2130
weighted avg       0.89      0.88      0.88      2130

------------------------------------------------------------
Model: SVM
Akurasi: 87.56%
         