In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [10]:
def main(file_name, test_size=0.2, random_state=82, max_features=5000):
    """Train and evaluate sentiment classifiers on a preprocessed CSV.

    The CSV is read with pandas, rows whose ``sentiment`` is ``'Netral'``
    (or whose text/label is missing) are discarded, and the remaining rows
    are split into stratified train/test partitions. The ``teks_final``
    column is vectorized with TF-IDF (fitted on the training split only),
    then a Multinomial Naive Bayes and a linear SVM are each fitted and
    scored. Accuracy and a classification report are printed per model.

    Args:
        file_name: Path (or any source accepted by ``pd.read_csv``) of the
            CSV. It must contain the columns ``teks_final`` and
            ``sentiment``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test split.
        max_features: Upper bound on the TF-IDF vocabulary size.

    Returns:
        None. Results are written to stdout.

    Raises:
        ValueError: If no usable rows remain after filtering.
    """
    # Load preprocessed CSV
    df = pd.read_csv(file_name)

    # read_csv turns empty cells into NaN, and TfidfVectorizer refuses NaN
    # documents, so rows without text or without a label are dropped up front.
    df = df.dropna(subset=['teks_final', 'sentiment'])
    df = df[df['sentiment'] != 'Netral']
    if df.empty:
        raise ValueError(
            f"No rows left in {file_name!r} after removing missing and 'Netral' entries"
        )

    # Features and labels
    X = df['teks_final']
    y = df['sentiment']

    # Split train/test; stratify keeps the class ratio equal in both splits
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Vectorize text with TF-IDF (vocabulary learned from the training split
    # only, so nothing from the test split leaks into the features)
    vectorizer = TfidfVectorizer(max_features=max_features)
    X_train_vect = vectorizer.fit_transform(X_train)
    X_test_vect = vectorizer.transform(X_test)

    # Define models
    models = {
        "Naive Bayes": MultinomialNB(),
        # probability=True is intentionally not set: only predict() is used
        # below, and enabling it makes fit() run an extra internal
        # cross-validation just to calibrate probabilities nobody reads.
        "SVM": SVC(kernel='linear', class_weight='balanced'),
    }

    # Train and evaluate
    for name, model in models.items():
        model.fit(X_train_vect, y_train)
        y_pred = model.predict(X_test_vect)

        print(f"==== {name} ====")
        print("Accuracy:", accuracy_score(y_test, y_pred))
        # zero_division=0 reports 0.00 for a class that is never predicted
        # (same numbers as the default) without emitting a warning per metric.
        print(classification_report(y_test, y_pred, zero_division=0))
        print("\n")

In [11]:
# Run the full train/evaluate pipeline on the preprocessed dataset.
main(file_name="data_preprocessed.csv")

==== Naive Bayes ====
Accuracy: 0.7954545454545454
              precision    recall  f1-score   support

     Negatif       0.00      0.00      0.00        36
     Positif       0.80      1.00      0.89       140

    accuracy                           0.80       176
   macro avg       0.40      0.50      0.44       176
weighted avg       0.63      0.80      0.70       176





  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


==== SVM ====
Accuracy: 0.8238636363636364
              precision    recall  f1-score   support

     Negatif       0.59      0.47      0.52        36
     Positif       0.87      0.91      0.89       140

    accuracy                           0.82       176
   macro avg       0.73      0.69      0.71       176
weighted avg       0.81      0.82      0.82       176



