# In [None]:

# Import Library
import pandas as pd
import re
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Load the dataset.
# The CSV file produced by scraping the Maxim app reviews must already exist.
REVIEWS_CSV_PATH = 'taxsee_reviews.csv'
data = pd.read_csv(REVIEWS_CSV_PATH)

# Data cleaning and preprocessing

# Matches "http..." links and "www." hosts. The dot is escaped so that
# ordinary words that merely contain "www" (e.g. "awwwww") are not treated as
# URLs, and matching is case-insensitive because lower-casing happens only
# after URL removal.
_URL_PATTERN = re.compile(r'http\S+|www\.\S+', flags=re.IGNORECASE)

# Anything that is neither an ASCII letter nor whitespace.
_NON_ALPHA_PATTERN = re.compile(r'[^A-Za-z\s]')


def clean_text(text):
    """Normalize a raw review string for vectorization.

    Steps, in order:
      1. remove URLs (``http...`` links and ``www.`` hosts),
      2. remove every character that is not an ASCII letter or whitespace,
      3. lower-case the result and strip leading/trailing whitespace.

    Whitespace inside the text is not collapsed, so removing a URL or a
    digit from the middle of a sentence can leave consecutive spaces.

    Args:
        text: The review text as a ``str``.

    Returns:
        The cleaned, lower-cased string (possibly empty).
    """
    text = _URL_PATTERN.sub('', text)        # drop URLs
    text = _NON_ALPHA_PATTERN.sub('', text)  # drop non-alphabetic characters
    return text.lower().strip()

# The review text lives in the 'content' column. Missing reviews are replaced
# with an empty string before the cast: astype(str) alone would turn NaN into
# the literal word "nan", which would then be vectorized as review text.
data['content'] = data['content'].fillna('').astype(str).apply(clean_text)

# Derive the reference sentiment label from the rating:
# 'positive' when score > 3, 'negative' when score <= 3.
# (A missing score compares False against 3 and is therefore labelled 'negative'.)
data['sentiment'] = data['score'].apply(lambda x: 'positive' if x > 3 else 'negative')

# Load the trained model and the TF-IDF vectorizer.
# NOTE(security): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
# Context managers close the file handles as soon as loading finishes.
with open('maxim_sentiment_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('maxim_tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)

# Turn the cleaned reviews into TF-IDF features using the loaded vectorizer.
X = tfidf.transform(data['content'])

# Predict a sentiment label for each review.
predictions = model.predict(X)

# Evaluate the predictions against the score-derived labels.
y_true = data['sentiment']
print("Classification Report:")
report = classification_report(y_true, predictions)
print(report)
accuracy = accuracy_score(y_true, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")
