In [None]:
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Load the pre-cleaned dataset. Rows with a missing text or label are dropped:
# TfidfVectorizer rejects NaN documents, and a NaN label cannot serve as a target.
data = pd.read_csv('cleanedDataPPN12.csv').dropna(
    subset=['full_text', 'sentiment_label']
)

# 1. Vectorizer configuration
vectorizer = TfidfVectorizer(
    use_idf=True,             # weight terms by inverse document frequency
    strip_accents='ascii',    # strip accents from characters
    min_df=1,                 # keep terms that appear in at least 1 document
    max_df=0.95               # drop terms that appear in more than 95% of documents
)

# 2. Data splitting
y = data.sentiment_label     # target labels
x = data.full_text           # input text

# 70/30 train/test split; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=33
)

# 3. Vectorization
# Fit the vocabulary/IDF weights on the training split only, then reuse them
# for the test split so no test-set statistics leak into the features.
x_train_vect = vectorizer.fit_transform(x_train)
x_test_vect = vectorizer.transform(x_test)

# 4. Model training (Multinomial Naive Bayes)
mnb = MultinomialNB()
mnb.fit(x_train_vect, y_train)

# 5. Prediction
y_pred = mnb.predict(x_test_vect)

# 6. Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Naive Bayes Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# 7. Persist the model and the fitted vectorizer as pickle files.
# The vectorizer must be saved together with the model: new text has to be
# transformed with the same vocabulary the classifier was trained on.
with open("classifierMNB.pkl", "wb") as model_file:
    pickle.dump(mnb, model_file)

with open("vectorizerMNB.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

In [None]:
%%writefile app.py
import streamlit as st
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the trained classifier and the fitted vectorizer.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that were produced by the trusted training step.
@st.cache_resource
def _load_artifacts():
    """Read the pickled classifier and vectorizer from disk.

    Decorated with st.cache_resource so the files are unpickled once and the
    resulting objects are reused, instead of being re-read from disk each time
    Streamlit re-executes this script.

    Returns:
        A ``(classifier, vectorizer)`` tuple.
    """
    with open("classifierMNB.pkl", "rb") as model_file:
        model = pickle.load(model_file)

    with open("vectorizerMNB.pkl", "rb") as vectorizer_file:
        fitted_vectorizer = pickle.load(vectorizer_file)

    return model, fitted_vectorizer


classifier, vectorizer = _load_artifacts()

def predict_sentiment(text):
    """Predict the sentiment label for a single piece of text.

    Args:
        text: The raw input string to classify.

    Returns:
        The first (and only) element of the classifier's prediction for
        ``text``.
    """
    # transform() takes a collection of documents, so wrap the single text
    # in a one-element list.
    features = vectorizer.transform([text])
    return classifier.predict(features)[0]

def main():
    """Render the page: title, text box, and a button that shows the prediction."""
    st.title("Prediksi Sentimen Terhadap Kenaikan PPN 12%")
    st.subheader("Input teks untuk mendapatkan prediksi sentimen: positive, negative, atau neutral.")

    # Free-text input from the user.
    user_input = st.text_input("Masukkan teks:", placeholder="Contoh: Kenaikan PPN ini terlalu tinggi!")

    # Nothing more to do unless the predict button was pressed.
    if not st.button("Prediksi"):
        return

    # Reject empty or whitespace-only input before calling the model.
    if not user_input.strip():
        st.error("Teks tidak boleh kosong!")
        return

    sentiment = predict_sentiment(user_input)
    st.success(f"Hasil prediksi: **{sentiment}**")

# Only start the UI when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
!pip install streamlit

In [None]:
# Fetch ipv4.icanhazip.com quietly (-q) and print the response to stdout (-O -).
# NOTE(review): presumably this prints the machine's public IPv4 address for use
# as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

In [None]:
# Start the Streamlit app in the background (&) and, in the foreground, open a
# localtunnel to local port 8501.
# NOTE(review): 8501 is presumably the port Streamlit serves on by default — confirm.
!streamlit run app.py & npx localtunnel --port 8501