In [35]:
import pickle
import re
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import matplotlib.pyplot as plt

# Load all models
print("Loading models...")


def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file.
    Only point this at artefacts produced by this project's own training run.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Dict of preprocessing helpers; preprocess_text reads the keys
# 'slang_dict', 'stemmer' and 'stopword_remover' from it.
preprocessing_tools = _load_pickle('models/preprocessing_tools.pkl')

# Fitted text vectorizer used to turn cleaned text into model features.
vectorizer = _load_pickle('models/vectorizer.pkl')

# The three classifiers whose votes are combined in predict_sentiment.
nb_model = _load_pickle('models/model_naive_bayes.pkl')
knn_model = _load_pickle('models/model_knn.pkl')
dt_model = _load_pickle('models/model_decision_tree.pkl')


Loading models...


In [36]:
def preprocess_text(text):
    """Clean a raw comment and return the normalized string.

    Steps, in order: lowercase; strip @mentions, #hashtags and URLs; replace
    punctuation with spaces; drop digits; collapse whitespace; map each token
    through the slang dictionary; then apply the stopword remover and stemmer
    taken from the module-level ``preprocessing_tools`` dict.

    Args:
        text: The comment to clean. It is coerced with ``str()`` first, so
            non-string values are accepted.

    Returns:
        The value returned by ``stemmer.stem`` for the cleaned text.
    """
    # Look up every tool first so a missing key fails before any work is done.
    slang_dict = preprocessing_tools['slang_dict']
    stemmer = preprocessing_tools['stemmer']
    stopword_remover = preprocessing_tools['stopword_remover']

    # (pattern, replacement) pairs applied sequentially; the order matters.
    substitutions = (
        (r'@\w+', ''),       # mentions
        (r'#\w+', ''),       # hashtags
        (r'http\S+', ''),    # URLs
        (r'[^\w\s]', ' '),   # punctuation becomes a space
        (r'\d+', ''),        # digits
    )

    cleaned = str(text).lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # split() with no argument also collapses runs of whitespace.
    tokens = cleaned.split()

    # Slang normalization: tokens without a dictionary entry are kept as-is.
    cleaned = ' '.join(slang_dict.get(token, token) for token in tokens)

    # Stopword removal first, then stemming.
    without_stopwords = stopword_remover.remove(cleaned)
    return stemmer.stem(without_stopwords)

In [37]:
def predict_sentiment(text):
    """Classify a comment with every loaded model and combine them by majority vote.

    The text is cleaned with ``preprocess_text``, vectorized with the
    module-level ``vectorizer`` and passed to ``nb_model``, ``knn_model`` and
    ``dt_model``.

    Args:
        text: Raw comment text.

    Returns:
        dict with the keys:
            'original_text'   - the input exactly as given
            'cleaned_text'    - output of ``preprocess_text``
            'predictions'     - per-model dict keyed by display name; each entry
                                holds 'label', 'proba' (``None`` when the model
                                has no ``predict_proba``), 'sentiment'
                                ('POSITIVE' when label == 1, else 'NEGATIVE')
                                and, when 'proba' is available, 'confidence'
                                (highest class probability, in percent)
            'final_vote'      - 1 if a strict majority of models predicted
                                label 1, otherwise 0
            'final_sentiment' - 'POSITIVE' / 'NEGATIVE' matching 'final_vote'
    """
    cleaned_text = preprocess_text(text)
    X = vectorizer.transform([cleaned_text])

    # Display name -> classifier. Insertion order is the order results are shown.
    models = {
        'Naive Bayes': nb_model,
        'k-NN': knn_model,
        'Decision Tree': dt_model,
    }

    predictions = {}
    for model_name, model in models.items():
        label = model.predict(X)[0]
        proba = model.predict_proba(X)[0] if hasattr(model, 'predict_proba') else None

        entry = {
            'label': label,
            'proba': proba,
            'sentiment': 'POSITIVE' if label == 1 else 'NEGATIVE',
        }
        if proba is not None:
            # NOTE(review): callers read proba[0] as negative and proba[1] as
            # positive, which presumes the model's classes_ are ordered [0, 1]
            # - confirm against the training notebook.
            entry['confidence'] = max(proba) * 100
        predictions[model_name] = entry

    # Count votes with the same `label == 1` rule used for the per-model
    # sentiment, and require a strict majority instead of a hard-coded
    # threshold so the tally stays correct if the model table changes.
    positive_votes = sum(1 for pred in predictions.values() if pred['label'] == 1)
    final_vote = 1 if positive_votes * 2 > len(predictions) else 0
    final_sentiment = 'POSITIVE' if final_vote == 1 else 'NEGATIVE'

    return {
        'original_text': text,
        'cleaned_text': cleaned_text,
        'predictions': predictions,
        'final_vote': final_vote,
        'final_sentiment': final_sentiment
    }

In [38]:
# Ready-made example comments offered in the dropdown
sample_comments = [
    "lagunya bagus banget, suka!",
    "jelek banget sih, ga sesuai ekspetasi",
    "aku suka sama penyanyinya, suaranya merdu",
    "norak banget gaya-gayaannya",
    "semoga selalu sukses dan bahagia",
    "cari sensasi mulu orang ini"
]

# Free-text box where the user types or pastes the comment to analyze
text_input = widgets.Textarea(
    description='Komentar:',
    placeholder='Masukkan komentar Instagram disini...',
    value='',
    layout=widgets.Layout(height='100px', width='80%'),
)

# Dropdown of examples; starts on the first sample
sample_dropdown = widgets.Dropdown(
    description='Contoh:',
    options=sample_comments,
    value=sample_comments[0],
    layout=widgets.Layout(width='80%'),
)

# Button that triggers the analysis
analyze_button = widgets.Button(
    description='ANALYZE SENTIMENT',
    button_style='success',
    layout=widgets.Layout(width='200px'),
)

# Area that captures everything printed or plotted by the click handler
output = widgets.Output()

def on_sample_change(change):
    """Copy the newly selected dropdown value into the comment text box.

    Args:
        change: Change notification passed by the widget observer; its 'new'
            entry holds the value that was just selected.
    """
    selected_comment = change['new']
    text_input.value = selected_comment

def on_analyze_click(b):
    """Run the sentiment analysis for the current text box and render the report.

    All output is written into the module-level ``output`` widget: a text
    report with each model's verdict and the final decision, followed by a
    grouped bar chart of class probabilities when every model supplied them.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    with output:
        clear_output()

        text = text_input.value.strip()
        if not text:
            print("Silakan masukkan komentar terlebih dahulu!")
            return

        print("Menganalisis sentimen...")

        result = predict_sentiment(text)
        model_results = result['predictions']

        heavy_rule = "=" * 60
        light_rule = "-" * 60

        # Report header
        print("\n" + heavy_rule)
        print("HASIL ANALISIS SENTIMEN")
        print(heavy_rule)
        print(f"Komentar: {result['original_text']}")
        print(f"Setelah cleaning: {result['cleaned_text']}")

        # Per-model section
        print("\n" + light_rule)
        print("PREDIKSI 3 MODEL:")
        print(light_rule)

        for model_name, pred in model_results.items():
            print(f"\n{model_name}:")
            print(f"  {pred['sentiment']}")
            if 'confidence' not in pred:
                continue
            print(f"Confidence: {pred['confidence']:.1f}%")
            proba = pred['proba']
            if proba is not None:
                print(f"Probabilitas: Negatif={proba[0]:.3f}, Positif={proba[1]:.3f}")

        # Final decision
        print("\n" + light_rule)
        print(f"KEPUTUSAN FINAL: {result['final_sentiment']}")
        print(heavy_rule)

        # Probability chart: only drawn when every model reported probabilities
        probas = [pred['proba'] for pred in model_results.values()]
        if any(proba is None for proba in probas):
            return

        models = list(model_results.keys())
        neg_probs = [proba[0] for proba in probas]
        pos_probs = [proba[1] for proba in probas]

        fig, ax = plt.subplots(figsize=(10, 6))
        x = np.arange(len(models))
        width = 0.35

        # One bar series per class, shifted left/right of each model's tick
        bar_series = (
            (x - width/2, neg_probs, 'Negative', '#ff6b6b'),
            (x + width/2, pos_probs, 'Positive', '#51cf66'),
        )
        for positions, heights, series_label, series_color in bar_series:
            ax.bar(positions, heights, width, label=series_label, color=series_color)

        ax.set_xlabel('Model')
        ax.set_ylabel('Probability')
        ax.set_title('Probability Distribution per Model')
        ax.set_xticks(x)
        ax.set_xticklabels(models)
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

# Wire the handlers: picking a sample fills the text box, clicking runs the analysis
analyze_button.on_click(on_analyze_click)
sample_dropdown.observe(on_sample_change, names='value')

# Banner
print("\nINTERACTIVE DEMO")
print("="*50)

# Heading and instructions, rendered as HTML
for markup in (
    "<h3>Instagram Comment Sentiment Analyzer</h3>",
    "<p>Masukkan komentar Instagram untuk dianalisis sentimennya:</p>",
):
    display(HTML(markup))

# Widgets, top to bottom in the order they should appear
for widget in (text_input, sample_dropdown, analyze_button, output):
    display(widget)


INTERACTIVE DEMO


Textarea(value='', description='Komentar:', layout=Layout(height='100px', width='80%'), placeholder='Masukkan …

Dropdown(description='Contoh:', layout=Layout(width='80%'), options=('lagunya bagus banget, suka!', 'jelek ban…

Button(button_style='success', description='ANALYZE SENTIMENT', layout=Layout(width='200px'), style=ButtonStyl…

Output()

In [39]:
print("\n" + "="*50)
print("BATCH ANALYSIS")
print("="*50)

# Hand-written comments used to exercise the ensemble end to end
batch_comments = [
    "produknya keren, recommend banget",
    "pelayanannya lambat banget, ga worth it",
    "harganya terjangkau dan kualitas ok",
    "capek deh sama drama-drama ginian",
    "sukses selalu untuk kedepannya"
]

print("Menganalisis batch comments...")
print("-"*50)

for i, comment in enumerate(batch_comments, 1):
    result = predict_sentiment(comment)

    # predict_sentiment already performs the majority vote; reuse its verdict
    # instead of re-implementing the tally here so the two cannot drift apart.
    final = result['final_sentiment']

    print(f"{i}. {comment[:50]}... → {final}")


BATCH ANALYSIS
Menganalisis batch comments...
--------------------------------------------------
1. produknya keren, recommend banget... → POSITIVE
2. pelayanannya lambat banget, ga worth it... → POSITIVE
3. harganya terjangkau dan kualitas ok... → POSITIVE
4. capek deh sama drama-drama ginian... → POSITIVE
5. sukses selalu untuk kedepannya... → POSITIVE


In [40]:
print("\n" + "="*50)
print("PERFORMANCE TEST")
print("="*50)

# Load test data
# NOTE(review): if this CSV is also the data the models were trained on, the
# accuracy below is optimistic - confirm a held-out split is being used.
df = pd.read_csv('instagram_comments_cleaned.csv')

# Cap the sample size at the number of rows: DataFrame.sample raises when asked
# for more rows than exist (without replacement).
test_comments = df.sample(min(10, len(df)), random_state=42)

correct = 0
total = len(test_comments)

print(f"Menguji {total} komentar acak...")
print("-"*50)

for idx, row in test_comments.iterrows():
    comment = row['Instagram Comment Text']
    result = predict_sentiment(comment)

    # Reuse the ensemble's own majority vote rather than recomputing it here.
    final_vote = result['final_vote']
    # Any 'Sentiment' value other than the exact string 'positive' counts as negative.
    actual = 1 if row['Sentiment'] == 'positive' else 0

    if final_vote == actual:
        correct += 1
        symbol = "✅"
    else:
        symbol = "❌"

    # str() keeps the preview slice from raising on non-string cells (e.g. NaN);
    # predict_sentiment applies the same coercion internally.
    print(f"{symbol} {str(comment)[:50]}... → Pred: {'POS' if final_vote==1 else 'NEG'}, Actual: {'POS' if actual==1 else 'NEG'}")

# Guard against an empty sample so the summary never divides by zero.
accuracy = correct / total * 100 if total else 0.0
print("\n" + "-"*50)
print(f"Accuracy pada sampel: {accuracy:.1f}% ({correct}/{total})")
print("="*50)


PERFORMANCE TEST
Menguji 10 komentar acak...
--------------------------------------------------
❌ Bagus dong.. mencari ilmu kapanpun dimanapun.. itu... → Pred: NEG, Actual: POS
✅ Pembagian suaranya pas bnget,pecah suaranya jg cak... → Pred: POS, Actual: POS
✅ Cowok macam anjing cuihhh... Orang kaya tidak akan... → Pred: NEG, Actual: NEG
✅ Sukak cewe cantik dan kmauan blajarnya tinggi ???... → Pred: POS, Actual: POS
✅ Gue gak suka sm ni orang. Idupnya kebanyakan ribut... → Pred: NEG, Actual: NEG
✅ cewe yg bgni yg hrsnya gosong jd abu aja di kebaka... → Pred: NEG, Actual: NEG
✅ Beginilah pasangan suami istri yg normal.. Romanti... → Pred: POS, Actual: POS
✅ Dasar artis abal abal enggk laku di layar kaca...u... → Pred: NEG, Actual: NEG
✅ Cantik, polos, apa adanya tapi keliatan berbakat. ... → Pred: POS, Actual: POS
✅ Ini cwenya yg bego apa cwonya yg pinter ya ?! Emg ... → Pred: NEG, Actual: NEG

--------------------------------------------------
Accuracy pada sampel: 90.0% (9/10)
