# IMDB Sentiment Analysis - Interactive Predictor

This notebook provides an interactive interface for sentiment prediction.

In [1]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import pickle
from IPython.display import display, HTML
import ipywidgets as widgets
from src.models import load_model, predict_with_confidence
from src.preprocessing import clean_text, tokenize_text

import warnings
warnings.filterwarnings('ignore')

## 1. Load Models and Vectorizer

In [2]:
# Load both saved models through the project's load_model helper.
print("Loading models...")
nb_model = load_model('../models/naive_bayes_model.pkl')
lr_model = load_model('../models/logistic_regression_model.pkl')

# Load the pickled TF-IDF vectorizer.
# SECURITY NOTE: pickle.load can execute arbitrary code stored in the file.
# Only load vectorizer files that were produced by a trusted training run.
with open('../models/tfidf_vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)
print("Vectorizer loaded")

# "\u2713" is the check mark. Writing it as an escape keeps this line
# ASCII-only, so it cannot be corrupted when the notebook is re-encoded.
print("\n\u2713 All models loaded successfully!")

Loading models...
Model loaded from ../models/naive_bayes_model.pkl
Model loaded from ../models/logistic_regression_model.pkl
Vectorizer loaded

✓ All models loaded successfully!


## 2. Prediction Function

In [3]:
def predict_sentiment(review_text):
    """
    Run one review through both classifiers and summarize the outcome.

    Args:
        review_text: Raw review text

    Returns:
        None when the review is empty or only whitespace. Otherwise a
        dictionary with one entry per model ('naive_bayes' and
        'logistic_regression', each holding 'sentiment', 'confidence' and
        'probabilities'), plus 'agreement', 'processed_text' and
        'token_count'.
    """
    # Guard clause: nothing to classify.
    if not review_text or not review_text.strip():
        return None

    # Clean -> tokenize -> re-join, then vectorize the single document.
    tokens = tokenize_text(clean_text(review_text))
    processed = ' '.join(tokens)
    features = vectorizer.transform([processed])

    def summarize(model):
        """Build the per-model summary from the first (only) row of output."""
        outcome = predict_with_confidence(model, features)
        label = 'Positive' if outcome['predictions'][0] == 1 else 'Negative'
        confidence = outcome['confidence'][0]
        class_proba = outcome['probabilities'][0]
        return {
            'sentiment': label,
            'confidence': confidence,
            'probabilities': {'negative': class_proba[0], 'positive': class_proba[1]},
        }

    nb_summary = summarize(nb_model)
    lr_summary = summarize(lr_model)

    return {
        'naive_bayes': nb_summary,
        'logistic_regression': lr_summary,
        'agreement': nb_summary['sentiment'] == lr_summary['sentiment'],
        'processed_text': processed,
        'token_count': len(tokens),
    }

## 3. Display Results Function

In [4]:
def _model_card_html(title, accent_color, model_result):
    """
    Build the HTML card for a single model.

    Args:
        title: Heading shown on the card
        accent_color: CSS color used for the card border and heading
        model_result: Dictionary with 'sentiment', 'confidence' and
            'probabilities' ('negative' / 'positive') entries

    Returns:
        HTML string for the card
    """
    negative = model_result['probabilities']['negative']
    positive = model_result['probabilities']['positive']
    # Each probability is used twice: as the label and as the bar width.
    # One decimal place keeps the CSS width short and readable.
    return f"""
            <div style='flex: 1; border: 2px solid {accent_color}; padding: 15px; border-radius: 8px; background-color: white;'>
                <h4 style='margin-top: 0; color: {accent_color};'>{title}</h4>
                <p style='font-size: 24px; font-weight: bold; margin: 10px 0;'>
                    {model_result['sentiment']}
                </p>
                <p style='margin: 5px 0;'>
                    <strong>Confidence:</strong> {model_result['confidence']:.1%}
                </p>
                <div style='margin-top: 10px;'>
                    <div style='font-size: 12px;'>Negative: {negative:.1%}</div>
                    <div style='background-color: #ff6b6b; height: 20px; width: {negative:.1%}; border-radius: 3px;'></div>
                </div>
                <div style='margin-top: 5px;'>
                    <div style='font-size: 12px;'>Positive: {positive:.1%}</div>
                    <div style='background-color: #51cf66; height: 20px; width: {positive:.1%}; border-radius: 3px;'></div>
                </div>
            </div>
    """


def display_results(results):
    """
    Display prediction results in a formatted way.

    Args:
        results: Dictionary with 'naive_bayes', 'logistic_regression',
            'agreement' and 'token_count' entries, or None when there was
            no review text to analyze

    Returns:
        None. The HTML is rendered through IPython's display().
    """
    if results is None:
        display(HTML("<p style='color: red;'>Please enter a review text.</p>"))
        return

    # Agreement indicator. The icons are written as HTML numeric entities
    # (check mark / ballot X) so they survive any source-file re-encoding.
    if results['agreement']:
        agreement_icon = "&#10003;"
        agreement_color = "green"
        agreement_text = 'Both models agree!'
    else:
        agreement_icon = "&#10007;"
        agreement_color = "orange"
        agreement_text = 'Models disagree - review carefully'

    # Both cards share one template; only title, accent color and data differ.
    cards = (
        _model_card_html('Naive Bayes', '#4CAF50', results['naive_bayes'])
        + _model_card_html('Logistic Regression', '#2196F3', results['logistic_regression'])
    )

    # Create HTML output
    html = f"""
    <div style='border: 2px solid #ddd; padding: 20px; border-radius: 10px; background-color: #f9f9f9;'>
        <h3 style='margin-top: 0;'>Sentiment Analysis Results</h3>

        <div style='margin: 15px 0;'>
            <strong>Processed tokens:</strong> {results['token_count']}
        </div>

        <div style='display: flex; gap: 20px; margin: 20px 0;'>
            {cards}
        </div>

        <div style='margin-top: 15px; padding: 10px; background-color: white; border-radius: 5px;'>
            <strong>Model Agreement:</strong>
            <span style='color: {agreement_color}; font-size: 20px;'>{agreement_icon}</span>
            <span style='margin-left: 10px;'>
                {agreement_text}
            </span>
        </div>
    </div>
    """

    display(HTML(html))

## 4. Interactive Widget

In [5]:
# Create widgets
text_input = widgets.Textarea(
    value='',
    placeholder='Enter your movie review here...',
    description='Review:',
    layout=widgets.Layout(width='90%', height='150px')
)

predict_button = widgets.Button(
    description='Analyze Sentiment',
    button_style='success',
    tooltip='Click to analyze',
    icon='check'
)

output_area = widgets.Output()

def on_predict_click(b):
    with output_area:
        output_area.clear_output()
        results = predict_sentiment(text_input.value)
        display_results(results)

predict_button.on_click(on_predict_click)

# Display interface
display(HTML("<h2>ðŸŽ¬ Movie Review Sentiment Analyzer</h2>"))
display(HTML("<p>Enter a movie review below and click 'Analyze Sentiment' to see predictions from both models.</p>"))
display(text_input)
display(predict_button)
display(output_area)

Textarea(value='', description='Review:', layout=Layout(height='150px', width='90%'), placeholder='Enter your …

Button(button_style='success', description='Analyze Sentiment', icon='check', style=ButtonStyle(), tooltip='Cl…

Output()

## 5. Example Reviews

Try these example reviews:

In [6]:
# Sample reviews as (title, text) pairs, expanded into the list of
# {'title': ..., 'text': ...} dictionaries used by the loop below.
examples = [
    {'title': title, 'text': text}
    for title, text in (
        (
            'Strongly Positive',
            "This movie is absolutely fantastic! The acting is superb, the plot is engaging, and the cinematography is breathtaking. I highly recommend it to everyone. Best film I've seen this year!",
        ),
        (
            'Strongly Negative',
            "Terrible movie. Waste of time and money. The plot makes no sense, the acting is awful, and I was bored throughout. Don't watch this garbage.",
        ),
        (
            'Mixed/Neutral',
            "The movie had some good moments, but overall it was just okay. The acting was decent, but the story was predictable. Not bad, but not great either.",
        ),
        (
            'Subtle Positive',
            "A thoughtful and well-crafted film. While it may not appeal to everyone, those who appreciate character-driven stories will find much to enjoy here.",
        ),
        (
            'Subtle Negative',
            "Despite the promising premise and talented cast, the film fails to deliver. The pacing is off and the ending feels rushed and unsatisfying.",
        ),
    )
]

# Print a banner and the raw review, then render both models' predictions.
for i, example in enumerate(examples, start=1):
    print("\n" + "=" * 80)
    print(f"Example {i}: {example['title']}")
    print("=" * 80)
    print(f"Review: {example['text']}")
    print()
    results = predict_sentiment(example['text'])
    display_results(results)


Example 1: Strongly Positive
Review: This movie is absolutely fantastic! The acting is superb, the plot is engaging, and the cinematography is breathtaking. I highly recommend it to everyone. Best film I've seen this year!




Example 2: Strongly Negative
Review: Terrible movie. Waste of time and money. The plot makes no sense, the acting is awful, and I was bored throughout. Don't watch this garbage.




Example 3: Mixed/Neutral
Review: The movie had some good moments, but overall it was just okay. The acting was decent, but the story was predictable. Not bad, but not great either.




Example 4: Subtle Positive
Review: A thoughtful and well-crafted film. While it may not appeal to everyone, those who appreciate character-driven stories will find much to enjoy here.




Example 5: Subtle Negative
Review: Despite the promising premise and talented cast, the film fails to deliver. The pacing is off and the ending feels rushed and unsatisfying.



## 6. Batch Prediction

In [7]:
def batch_predict(reviews):
    """
    Predict sentiment for multiple reviews.

    Reviews for which predict_sentiment returns nothing (empty or
    whitespace-only text) are skipped, so the result can have fewer rows
    than the input.

    Args:
        reviews: List of review texts

    Returns:
        DataFrame with one row per analyzed review and the columns
        'review' (text, truncated to 100 characters plus '...'),
        'nb_sentiment', 'nb_confidence', 'lr_sentiment', 'lr_confidence'
        and 'agreement'. The columns are present even when no review
        could be analyzed.
    """
    # Passing the columns explicitly keeps the schema stable for empty input.
    # Without it, pd.DataFrame([]) has no columns at all and any later
    # column access raises KeyError.
    columns = [
        'review',
        'nb_sentiment',
        'nb_confidence',
        'lr_sentiment',
        'lr_confidence',
        'agreement',
    ]
    rows = []

    for review in reviews:
        pred = predict_sentiment(review)
        if not pred:
            continue  # nothing to report for blank reviews

        # Keep the table readable: show at most the first 100 characters.
        preview = review[:100] + '...' if len(review) > 100 else review
        rows.append({
            'review': preview,
            'nb_sentiment': pred['naive_bayes']['sentiment'],
            'nb_confidence': pred['naive_bayes']['confidence'],
            'lr_sentiment': pred['logistic_regression']['sentiment'],
            'lr_confidence': pred['logistic_regression']['confidence'],
            'agreement': pred['agreement']
        })

    return pd.DataFrame(rows, columns=columns)

# Demo input: five short reviews covering positive, negative and mixed wording.
batch_reviews = [
    "Amazing movie! Loved every minute of it.",
    "Boring and predictable. Not worth watching.",
    "Great performances but weak storyline.",
    "Masterpiece! A must-watch for everyone.",
    "Disappointing. Expected much better.",
]

# Score the whole list in one call, then show the table with rich display.
batch_results = batch_predict(batch_reviews)

print("\nBatch Prediction Results:")
display(batch_results)


Batch Prediction Results:


Unnamed: 0,review,nb_sentiment,nb_confidence,lr_sentiment,lr_confidence,agreement
0,Amazing movie! Loved every minute of it.,Positive,0.740086,Positive,0.978233,True
1,Boring and predictable. Not worth watching.,Negative,0.756728,Negative,0.978923,True
2,Great performances but weak storyline.,Negative,0.527728,Positive,0.666253,False
3,Masterpiece! A must-watch for everyone.,Positive,0.709794,Positive,0.955648,True
4,Disappointing. Expected much better.,Negative,0.792813,Negative,0.97339,True


## 7. Usage Guide

### How to Use:
1. **Interactive Mode**: Use the text area above to enter any movie review
2. **Click Analyze**: Get instant predictions from both models
3. **Review Results**: Compare predictions, confidence scores, and model agreement

### Understanding Results:
- **Sentiment**: Positive or Negative classification
- **Confidence**: How certain the model is (0-100%)
- **Probabilities**: Breakdown of positive vs negative likelihood
- **Agreement**: Whether both models predict the same sentiment

### Tips:
- Longer reviews generally produce more confident predictions
- Strong sentiment words increase confidence
- Mixed reviews may show disagreement between models
- Both models consider context and word combinations (bigrams)

### Model Characteristics:
- **Naive Bayes**: Faster; in the batch results above it reports lower confidence (about 53–79%) than Logistic Regression (about 67–98%)
- **Logistic Regression**: Slightly more accurate, better calibrated confidence