In [None]:
import joblib
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output

# Put the parent directory on the import path so the `src` package can be
# imported from this notebook. The membership check keeps the cell idempotent:
# re-running it does not stack duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.append('..')
from src.utils import plot_confusion_matrix, plot_feature_importance

# --- Load Data and Model ---
# Locations of the saved artifacts, relative to the notebook's working directory.
MODEL_PATH = "../models/best_sentiment_model.pkl"
TEST_DATA_PATH = "../models/test_data.pkl"

model = joblib.load(MODEL_PATH)

# The test-data artifact is unpacked into exactly two objects:
# the inputs (X_test) and their labels (y_test).
held_out_split = joblib.load(TEST_DATA_PATH)
X_test, y_test = held_out_split

print("Model and test data loaded.", f"Model object: {model}", sep="\n")

: 

In [None]:
# --- Model Evaluation ---
# Display names for the classes, shared by the report and the confusion matrix.
# NOTE(review): assumes the label encoding sorts as negative, then positive — confirm.
class_names = ['negative', 'positive']

print("Generating predictions on test set...")
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the held-out set
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred, target_names=class_names)
print(report_text)

# Confusion matrix, rendered by the project's plotting helper
print("Displaying confusion matrix...")
cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(cm, labels=class_names)

Generating predictions on test set...

Classification Report:
              precision    recall  f1-score   support

    negative       0.90      0.88      0.89      4961
    positive       0.89      0.90      0.90      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000

Displaying confusion matrix...


In [None]:
# --- Feature Importance Analysis ---
# Shows which words the model relies on when deciding.
#
# The feature names (words) are read from the pipeline's 'tfidf' step; the
# 'clf' step is handed to the plotting helper together with those names.

try:
    pipeline_steps = model.named_steps
    vectorizer = pipeline_steps['tfidf']
    classifier = pipeline_steps['clf']

    feature_names = vectorizer.get_feature_names_out()

    print("Plotting feature importance...")
    plot_feature_importance(classifier, feature_names)

except Exception as e:
    # Best-effort cell: report the problem and let the notebook carry on.
    print(
        f"Could not plot feature importance: {e}",
        "This plot only works if the best model is a linear classifier (like LogisticRegression).",
        sep="\n",
    )

Plotting feature importance...


In [None]:
# --- Test on New Reviews ---

# A few hand-written reviews to run through the model.
my_reviews = [
    "This movie was absolutely fantastic, a must-see!",
    "I've seen better. The plot was predictable and boring.",
    "It was not bad, but also not good. Just average.",
    "A waste of time and money. I want a refund.",
]

# The loaded pipeline takes the raw strings directly.
my_predictions = model.predict(my_reviews)
my_probabilities = model.predict_proba(my_reviews)

# Maps a predicted class value to its display name.
labels = {0: 'Negative', 1: 'Positive'}

for position, review in enumerate(my_reviews):
    pred_idx = my_predictions[position]
    probs = my_probabilities[position]
    # The predicted class value doubles as the column index into the
    # probability row; scaled to a percentage for display.
    confidence = probs[pred_idx] * 100
    prediction = labels[pred_idx]
    print(f"\nReview: '{review}'")
    print(f"-> Prediction: {prediction} (Confidence: {confidence:.2f}%)")


Review: 'This movie was absolutely fantastic, a must-see!'
-> Prediction: Positive (Confidence: 81.29%)

Review: 'I've seen better. The plot was predictable and boring.'
-> Prediction: Negative (Confidence: 99.88%)

Review: 'It was not bad, but also not good. Just average.'
-> Prediction: Negative (Confidence: 94.57%)

Review: 'A waste of time and money. I want a refund.'
-> Prediction: Negative (Confidence: 99.99%)


In [None]:
print("--- Analyzing High-Confidence Errors ---")

# Confidence of each prediction: the largest class probability in its row.
y_proba = model.predict_proba(X_test)
y_pred_probs = y_proba.max(axis=1)

# One row per test review: text, true label, predicted label, confidence.
df_results = pd.DataFrame(
    {
        'review': X_test,
        'true_label': y_test,
        'predicted_label': y_pred,
        'predicted_prob': y_pred_probs,
    }
)

# Misclassified rows only.
is_misclassified = df_results['true_label'] != df_results['predicted_label']
errors_df = df_results.loc[is_misclassified]

print(f"Total errors: {len(errors_df)} out of {len(df_results)}")

# Highest-confidence mistakes first.
confident_errors = errors_df.sort_values(by='predicted_prob', ascending=False)

print("\n--- TOP 5 MOST CONFIDENT ERRORS (Model was sure but wrong) ---")

pd.set_option('display.max_colwidth', 400)  # Show more text

for error_row in confident_errors.head(5).itertuples(index=False):
    print(f"Prediction: {labels[error_row.predicted_label]}\t (Confidence: {error_row.predicted_prob:.2f})")
    print(f"Actual Label: {labels[error_row.true_label]}")
    print(f"Review: {error_row.review}\n")
    print("-" * 50)

--- Analyzing High-Confidence Errors ---
Total errors: 1056 out of 10000

--- TOP 5 MOST CONFIDENT ERRORS (Model was sure but wrong) ---
Prediction: Positive	 (Confidence: 1.00)
Actual Label: Negative
Review: I've almost forever been against the inclusion of songs in a movie. My belief was that the quality of the film would automatically be improved if only those extremely annoying songs would be axed. However, things have quickly changed after watching that horrible Black (no songs) & this movie, Page 3 (plenty of songs). While Black was weak to an extreme, Page 3 delivers a gripping story with some strong acting & good direction. The songs were almost incidental & blended in almost seamlessly with the film. There certainly weren't any women getting sprayed with water for no apparent reason from mysterious water sources while gyrating wildly on the streets at night.<br /><br />I was pleasantly surprised with the bold and unabashed approach used by the director. There was no glossing o

In [None]:
print("--- Analyzing Low-Confidence / Borderline Reviews ---")

# 'margin' is the distance of the predicted-class probability from 0.5;
# the smaller it is, the less certain the model was.
df_results['margin'] = (df_results['predicted_prob'] - 0.5).abs()

# Ascending order puts the most uncertain reviews at the top.
most_uncertain_reviews = df_results.sort_values('margin')

print("\n--- TOP 5 MOST UNCERTAIN REVIEWS (Closest to 50/50) ---")

for uncertain_row in most_uncertain_reviews.head(5).itertuples(index=False):
    pred_label = labels[uncertain_row.predicted_label]
    true_label = labels[uncertain_row.true_label]

    print(f"Prediction: {pred_label}\t (Confidence: {uncertain_row.predicted_prob:.2f})")
    print(f"Actual Label: {true_label}")
    print(f"Review: {uncertain_row.review}\n")
    print("-" * 50)

--- Analyzing Low-Confidence / Borderline Reviews ---

--- TOP 5 MOST UNCERTAIN REVIEWS (Closest to 50/50) ---
Prediction: Positive	 (Confidence: 0.50)
Actual Label: Positive
Review: While sleeping, Mr. Eko is assigned by his brother Yemi (Adetokumboh McCormack) in a dream to go with John Locke to disclose the meaning of the "?" symbol. With the pretext of chasing Henry, Mr. Eko brings John with him and they find a second hatch called "Pearl" underground the question mark symbol marked on the field, where a video explains that the other hatch is a psychological experiment and people behavior pressing the buttons of the computer every 108 minutes are actually subjects. Meanwhile, Jack unsuccessfully tries to save Libby.<br /><br />In this episode, John Locke loses his faith in the island when he finds that they have been monitored in the hatch. The disgusting Michael sees the anguishing Libby wishing that she was dead, while Hurley, Jack, Kate and Sawyer are suffering her pain, in a dee

In [None]:
print("--- Distribution of Prediction Probabilities ---")

# Histogram of the predicted-class probability, coloured by the true label,
# with a box plot drawn in the margin.
histogram_options = dict(
    x='predicted_prob',
    color='true_label',
    marginal='box',
    barmode='overlay',
    nbins=50,
    title='Distribution of Model Confidence',
)
fig = px.histogram(df_results, **histogram_options)

axis_titles = dict(
    xaxis_title="Confidence (Probability of Predicted Class)",
    yaxis_title="Count",
)
fig.update_layout(**axis_titles)
fig.show()

--- Distribution of Prediction Probabilities ---


In [None]:
# --- Create UI Elements ---
print("Creating interactive prediction widget...")
print("This allows you to test the model live.")

# Multi-line input box, pre-filled with a sample review
text_input = widgets.Textarea(
    description='Review:',
    value='This movie was fantastic! The acting was superb.',
    placeholder='Type your movie review here...',
    layout=dict(width='90%', height='100px'),
    disabled=False,
)

# Button that triggers the prediction
predict_button = widgets.Button(
    description='Analyze Sentiment',
    icon='check',
    tooltip='Click to predict',
    button_style='success',
)

# Where the prediction result gets printed
output_area = widgets.Output()

# --- Define the prediction function ---
# This function will run when the button is clicked
def on_button_clicked(b):
    """Classify the review currently typed into `text_input` and show the result.

    Reads the text from the notebook-level `text_input` widget, runs the
    notebook-level `model` on it, and prints the predicted sentiment with its
    confidence (as a percentage) into `output_area`, replacing whatever was
    shown there before. An empty or whitespace-only review is not sent to the
    model; a short prompt is shown instead.

    Args:
        b: The argument supplied by the button's click callback. Unused.
    """
    review = text_input.value

    # Do all the work inside the output context so that everything printed
    # here lands in the widget's output area rather than being lost.
    # NOTE(review): the Output widget is expected to also display a traceback
    # for an exception raised inside this block — confirm for the installed
    # ipywidgets version.
    with output_area:
        clear_output(wait=True)  # Clear previous results

        # Nothing to classify: a blank review carries no signal for the model.
        if not review.strip():
            print("Please enter a review to analyze.")
            return

        # The model expects a list of documents, so wrap the single review.
        prediction_idx = model.predict([review])[0]
        probabilities = model.predict_proba([review])[0]

        # The predicted class value is used both as the key into `labels`
        # and as the column index into the probability row.
        prediction_label = labels[prediction_idx]
        confidence = probabilities[prediction_idx] * 100

        if prediction_label == 'Positive':
            print(f"Prediction: POSITIVE 😃 (Confidence: {confidence:.2f}%)")
        else:
            print(f"Prediction: NEGATIVE 😞 (Confidence: {confidence:.2f}%)")

# --- Link button and function, then display ---
# Register the click handler on the button.
predict_button.on_click(on_button_clicked)

# Render the widgets one after another: input box, button, result area.
for ui_element in (text_input, predict_button, output_area):
    display(ui_element)

Creating interactive prediction widget...
This allows you to test the model live.


NameError: name 'widgets' is not defined