In [None]:
# Import required libraries
import json
import os
import sys

import joblib
import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Load environment variables
# NOTE(review): load_dotenv() is called with its defaults; presumably it reads
# a .env file from the working directory or a parent — confirm. No cell visible
# in this section reads os.environ afterwards.
load_dotenv()

# Status message so the reader can see that the import cell ran to completion.
print("Libraries imported successfully!")

## 1. Load Trained Model

In [None]:
# Load model
model_path = "../models/sentiment_model.pkl"

# Fail fast: every later cell dereferences `model`, so carrying on without it
# only defers the failure to a less helpful NameError, or silently reuses a
# stale `model` left in the kernel by an earlier run.
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"Model file not found at {model_path}. "
        "Please train the model first using the training notebook or script."
    )

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model artifacts that come from a trusted source.
model = joblib.load(model_path)
print(f"Model loaded successfully from {model_path}")

# List the pipeline steps as a quick sanity check of what was loaded.
print("\nModel pipeline steps:")
for name, step in model.named_steps.items():
    print(f"  - {name}: {type(step).__name__}")

## 2. Single Text Prediction

In [None]:
# Test with a single text
text = "This product is absolutely amazing! I love it!"

# The model takes a list of documents, so wrap the single text and unwrap the
# first (only) result.
prediction = model.predict([text])[0]
probabilities = model.predict_proba([text])[0]

print(f"Text: {text}")
print(f"\nPrediction: {prediction.upper()}")
print("\nProbabilities:")
# predict_proba columns follow the order of model.classes_, so label each value
# from that attribute instead of assuming index 0 is negative and 1 is positive.
for label, probability in zip(model.classes_, probabilities):
    print(f"  {str(label).capitalize()}: {probability:.4f}")
print(f"\nConfidence: {max(probabilities):.2%}")

## 3. Batch Predictions

In [None]:
# Test with multiple texts
test_texts = [
    "Excellent product! Highly recommend!",
    "Terrible experience. Very disappointed.",
    "Good quality for the price.",
    "Don't waste your money on this.",
    "Best purchase I've made this year!",
    "Poor quality. Broke after one use.",
    "Love it! Works perfectly.",
    "Not worth it. Save your money.",
    "Outstanding performance and value!",
    "Completely useless product."
]

# Make predictions
predictions = model.predict(test_texts)
probabilities = model.predict_proba(test_texts)

# Column i of predict_proba belongs to model.classes_[i]. Build the per-class
# columns from that mapping rather than hard-coding which index is negative and
# which is positive. For labels 'negative'/'positive' this yields the same
# 'Negative_Prob' / 'Positive_Prob' column names as before.
class_prob_columns = {
    f"{str(label).capitalize()}_Prob": probabilities[:, column]
    for column, label in enumerate(model.classes_)
}

# Create results dataframe (one row per input text)
results_df = pd.DataFrame({
    'Text': test_texts,
    'Prediction': predictions,
    # Confidence is the probability of the most likely class for each row.
    'Confidence': probabilities.max(axis=1),
    **class_prob_columns,
})

# Display results
print("Batch Prediction Results:")
print("=" * 100)
display(results_df)

In [None]:
# Summary statistics
# Count each predicted label and pull out the confidence column once, then
# emit the whole report in a single print.
positive_count = (predictions == 'positive').sum()
negative_count = (predictions == 'negative').sum()
confidence_column = results_df['Confidence']

summary_lines = [
    "\nPrediction Summary:",
    f"Total texts: {len(test_texts)}",
    f"Positive predictions: {positive_count}",
    f"Negative predictions: {negative_count}",
    f"\nAverage confidence: {confidence_column.mean():.2%}",
    f"Min confidence: {confidence_column.min():.2%}",
    f"Max confidence: {confidence_column.max():.2%}",
]
print("\n".join(summary_lines))

## 4. Interactive Prediction

In [None]:
# Interactive prediction function
def predict_sentiment(text: str) -> None:
    """Print the predicted sentiment and class probabilities for one text.

    Relies on the notebook-level ``model`` object, calling its ``predict``,
    ``predict_proba`` and ``classes_`` members.

    Args:
        text: Raw text to classify. ``None``, empty, or whitespace-only input
            prints a prompt and returns without calling the model.

    Returns:
        None. The report is written to stdout.
    """
    if not text or not text.strip():
        print("Please enter some text.")
        return

    # The model takes a list of documents; unwrap the single result.
    prediction = model.predict([text])[0]
    probabilities = model.predict_proba([text])[0]
    confidence = max(probabilities) * 100

    # Emoji based on sentiment. Written as escapes so the glyphs cannot be
    # corrupted by a file-encoding round trip (smiling face / disappointed face).
    emoji = "\U0001F60A" if prediction == "positive" else "\U0001F61E"

    print("\n" + "="*80)
    print(f"Text: {text}")
    print(f"\n{emoji} Sentiment: {prediction.upper()}")
    print(f"Confidence: {confidence:.1f}%")
    print("\nProbabilities:")
    # predict_proba columns follow model.classes_, so take the labels from
    # there instead of assuming index 0 is negative and index 1 is positive.
    for label, probability in zip(model.classes_, probabilities):
        print(f"  {str(label).capitalize()}: {probability:.1%}")
    print("="*80 + "\n")

# Example usage: run the helper once on a clearly positive sentence.
example_text = "This is wonderful! I'm so happy with it!"
predict_sentiment(example_text)

In [None]:
# Try your own text
# Uncomment and modify the line below to test your own text
# predict_sentiment("Your custom text here")

## 5. Test with Seldon-Compatible Format

In [None]:
# Simulate Seldon input format
def predict_seldon_format(texts: list) -> dict:
    """Make predictions and wrap them in a Seldon-like response payload.

    Relies on the notebook-level ``model`` object, calling its ``predict``,
    ``predict_proba`` and ``classes_`` members.

    Args:
        texts: List of raw text documents to classify. An empty list yields an
            empty payload without calling the model.

    Returns:
        A JSON-serialisable dict with ``data.names`` (the class labels, in the
        column order of the probabilities), ``data.ndarray`` (one predicted
        label per input) and ``meta.probabilities`` (one row of class
        probabilities per input).
    """
    # predict_proba columns follow model.classes_; report the labels from the
    # model itself so the names can never disagree with the probability columns.
    class_names = [str(label) for label in model.classes_]

    # Return a well-formed empty payload for empty input rather than letting
    # the estimator's own validation error escape.
    if len(texts) == 0:
        return {
            "data": {"names": class_names, "ndarray": []},
            "meta": {"probabilities": []},
        }

    predictions = model.predict(texts)
    probabilities = model.predict_proba(texts)

    # Format similar to Seldon response; .tolist() converts the numpy results
    # into plain Python lists so the payload can be passed to json.dumps.
    return {
        "data": {
            "names": class_names,
            "ndarray": predictions.tolist()
        },
        "meta": {
            "probabilities": probabilities.tolist()
        }
    }

# Test: run the Seldon-style wrapper on two short examples and pretty-print the
# resulting payload. `json` comes from the notebook's top import cell rather
# than being imported mid-notebook.
test_input = ["Great product!", "Terrible quality."]
result = predict_seldon_format(test_input)

print("Seldon-format prediction:")
print(json.dumps(result, indent=2))

## 6. Model Information

In [None]:
# Display model information
# Prints the model path and type, then walks the pipeline steps in order,
# adding extra detail for the steps named 'tfidf' and 'classifier'.
separator = "=" * 80

print("MODEL INFORMATION")
print(separator)
print(f"Model path: {model_path}")
print(f"Model type: {type(model).__name__}")
print("\nPipeline steps:")
for position, (step_name, estimator) in enumerate(model.named_steps.items(), start=1):
    estimator_type = type(estimator).__name__
    print(f"  {position}. {step_name}: {estimator_type}")

    # Steps without get_params have no further detail to report.
    if not hasattr(estimator, 'get_params'):
        continue

    step_params = estimator.get_params()
    if step_name == 'tfidf':
        print(f"     - Max features: {step_params.get('max_features')}")
        print(f"     - N-gram range: {step_params.get('ngram_range')}")
    elif step_name == 'classifier':
        print(f"     - Algorithm: {estimator_type}")
print(separator)