In [1]:
# Import required libraries
import os

import joblib
import pandas as pd
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

print("Libraries imported successfully!")

Libraries imported successfully!


## 1. Load Trained Model

In [2]:
# Load model
model_path = "../models/sentiment_model.pkl"

if not os.path.exists(model_path):
    print(f"Model file not found at {model_path}")
    print("Please train the model first using the training notebook or script.")
else:
    model = joblib.load(model_path)
    print(f"Model loaded successfully from {model_path}")
    print("\nModel pipeline steps:")
    for name, step in model.named_steps.items():
        print(f"  - {name}: {type(step).__name__}")

Model loaded successfully from ../models/sentiment_model.pkl

Model pipeline steps:
  - tfidf: TfidfVectorizer
  - classifier: LogisticRegression


## 2. Single Text Prediction

In [3]:
# Test with a single text
text = "This product is absolutely amazing! I love it!"

prediction = model.predict([text])[0]
probabilities = model.predict_proba([text])[0]

print(f"Text: {text}")
print(f"\nPrediction: {prediction.upper()}")
print("\nProbabilities:")
print(f"  Negative: {probabilities[0]:.4f}")
print(f"  Positive: {probabilities[1]:.4f}")
print(f"\nConfidence: {max(probabilities):.2%}")

Text: This product is absolutely amazing! I love it!

Prediction: POSITIVE

Probabilities:
  Negative: 0.0170
  Positive: 0.0170

Confidence: 96.60%


## 3. Batch Predictions

In [4]:
# Test with multiple texts
test_texts = [
    "Excellent product! Highly recommend!",
    "Terrible experience. Very disappointed.",
    "Good quality for the price.",
    "Don't waste your money on this.",
    "Best purchase I've made this year!",
    "Poor quality. Broke after one use.",
    "Love it! Works perfectly.",
    "Not worth it. Save your money.",
    "Outstanding performance and value!",
    "Completely useless product."
]

# Make predictions
predictions = model.predict(test_texts)
probabilities = model.predict_proba(test_texts)

# Create results dataframe
results_df = pd.DataFrame({
    'Text': test_texts,
    'Prediction': predictions,
    'Confidence': [max(p) for p in probabilities],
    'Negative_Prob': [p[0] for p in probabilities],
    'Positive_Prob': [p[1] for p in probabilities]
})

# Display results
print("Batch Prediction Results:")
print("=" * 100)
display(results_df)

Batch Prediction Results:


Unnamed: 0,Text,Prediction,Confidence,Negative_Prob,Positive_Prob
0,Excellent product! Highly recommend!,positive,0.778449,0.143564,0.077987
1,Terrible experience. Very disappointed.,negative,0.856062,0.856062,0.089819
2,Good quality for the price.,neutral,0.636698,0.199373,0.636698
3,Don't waste your money on this.,negative,0.88647,0.88647,0.054752
4,Best purchase I've made this year!,positive,0.900833,0.049708,0.049459
5,Poor quality. Broke after one use.,negative,0.908097,0.908097,0.03364
6,Love it! Works perfectly.,positive,0.722881,0.09847,0.178649
7,Not worth it. Save your money.,negative,0.817878,0.817878,0.058647
8,Outstanding performance and value!,positive,0.842422,0.080375,0.077204
9,Completely useless product.,negative,0.622108,0.622108,0.1859


In [5]:
# Summary statistics
print("\nPrediction Summary:")
print(f"Total texts: {len(test_texts)}")
print(f"Positive predictions: {sum(predictions == 'positive')}")
print(f"Negative predictions: {sum(predictions == 'negative')}")
print(f"\nAverage confidence: {results_df['Confidence'].mean():.2%}")
print(f"Min confidence: {results_df['Confidence'].min():.2%}")
print(f"Max confidence: {results_df['Confidence'].max():.2%}")


Prediction Summary:
Total texts: 10
Positive predictions: 4
Negative predictions: 5

Average confidence: 79.72%
Min confidence: 62.21%
Max confidence: 90.81%


## 4. Interactive Prediction

In [6]:
# Interactive prediction function
def predict_sentiment(text: str) -> None:
    """Predict sentiment for a given text."""
    if not text.strip():
        print("Please enter some text.")
        return

    prediction = model.predict([text])[0]
    probabilities = model.predict_proba([text])[0]
    confidence = max(probabilities) * 100

    # Emoji based on sentiment
    emoji = "ðŸ˜Š" if prediction == "positive" else "ðŸ˜ž"

    print("\n" + "="*80)
    print(f"Text: {text}")
    print(f"\n{emoji} Sentiment: {prediction.upper()}")
    print(f"Confidence: {confidence:.1f}%")
    print("\nProbabilities:")
    print(f"  Negative: {probabilities[0]:.1%}")
    print(f"  Positive: {probabilities[1]:.1%}")
    print("="*80 + "\n")

# Example usage
predict_sentiment("This is wonderful! I'm so happy with it!")


Text: This is wonderful! I'm so happy with it!

ðŸ˜Š Sentiment: POSITIVE
Confidence: 77.3%

Probabilities:
  Negative: 10.8%
  Positive: 12.0%



In [7]:
# Try your own text
# Uncomment and modify the line below to test your own text
# predict_sentiment("Your custom text here")

## 5. Test with Seldon-Compatible Format

In [8]:
# Simulate Seldon input format
def predict_seldon_format(texts: list) -> dict:
    """Make predictions in Seldon-compatible format."""
    predictions = model.predict(texts)
    probabilities = model.predict_proba(texts)

    # Format similar to Seldon response
    return {
        "data": {
            "names": ["negative", "positive"],
            "ndarray": predictions.tolist()
        },
        "meta": {
            "probabilities": probabilities.tolist()
        }
    }

# Test
test_input = ["Great product!", "Terrible quality."]
result = predict_seldon_format(test_input)

print("Seldon-format prediction:")
import json

print(json.dumps(result, indent=2))

Seldon-format prediction:
{
  "data": {
    "names": [
      "negative",
      "positive"
    ],
    "ndarray": [
      "neutral",
      "negative"
    ]
  },
  "meta": {
    "probabilities": [
      [
        0.11396328636917727,
        0.48745992575282143,
        0.3985767878780013
      ],
      [
        0.8940487369631132,
        0.048701224693808345,
        0.05725003834307856
      ]
    ]
  }
}


## 6. Model Information

In [9]:
# Display model information
print("MODEL INFORMATION")
print("=" * 80)
print(f"Model path: {model_path}")
print(f"Model type: {type(model).__name__}")
print("\nPipeline steps:")
for i, (name, step) in enumerate(model.named_steps.items(), 1):
    print(f"  {i}. {name}: {type(step).__name__}")
    if hasattr(step, 'get_params'):
        params = step.get_params()
        if name == 'tfidf':
            print(f"     - Max features: {params.get('max_features')}")
            print(f"     - N-gram range: {params.get('ngram_range')}")
        elif name == 'classifier':
            print(f"     - Algorithm: {type(step).__name__}")
print("=" * 80)

MODEL INFORMATION
Model path: ../models/sentiment_model.pkl
Model type: Pipeline

Pipeline steps:
  1. tfidf: TfidfVectorizer
     - Max features: 5000
     - N-gram range: (1, 2)
  2. classifier: LogisticRegression
     - Algorithm: LogisticRegression
