# In [None]:
# 1. Setup and Imports
import pandas as pd
import spacy
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Load the persisted models and the vectorizer from the models/ directory.
# Targets on the left pair up positionally with the paths on the right, and
# the files are read in that same order.
(
    svm_model,
    logreg_model,
    rf_model,
    xgb_model,
    tfidf_vectorizer,
) = map(
    joblib.load,
    (
        'models/svm_model.pkl',
        'models/logreg_model.pkl',
        'models/rf_model.pkl',
        'models/xgb_model.pkl',
        'models/tfidf_vectorizer.pkl',
    ),
)

# 2. Load Dataset
# Minimal in-line sample so the notebook runs end to end; replace it with the
# real evaluation data. Every entry must be an actual string: a literal `...`
# is the Ellipsis object, which becomes a real row and breaks preprocessing
# (Ellipsis has no .lower()).
# NOTE(review): scoring the models on the data they were trained on overstates
# their quality - prefer a held-out split if one is available.
df = pd.DataFrame({
    'text': [
        'I love this product!',
        'This is the worst thing I have ever bought.',
    ],
    'sentiment': ['positive', 'negative'],
})

# 3. Data Preprocessing
# spaCy pipeline used by the text-normalisation helper below.
nlp = spacy.load('en_core_web_sm')


def preprocess_text_spacy(text):
    """Return *text* lowercased and reduced to space-separated lemmas.

    The lowercased text is run through the module-level ``nlp`` pipeline;
    only tokens whose ``is_alpha`` flag is set contribute their lemma, every
    other token is dropped.
    """
    alphabetic_tokens = filter(lambda tok: tok.is_alpha, nlp(text.lower()))
    return ' '.join(tok.lemma_ for tok in alphabetic_tokens)

# Normalise every raw document, then split the frame into features and target.
df['cleaned_text'] = df['text'].map(preprocess_text_spacy)
X, y = df['cleaned_text'], df['sentiment']

# Project the cleaned text through the already-fitted vectorizer (transform
# only, no re-fitting) and densify the result for the models.
X_tfidf = tfidf_vectorizer.transform(X).toarray()

# 4. Model Performance Evaluation

# Predict sentiment for the whole dataset with each model. These arrays are
# reused by the confusion-matrix and accuracy sections further down.
svm_pred = svm_model.predict(X_tfidf)
logreg_pred = logreg_model.predict(X_tfidf)
rf_pred = rf_model.predict(X_tfidf)
xgb_pred = xgb_model.predict(X_tfidf)

# NOTE(review): XGBoost classifiers are often trained on integer-encoded
# labels. If xgb_model was, xgb_pred must be decoded back to the string labels
# in y before it is compared with y - confirm against the training code.

# Print a classification report for every model, not only the first two, so
# the text metrics cover the same models as the plots below.
print("SVM Classification Report:\n", classification_report(y, svm_pred))
print("Logistic Regression Classification Report:\n", classification_report(y, logreg_pred))
print("Random Forest Classification Report:\n", classification_report(y, rf_pred))
print("XGBoost Classification Report:\n", classification_report(y, xgb_pred))

# Confusion matrix visualization
def plot_confusion_matrix(y_true, y_pred, model_name, class_names=None):
    """Draw the confusion matrix of one model as an annotated heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Name shown in the figure title.
        class_names: Optional tick labels, one per class, in the row/column
            order of the matrix. When omitted, a 2x2 matrix keeps the
            ``Negative``/``Positive`` labels and any other shape is labelled
            with the classes found in the data.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    cm = confusion_matrix(y_true, y_pred)
    if class_names is None:
        if cm.shape == (2, 2):
            # confusion_matrix orders classes by sorted label, so this assumes
            # the negative class sorts first (true for 'negative'/'positive'
            # and for 0/1).
            class_names = ['Negative', 'Positive']
        else:
            # Hard-coded binary labels would not match the matrix here; mirror
            # confusion_matrix's default ordering instead: the sorted labels
            # seen in either input.
            class_names = [str(label) for label in sorted(set(y_true) | set(y_pred))]
    plt.figure(figsize=(6, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title(f'Confusion Matrix: {model_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Plot one confusion matrix per model, in the same order as the reports.
for _model_name, _model_pred in (
    ("SVM", svm_pred),
    ("Logistic Regression", logreg_pred),
    ("Random Forest", rf_pred),
    ("XGBoost", xgb_pred),
):
    plot_confusion_matrix(y, _model_pred, _model_name)

# 5. Model Comparison
# Accuracy per model: the fraction of rows whose prediction equals the label
# in y. The scores are listed in the same order as the names in `models`.
models = ['SVM', 'Logistic Regression', 'Random Forest', 'XGBoost']
accuracy_scores = [
    (model_pred == y).mean()
    for model_pred in (svm_pred, logreg_pred, rf_pred, xgb_pred)
]

# Bar chart with one bar per model.
plt.bar(models, accuracy_scores)
plt.title('Model Comparison')
plt.xlabel('Model')
plt.ylabel('Accuracy')
plt.show()

# 6. Predict Custom Text
# Example of how to use the models for a custom text prediction.
# The text goes through the same preprocessing and the same fitted vectorizer
# as the dataset above.
text = "I absolutely love this product!"
processed_text = preprocess_text_spacy(text)
text_tfidf = tfidf_vectorizer.transform([processed_text]).toarray()

# Collect each model's prediction for the single sample ([0] picks that one
# row). The results are deliberately NOT stored in svm_pred / logreg_pred /
# rf_pred / xgb_pred: those names hold the full-dataset predictions used by
# the evaluation sections, and overwriting them would corrupt the metrics if
# those cells are re-run.
predictions = {
    "SVM": svm_model.predict(text_tfidf)[0],
    "Logistic Regression": logreg_model.predict(text_tfidf)[0],
    "Random Forest": rf_model.predict(text_tfidf)[0],
    "XGBoost": xgb_model.predict(text_tfidf)[0],
}

print("Predictions for the text:", predictions)
