# Sentiment Analysis with Multiple Models

This Jupyter notebook demonstrates sentiment analysis using various machine learning models, including Support Vector Machine (SVM), Logistic Regression, Random Forest, and XGBoost. The notebook also includes steps for hyperparameter tuning, model training, and evaluation.

In [1]:
!pip install nltk scikit-learn pandas
!pip install spacy
!python -m spacy download en_core_web_sm

In [2]:
# Step 1: Import necessary libraries
import spacy
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load SpaCy model
# The pipeline is looked up by package name; it is the English model fetched
# with `spacy download` in the setup cell.
spacy_model_name = 'en_core_web_sm'
nlp = spacy.load(spacy_model_name)


In [4]:
# Step 2: Dataset
# Hard-coded product-review snippets ('text') and one string polarity label
# per snippet ('sentiment': 'positive' or 'negative'), paired by list position.
# NOTE(review): as written the two lists are not the same length ('text' holds
# 109 entries, 'sentiment' holds 84), and from about the 16th entry the labels
# stop tracking the texts (e.g. 'Not happy with the purchase.' lines up with
# 'positive'). The labels need to be re-checked against the texts before any
# metric computed from this data can be trusted.
data = {
    'text': [
        'I love this product!', 'This is the worst thing I have ever bought.', 'I am so happy with my purchase!',
        'Not worth the money.', 'Great value for the price.', 'Amazing quality!', 'Will definitely buy again.',
        'Very disappointed.', 'Highly recommend this!', 'I will never buy this again.', 'Fantastic experience!',
        'The worst customer service ever.', 'Absolutely love it!', 'It broke after a week.', 'Superb quality!',
        'Not happy with the purchase.', 'Perfect for my needs!', 'This was a complete waste of money.',
        'Very satisfied with the result.', 'I would not recommend this to anyone.', 'So easy to use!', 'Terrible quality.',
        'I am really impressed.', 'Could be better.', 'I’m so pleased with this product!', 'Really bad product.',
        'Very useful and practical.', 'Completely unsatisfied.', 'Exceeded my expectations!', 'The quality is awful.',
        'Good product but overpriced.', 'It’s amazing!', 'Waste of time and money.', 'I am in love with this!',
        'Don’t waste your money.', 'Will purchase again.', 'One of the worst things I’ve ever bought.',
        'So convenient and easy to use.', 'Really great value.', 'This is my favorite product!', 'I regret this purchase.',
        'Exceptional service!', 'It broke so quickly.', 'I would not buy this again.', 'Perfect for my family.',
        'Not durable at all.', 'The best I’ve ever used.', 'It’s okay, but could be better.', 'This is a must-have.',
        'Definitely worth the money.', 'This is a piece of junk.', 'Love it so much!', 'Horrible experience.',
        'Just what I needed!', 'It’s a scam!', 'I’m so happy with this.', 'I don’t recommend it.', 'Worth every penny.',
        'Horrible quality.', 'Excellent product!', 'I would definitely not recommend this.', 'Best purchase ever!',
        'Very disappointed with this product.', 'I am thrilled with it!', 'It didn’t work as expected.',
        'Great purchase!', 'This is just what I was looking for.', 'Good, but not great.', 'Amazing performance.',
        'It broke the first time I used it.', 'Love the design!', 'Not worth the price.', 'It’s okay.',
        'Just perfect for me!', 'Very poor quality.', 'Fantastic product!', 'Does not live up to the hype.',
        'A great addition to my collection.', 'Extremely bad purchase.', 'Very happy with this.', 'Horrible.',
        'I can’t stop using it!', 'Really disappointing.', 'Amazing features and performance!', 'This product is terrible.',
        'I absolutely love it!', 'It’s really bad.', 'Totally worth the cost.', 'Poor quality and workmanship.',
        'I use it every day!', 'Not as expected.', 'I recommend this to everyone.', 'Extremely overpriced.',
        'Best decision I ever made!', 'I am unhappy with the quality.', 'Perfect for my needs!', 'Awful.',
        'Highly recommend this product.', 'Wouldn’t buy this again.', 'Incredible product.', 'This is so bad.',
        'So glad I bought it!', 'Very low quality.', 'This product works wonderfully!', 'Would not recommend.',
        'Great buy!', 'Really bad experience.', 'Love it!', 'The product is defective.'
    ],
    'sentiment': [
        'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative',
        'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive',
        'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative',
        'positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive', 'negative',
        'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive',
        'positive', 'negative', 'positive', 'positive', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive',
        'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive',
        'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive',
        'positive', 'negative', 'positive', 'negative'
    ]
}
# Preprocess the dataset
# Cap the dataset at MAX_SAMPLES rows, but never keep more rows than BOTH
# lists can supply. Slicing each list to a fixed 100 leaves them with different
# lengths whenever one list is shorter than 100, and pd.DataFrame then raises
# "All arrays must be of the same length" (and the labels would not line up
# with the texts in train_test_split either).
MAX_SAMPLES = 100
n_texts, n_labels = len(data['text']), len(data['sentiment'])
n_samples = min(MAX_SAMPLES, n_texts, n_labels)
if n_texts != n_labels:
    # Surface the mismatch instead of hiding it: truncation keeps the notebook
    # runnable, but unequal list lengths mean the source data needs attention.
    print(f'Warning: {n_texts} texts but {n_labels} labels; keeping the first {n_samples} of each.')
data['text'] = data['text'][:n_samples]
data['sentiment'] = data['sentiment'][:n_samples]

# Create DataFrame
df = pd.DataFrame(data)

# Convert the 'sentiment' column to numeric labels
# Encoding from the DataFrame column (rather than the raw list) guarantees that
# y has exactly one entry per row of df, in the same order.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['sentiment'])
assert len(df) == len(y) == n_samples


In [5]:
# Step 3: Split the dataset into train and test sets (80% train, 20% test)
X = df['text']
# stratify=y keeps the positive/negative ratio the same in both splits. The
# dataset is capped at 100 rows, so an unstratified 20% hold-out can easily be
# skewed towards one class, which distorts the per-class scores reported later.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 4: Preprocess the text using SpaCy
def preprocess_text_spacy(text):
    """Lower-case `text`, run it through the spaCy pipeline and lemmatize it.

    Only tokens whose surface text is purely alphabetic are kept, so
    punctuation and digits are discarded.
    NOTE(review): contraction pieces that contain an apostrophe fail
    ``str.isalpha`` as well, so negation fragments such as "n't" are
    presumably dropped too - confirm that is acceptable for sentiment work.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The lemmas of the kept tokens joined by single spaces.
    """
    lemmas = []
    for tok in nlp(text.lower()):
        if tok.text.isalpha():
            lemmas.append(tok.lemma_)
    return ' '.join(lemmas)

# Apply preprocessing
# Run the spaCy cleaner element-wise over each split, train first, then test.
X_train_cleaned, X_test_cleaned = (
    split.apply(preprocess_text_spacy) for split in (X_train, X_test)
)

# Step 5: Convert text data into numerical features using TF-IDF
# The vectorizer is fitted on the training texts only; the test texts are then
# transformed with that already-fitted vectorizer.
tfidf_options = {'max_features': 1000}
vectorizer = TfidfVectorizer(**tfidf_options)
X_train_tfidf = vectorizer.fit_transform(X_train_cleaned)
X_test_tfidf = vectorizer.transform(X_test_cleaned)


In [6]:
# Step 6: Train multiple models and evaluate their performance
def fit_and_report(name, model, X_fit, y_fit, X_eval, y_eval):
    """Fit one classifier, score it on the held-out split and print the result.

    Replaces the fit / predict / accuracy / report sequence that was repeated
    verbatim for every model, so all models are evaluated the same way.

    Parameters
    ----------
    name : str
        Display name used as the prefix of the printed accuracy line.
    model : estimator
        Unfitted classifier exposing ``fit`` and ``predict``; fitted in place.
    X_fit, y_fit
        Training features and labels passed to ``model.fit``.
    X_eval, y_eval
        Held-out features and labels used for prediction and scoring.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` for the held-out split.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    accuracy = accuracy_score(y_eval, predictions)
    print(f'{name} Accuracy:', accuracy)
    print(classification_report(y_eval, predictions))
    return predictions, accuracy


# Support Vector Machine (SVM)
svm = SVC(kernel='linear', random_state=42)
svm_pred, svm_accuracy = fit_and_report(
    'SVM', svm, X_train_tfidf, y_train, X_test_tfidf, y_test
)

# Logistic Regression
log_reg = LogisticRegression(random_state=42)
log_reg_pred, log_reg_accuracy = fit_and_report(
    'Logistic Regression', log_reg, X_train_tfidf, y_train, X_test_tfidf, y_test
)

# Random Forest
rf = RandomForestClassifier(random_state=42)
rf_pred, rf_accuracy = fit_and_report(
    'Random Forest', rf, X_train_tfidf, y_train, X_test_tfidf, y_test
)

# XGBoost
xgb_model = xgb.XGBClassifier(random_state=42)
xgb_pred, xgb_accuracy = fit_and_report(
    'XGBoost', xgb_model, X_train_tfidf, y_train, X_test_tfidf, y_test
)