In [1]:
!pip install nltk scikit-learn pandas
!pip install spacy
!python -m spacy download en_core_web_sm

In [2]:
import spacy
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# Load the small English spaCy pipeline once; preprocess_text_spacy() calls this object.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Hand-written demo corpus of short product reviews.
#
# Every review is stored next to its own label. Keeping texts and labels in two
# separate parallel lists allows them to end up with different lengths (which makes
# pd.DataFrame(data) raise) or to drift out of step (which silently mislabels reviews).
#
# NOTE(review): there is no neutral class, so lukewarm / mixed reviews
# ('Could be better.', 'Good product but overpriced.', 'It’s okay.', ...) are
# labelled 'negative' here because they are not endorsements — confirm this choice.
labelled_reviews = [
    ('I love this product!', 'positive'),
    ('This is the worst thing I have ever bought.', 'negative'),
    ('I am so happy with my purchase!', 'positive'),
    ('Not worth the money.', 'negative'),
    ('Great value for the price.', 'positive'),
    ('Amazing quality!', 'positive'),
    ('Will definitely buy again.', 'positive'),
    ('Very disappointed.', 'negative'),
    ('Highly recommend this!', 'positive'),
    ('I will never buy this again.', 'negative'),
    ('Fantastic experience!', 'positive'),
    ('The worst customer service ever.', 'negative'),
    ('Absolutely love it!', 'positive'),
    ('It broke after a week.', 'negative'),
    ('Superb quality!', 'positive'),
    ('Not happy with the purchase.', 'negative'),
    ('Perfect for my needs!', 'positive'),
    ('This was a complete waste of money.', 'negative'),
    ('Very satisfied with the result.', 'positive'),
    ('I would not recommend this to anyone.', 'negative'),
    ('So easy to use!', 'positive'),
    ('Terrible quality.', 'negative'),
    ('I am really impressed.', 'positive'),
    ('Could be better.', 'negative'),
    ('I’m so pleased with this product!', 'positive'),
    ('Really bad product.', 'negative'),
    ('Very useful and practical.', 'positive'),
    ('Completely unsatisfied.', 'negative'),
    ('Exceeded my expectations!', 'positive'),
    ('The quality is awful.', 'negative'),
    ('Good product but overpriced.', 'negative'),
    ('It’s amazing!', 'positive'),
    ('Waste of time and money.', 'negative'),
    ('I am in love with this!', 'positive'),
    ('Don’t waste your money.', 'negative'),
    ('Will purchase again.', 'positive'),
    ('One of the worst things I’ve ever bought.', 'negative'),
    ('So convenient and easy to use.', 'positive'),
    ('Really great value.', 'positive'),
    ('This is my favorite product!', 'positive'),
    ('I regret this purchase.', 'negative'),
    ('Exceptional service!', 'positive'),
    ('It broke so quickly.', 'negative'),
    ('I would not buy this again.', 'negative'),
    ('Perfect for my family.', 'positive'),
    ('Not durable at all.', 'negative'),
    ('The best I’ve ever used.', 'positive'),
    ('It’s okay, but could be better.', 'negative'),
    ('This is a must-have.', 'positive'),
    ('Definitely worth the money.', 'positive'),
    ('This is a piece of junk.', 'negative'),
    ('Love it so much!', 'positive'),
    ('Horrible experience.', 'negative'),
    ('Just what I needed!', 'positive'),
    ('It’s a scam!', 'negative'),
    ('I’m so happy with this.', 'positive'),
    ('I don’t recommend it.', 'negative'),
    ('Worth every penny.', 'positive'),
    ('Horrible quality.', 'negative'),
    ('Excellent product!', 'positive'),
    ('I would definitely not recommend this.', 'negative'),
    ('Best purchase ever!', 'positive'),
    ('Very disappointed with this product.', 'negative'),
    ('I am thrilled with it!', 'positive'),
    ('It didn’t work as expected.', 'negative'),
    ('Great purchase!', 'positive'),
    ('This is just what I was looking for.', 'positive'),
    ('Good, but not great.', 'negative'),
    ('Amazing performance.', 'positive'),
    ('It broke the first time I used it.', 'negative'),
    ('Love the design!', 'positive'),
    ('Not worth the price.', 'negative'),
    ('It’s okay.', 'negative'),
    ('Just perfect for me!', 'positive'),
    ('Very poor quality.', 'negative'),
    ('Fantastic product!', 'positive'),
    ('Does not live up to the hype.', 'negative'),
    ('A great addition to my collection.', 'positive'),
    ('Extremely bad purchase.', 'negative'),
    ('Very happy with this.', 'positive'),
    ('Horrible.', 'negative'),
    ('I can’t stop using it!', 'positive'),
    ('Really disappointing.', 'negative'),
    ('Amazing features and performance!', 'positive'),
    ('This product is terrible.', 'negative'),
    ('I absolutely love it!', 'positive'),
    ('It’s really bad.', 'negative'),
    ('Totally worth the cost.', 'positive'),
    ('Poor quality and workmanship.', 'negative'),
    ('I use it every day!', 'positive'),
    ('Not as expected.', 'negative'),
    ('I recommend this to everyone.', 'positive'),
    ('Extremely overpriced.', 'negative'),
    ('Best decision I ever made!', 'positive'),
    ('I am unhappy with the quality.', 'negative'),
    ('Perfect for my needs!', 'positive'),
    ('Awful.', 'negative'),
    ('Highly recommend this product.', 'positive'),
    ('Wouldn’t buy this again.', 'negative'),
    ('Incredible product.', 'positive'),
    ('This is so bad.', 'negative'),
    ('So glad I bought it!', 'positive'),
    ('Very low quality.', 'negative'),
    ('This product works wonderfully!', 'positive'),
    ('Would not recommend.', 'negative'),
    ('Great buy!', 'positive'),
    ('Really bad experience.', 'negative'),
    ('Love it!', 'positive'),
    ('The product is defective.', 'negative'),
]

# Only the first 100 labelled reviews are used. Both columns are built from the same
# slice of pairs, so they always have equal length and matching order.
data = {
    'text': [review for review, _label in labelled_reviews[:100]],
    'sentiment': [label for _review, label in labelled_reviews[:100]],
}


In [4]:
# Build the frame first so that features and labels are both read from the same rows.
df = pd.DataFrame(data)
X = df['text']

# Encode the string labels as integers. LabelEncoder orders classes alphabetically,
# so 'negative' -> 0 and 'positive' -> 1.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['sentiment'])

# Hold out 20% of the rows for testing. stratify=y keeps the positive/negative ratio
# the same in both splits, which matters on a dataset this small.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
def preprocess_text_spacy(text):
    """Return ``text`` lower-cased and reduced to space-joined spaCy lemmas.

    The string is lower-cased and run through the module-level ``nlp`` pipeline.
    Each token whose surface text is purely alphabetic contributes its lemma;
    every other token (anything for which ``str.isalpha()`` is false, such as
    punctuation or digits) is dropped.

    NOTE(review): token pieces containing an apostrophe also fail ``isalpha()``.
    If spaCy splits a contraction such as "don’t" into "do" + "n’t", the
    negation is discarded here — worth confirming for a sentiment task.
    """
    kept_lemmas = []
    for tok in nlp(text.lower()):
        if tok.text.isalpha():
            kept_lemmas.append(tok.lemma_)
    return ' '.join(kept_lemmas)
# Run the same spaCy-based cleaner over the training and the test reviews.
X_train_cleaned, X_test_cleaned = (
    split.apply(preprocess_text_spacy) for split in (X_train, X_test)
)

# Learn the TF-IDF vocabulary (capped at 5000 terms) from the training reviews only,
# then project both splits with that fitted vectorizer and densify the results.
tfidf = TfidfVectorizer(max_features=5000)
tfidf.fit(X_train_cleaned)
X_train_tfidf = tfidf.transform(X_train_cleaned).toarray()
X_test_tfidf = tfidf.transform(X_test_cleaned).toarray()

In [5]:
# Hyper-parameter grid for the support vector classifier.
# Note: gamma only affects the rbf kernel here; the linear candidates are simply
# evaluated once per gamma value.
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# 5-fold cross-validated grid search on the training split. With the default
# refit behaviour, predict() below uses the best configuration refit on all
# training rows.
grid_search_svm = GridSearchCV(SVC(), param_grid_svm, cv=5)
grid_search_svm.fit(X_train_tfidf, y_train)
best_params_svm = grid_search_svm.best_params_
print('SVM Best Parameters:', best_params_svm)

# Evaluate on the held-out test split.
y_pred_svm = grid_search_svm.predict(X_test_tfidf)
print('SVM Model Accuracy:', accuracy_score(y_test, y_pred_svm))
# sep='\n' starts the multi-line matrix/report on a fresh line so that their
# rows and column headers stay aligned.
print('SVM Confusion Matrix:', confusion_matrix(y_test, y_pred_svm), sep='\n')
print('SVM Classification Report:', classification_report(y_test, y_pred_svm), sep='\n')

In [6]:
# Hyper-parameter grid for the random forest.
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Random forests are stochastic (bootstrap samples, random feature subsets), so the
# estimator is seeded; otherwise the selected parameters and the scores change
# from run to run.
grid_search_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=5)
grid_search_rf.fit(X_train_tfidf, y_train)
best_params_rf = grid_search_rf.best_params_
print('Random Forest Best Parameters:', best_params_rf)

# Evaluate on the held-out test split.
y_pred_rf = grid_search_rf.predict(X_test_tfidf)
print('Random Forest Model Accuracy:', accuracy_score(y_test, y_pred_rf))
# sep='\n' starts the multi-line matrix/report on a fresh line so that their
# rows and column headers stay aligned.
print('Random Forest Confusion Matrix:', confusion_matrix(y_test, y_pred_rf), sep='\n')
print('Random Forest Classification Report:', classification_report(y_test, y_pred_rf), sep='\n')

In [7]:
# Hyper-parameter grid for logistic regression.
param_grid_lr = {
    'C': [0.1, 1, 10],
    'solver': ['liblinear', 'saga']
}

# Both solvers shuffle the data internally, so the estimator is seeded for
# reproducible results. max_iter is raised from the default of 100 because
# 'saga' may otherwise stop before converging (ConvergenceWarning).
grid_search_lr = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42), param_grid_lr, cv=5
)
grid_search_lr.fit(X_train_tfidf, y_train)
best_params_lr = grid_search_lr.best_params_
print('Logistic Regression Best Parameters:', best_params_lr)

# Evaluate on the held-out test split.
y_pred_lr = grid_search_lr.predict(X_test_tfidf)
print('Logistic Regression Model Accuracy:', accuracy_score(y_test, y_pred_lr))
# sep='\n' starts the multi-line matrix/report on a fresh line so that their
# rows and column headers stay aligned.
print('Logistic Regression Confusion Matrix:', confusion_matrix(y_test, y_pred_lr), sep='\n')
print('Logistic Regression Classification Report:', classification_report(y_test, y_pred_lr), sep='\n')

In [8]:
# Hyper-parameter grid for the gradient-boosted trees.
param_grid_xgb = {
    'n_estimators': [50, 100],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 6, 10]
}

# The seed is pinned explicitly so the run does not rely on the library's default seed.
grid_search_xgb = GridSearchCV(xgb.XGBClassifier(random_state=42), param_grid_xgb, cv=5)
grid_search_xgb.fit(X_train_tfidf, y_train)
best_params_xgb = grid_search_xgb.best_params_
print('XGBoost Best Parameters:', best_params_xgb)

# Evaluate on the held-out test split.
y_pred_xgb = grid_search_xgb.predict(X_test_tfidf)
print('XGBoost Model Accuracy:', accuracy_score(y_test, y_pred_xgb))
# sep='\n' starts the multi-line matrix/report on a fresh line so that their
# rows and column headers stay aligned.
print('XGBoost Confusion Matrix:', confusion_matrix(y_test, y_pred_xgb), sep='\n')
print('XGBoost Classification Report:', classification_report(y_test, y_pred_xgb), sep='\n')