In [1]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt-tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Error loading punkt-tab: Package 'punkt-tab' not found in
[nltk_data]     index


False

In [3]:
reviews = [
    # Positive reviews
    "This product is amazing! I love it so much.",
    "Excellent quality and fast delivery. Highly recommended!",
    "Works perfectly and exceeded my expectations.",
    "Great value for money. Will buy again.",
    "Outstanding product with fantastic features.",
    "Very satisfied with this purchase. Five stars!",
    "Best product I've ever bought. Perfect!",
    "Awesome quality and great customer service.",

    # Negative reviews
    "Terrible product. Waste of money.",
    "Poor quality and stopped working after one day.",
    "Very disappointed with this purchase.",
    "Does not work as described. Avoid this product.",
    "Worst product ever. Complete garbage.",
    "Broken upon arrival. Very angry.",
    "Poor customer service and bad quality.",

    # Neutral reviews
    "The product is okay. Nothing special.",
    "It works but could be better.",
    "Average product for the price.",
    "Not bad, but not great either.",
    "Does the job but has some flaws.",
    "Mixed feelings about this product."
]

sentiments = ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive',
              'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative',
              'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral']

In [4]:
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))


In [5]:
def preprocess_text(text):
  text = text.lower()
  words = word_tokenize(text)
  words = [stemmer.stem(word) for word in words if word not in stop_words]
  return ' '.join(words)

In [7]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [9]:
preprocesses_reviews = [preprocess_text(review) for review in reviews]
vectorizer = CountVectorizer(max_features = 1000)
x = vectorizer.fit_transform(preprocesses_reviews)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(x, sentiments, test_size=0.3, random_state=42)

In [11]:
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions on test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Model Accuracy: 0.14

Classification Report:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         2
     neutral       0.00      0.00      0.00         2
    positive       0.20      0.33      0.25         3

    accuracy                           0.14         7
   macro avg       0.07      0.11      0.08         7
weighted avg       0.09      0.14      0.11         7



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
print("TESTING WITH NEW REVIEWS")
new_reviews = [
    "This is the best product ever! I'm so happy with my purchase.",
    "Terrible quality, broke immediately. Very disappointed.",
    "The product is okay, but I expected more features for the price.",
    "Absolutely love it! Works perfectly and looks great.",
    "Not worth the money. Poor construction and cheap materials.",
    "It's fine, does what it's supposed to but nothing special."
]
new_reviews_preprocessed = [preprocess_text(review) for review in new_reviews]
new_reviews_vectorized = vectorizer.transform(new_reviews_preprocessed)

TESTING WITH NEW REVIEWS


In [13]:
pred = model.predict(new_reviews_vectorized)
probabs = model.predict_proba(new_reviews_vectorized)

In [15]:
sentiment_classes = model.classes_
for i, review in enumerate(new_reviews):
    print(f"\nReview: {review}")
    print(f"Predicted Sentiment: {pred[i]}")


Review: This is the best product ever! I'm so happy with my purchase.
Predicted Sentiment: positive

Review: Terrible quality, broke immediately. Very disappointed.
Predicted Sentiment: negative

Review: The product is okay, but I expected more features for the price.
Predicted Sentiment: positive

Review: Absolutely love it! Works perfectly and looks great.
Predicted Sentiment: positive

Review: Not worth the money. Poor construction and cheap materials.
Predicted Sentiment: negative

Review: It's fine, does what it's supposed to but nothing special.
Predicted Sentiment: neutral
