In [13]:
# Sentiment Analysis of Product Reviews

# Objective:
#This project aims to classify customer reviews into positive, negative, or neutral categories using a hybrid sentiment analysis approach. By leveraging Natural Language Processing (NLP) techniques, the goal is to help businesses better understand customer attitudes and enhance their products and services.

In [21]:

""" **Project Workflow:**

1. **Data Collection**
2. **Data Preprocessing**
3. **Sentiment Labeling (Hybrid Approach)**
4. **Feature Engineering**
5. **Model Training & Evaluation**
6. **Prediction & Testing** """

' **Project Workflow:**\n\n1. **Data Collection**\n2. **Data Preprocessing**\n3. **Sentiment Labeling (Hybrid Approach)**\n4. **Feature Engineering**\n5. **Model Training & Evaluation**\n6. **Prediction & Testing** '

In [23]:
import pandas as pd

In [25]:
# Example Product Reviews Data
reviews_data = {
    'review': [
        "The product is fantastic and works perfectly.",
        "Absolutely horrible experience, will never buy again!",
        "It's okay, not too good but not bad either.",
        "This is the best purchase I've made this year!",
        "Waste of money, the item broke within a week.",
        "Decent performance but overpriced.",
        "Fast delivery and excellent service.",
        "Terrible customer service experience.",
    ]
}

In [29]:
df = pd.DataFrame(reviews_data)
df

Unnamed: 0,review
0,The product is fantastic and works perfectly.
1,"Absolutely horrible experience, will never buy..."
2,"It's okay, not too good but not bad either."
3,This is the best purchase I've made this year!
4,"Waste of money, the item broke within a week."
5,Decent performance but overpriced.
6,Fast delivery and excellent service.
7,Terrible customer service experience.


In [31]:
df.head()

Unnamed: 0,review
0,The product is fantastic and works perfectly.
1,"Absolutely horrible experience, will never buy..."
2,"It's okay, not too good but not bad either."
3,This is the best purchase I've made this year!
4,"Waste of money, the item broke within a week."


In [37]:
import re
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to C:\Users\Manish
[nltk_data]     Tailor\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [39]:
# Function to clean text
def clean_text(text):
    # Remove punctuation and special characters
    text = re.sub(r'[^a-zA-Z ]', '', text)
    # Convert to lowercase
    text = text.lower()
    # Remove stopwords
    text = ' '.join(word for word in text.split() if word not in stop_words)
    return text

# Apply cleaning function
df['cleaned_review'] = df['review'].apply(clean_text)
print("Cleaned Reviews:\n", df[['review', 'cleaned_review']].head())

Cleaned Reviews:
                                               review  \
0      The product is fantastic and works perfectly.   
1  Absolutely horrible experience, will never buy...   
2        It's okay, not too good but not bad either.   
3     This is the best purchase I've made this year!   
4      Waste of money, the item broke within a week.   

                             cleaned_review  
0         product fantastic works perfectly  
1  absolutely horrible experience never buy  
2                      okay good bad either  
3               best purchase ive made year  
4        waste money item broke within week  


In [41]:
# **3. Sentiment Labeling (Hybrid Approach)**

#**Rule-Based Sentiment Analysis:**
"""We use TextBlob to assign a sentiment polarity score.
- Positive score: `positive`
- Negative score: `negative`
- Near-zero score: `neutral`"""

'We use TextBlob to assign a sentiment polarity score.\n- Positive score: `positive`\n- Negative score: `negative`\n- Near-zero score: `neutral`'

In [47]:
!pip install textblob
from textblob import TextBlob

def get_sentiment(text):
    sentiment = TextBlob(text).sentiment.polarity
    if sentiment > 0.1:
        return 'positive'
    elif sentiment < -0.1:
        return 'negative'
    else:
        return 'neutral'

Collecting textblob
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting nltk>=3.9 (from textblob)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
    --------------------------------------- 10.2/624.3 kB ? eta -:--:--
   -- ------------------------------------ 41.0/624.3 kB 653.6 kB/s eta 0:00:01
   ----- --------------------------------- 92.2/624.3 kB 871.5 kB/s eta 0:00:01
   --------- ------------------------------ 153.6/624.3 kB 1.0 MB/s eta 0:00:01
   ------------- -------------------------- 204.8/624.3 kB 1.0 MB/s eta 0:00:01
   ----------------- ---------------------- 276.5/624.3 kB 1.1 MB/s eta 0:00:01
   -------------------- ------------------- 327.7/624.3 kB 1.1 MB/s eta 0:00:01
   ------------------------ --------------- 389.1/624.3 kB 1.2 MB/s eta 0:00:01
   ---------------------------- ----------- 450.6/624.3 kB 1.

In [49]:
# Apply sentiment labeling
df['sentiment'] = df['review'].apply(get_sentiment)
print("Dataset with Sentiment Labels:\n", df)

Dataset with Sentiment Labels:
                                               review  \
0      The product is fantastic and works perfectly.   
1  Absolutely horrible experience, will never buy...   
2        It's okay, not too good but not bad either.   
3     This is the best purchase I've made this year!   
4      Waste of money, the item broke within a week.   
5                 Decent performance but overpriced.   
6               Fast delivery and excellent service.   
7              Terrible customer service experience.   

                             cleaned_review sentiment  
0         product fantastic works perfectly  positive  
1  absolutely horrible experience never buy  negative  
2                      okay good bad either  positive  
3               best purchase ive made year  positive  
4        waste money item broke within week  negative  
5             decent performance overpriced  positive  
6           fast delivery excellent service  positive  
7      terrible

In [55]:
# **4. Feature Engineering (TF-IDF Vectorization)**

#We use TF-IDF to convert textual data into numerical features suitable for machine learning models.
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorize the cleaned text
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_review'])
y = df['sentiment']

In [61]:
# **5. Model Training & Evaluation:**

#We use the Naive Bayes classifier for text classification.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
model = MultinomialNB()
model.fit(X_train, y_train)

In [63]:
# Evaluate the model
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Confusion Matrix:
 [[0 1]
 [0 2]]
Classification Report:
               precision    recall  f1-score   support

    negative       0.00      0.00      0.00         1
    positive       0.67      1.00      0.80         2

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [71]:
# **6. Prediction & Testing:**
#Test the model with new product reviews.
# Example test reviews
sample_reviews = [
    "The delivery was late, but the product quality is superb.",
    "Horrible service and defective product.",
    "It's an okay purchase, nothing special.",
]

In [77]:
# Clean and vectorize the sample reviews
sample_cleaned = [clean_text(text) for text in sample_reviews]
sample_vectorized = vectorizer.transform(sample_cleaned)

In [79]:
# Make predictions
sample_predictions = model.predict(sample_vectorized)
for review, sentiment in zip(sample_reviews, sample_predictions):
    print(f"Review: '{review}' => Predicted Sentiment: {sentiment}")

Review: 'The delivery was late, but the product quality is superb.' => Predicted Sentiment: positive
Review: 'Horrible service and defective product.' => Predicted Sentiment: positive
Review: 'It's an okay purchase, nothing special.' => Predicted Sentiment: positive


In [83]:
# **Future Enhancements:**
'''1. Use advanced models like Support Vector Machines (SVM) or BERT for better accuracy.
2. Perform hyperparameter tuning to improve model performance.
3. Create a web interface for businesses to input reviews and get instant sentiment feedback.
4. Visualize sentiment trends using dashboards.'''

#This project demonstrates the use of NLP techniques and hybrid sentiment analysis to classify customer feedback, helping businesses gain insights to improve products and services.


'1. Use advanced models like Support Vector Machines (SVM) or BERT for better accuracy.\n2. Perform hyperparameter tuning to improve model performance.\n3. Create a web interface for businesses to input reviews and get instant sentiment feedback.\n4. Visualize sentiment trends using dashboards.'