# Sentimental Analyzing Based on Product Review

In [37]:
import pandas as pd
import numpy as np

# Importing the Data

In [38]:
df = pd.read_csv(r"C:\Users\rajes\Downloads\Produc_review.csv")
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [39]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  900 non-null    object
 1   Liked   900 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 14.2+ KB


# Removing Expression and stop Words

In [40]:
import re 
length = len(df['Review'])

stopwords =set(['the', 'is', 'in', 'and', 'to', 'a', 'of', 'for', 'it', 'not', 'this', 'that', 'with', 'as', 'on', 'at', 'by', 'an', 'be'])

clean_review = []
for i in range(length):
    review = re.sub('[^a-zA-Z]',' ',df['Review'][i])
    review = review.lower()
    review = review.split()
    review = [ word for word in review if word not in stopwords]
    review = ' '.join(review)
    clean_review.append(review)
    
df['Clean review'] = clean_review
df.head(3)

Unnamed: 0,Review,Liked,Clean review
0,Wow... Loved this place.,1,wow loved place
1,Crust is not good.,0,crust good
2,Not tasty and the texture was just nasty.,0,tasty texture was just nasty


# Using naive bayes for probability Classification

In [41]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.model_selection import train_test_split
import numpy as np

X = df['Clean review']
y = df['Liked']

vectorizer = CountVectorizer()
X_vec = vectorizer.fit_transform(X).toarray()


X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

param_distributions = {
    'alpha': np.logspace(-4, 4, 10),
    'fit_prior': [True, False]
}

kf = KFold(n_splits=6, shuffle=True, random_state=42)

mnb = MultinomialNB()
random_search = RandomizedSearchCV(mnb, param_distributions=param_distributions, cv=kf, scoring='accuracy', verbose=1, random_state=42)
random_search.fit(X_train, y_train)
best_nb_model = random_search.best_estimator_

print("Best Naive Bayes Model Parameters:", random_search.best_params_)


gbr = GradientBoostingClassifier()
gbr_param_distributions = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}
gbr_random_search = RandomizedSearchCV(gbr, param_distributions=gbr_param_distributions, cv=kf, scoring='accuracy', verbose=1, random_state=42)
gbr_random_search.fit(X_train, y_train)
best_gbr_model = gbr_random_search.best_estimator_

print("Best Gradient Boosting Model Parameters:", gbr_random_search.best_params_)


stacking_clf = StackingClassifier(
    estimators=[('mnb', best_nb_model), ('gbr', best_gbr_model)],
    final_estimator=GradientBoostingClassifier() 
)



stacking_clf.fit(X_train, y_train)
y_pred = stacking_clf.predict(X_test)




Fitting 6 folds for each of 10 candidates, totalling 60 fits
Best Naive Bayes Model Parameters: {'fit_prior': True, 'alpha': 0.3593813663804626}
Fitting 6 folds for each of 10 candidates, totalling 60 fits
Best Gradient Boosting Model Parameters: {'n_estimators': 300, 'max_depth': 7, 'learning_rate': 0.1}


# Cross Valiadtion

In [42]:
from sklearn.model_selection import cross_val_score

stacking_clf_cv_scores = cross_val_score(stacking_clf, X_train, y_train, cv=kf, scoring='accuracy')

print(f"Stacking Classifier CV Scores: {stacking_clf_cv_scores}")
print(f"Mean : {np.mean(stacking_clf_cv_scores):.2f}")

Stacking Classifier CV Scores: [0.7        0.79166667 0.71666667 0.75       0.73333333 0.725     ]
Mean : 0.74


# Model Evaluation

In [43]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
acc_per = accuracy * 100
print(f"Accuracy Percentage: {acc_per:.2f}")

print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 0.77
Precision: 0.80
Recall: 0.76
F1-score: 0.78
Accuracy Percentage: 76.67

Confusion Matrix:
[[64 18]
 [24 74]]

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.78      0.75        82
           1       0.80      0.76      0.78        98

    accuracy                           0.77       180
   macro avg       0.77      0.77      0.77       180
weighted avg       0.77      0.77      0.77       180



# Adding new Reviews for new Sentimental Analyzing

In [44]:
def predict_sentiment(review):
    review_cleaned = re.sub('[^a-zA-Z]', ' ', review).lower().split()
    review_cleaned = ' '.join([word for word in review_cleaned if word not in stopwords])
    review_vec = vectorizer.transform([review_cleaned]).toarray()
    prediction = stacking_clf.predict(review_vec)
    return 'Positive' if prediction[0] == 1 else 'Negative'

new_review = "I absolutely love this product"
print(f"{new_review} - Sentiment: {predict_sentiment(new_review)}")

new_review = "This was a terrible purchase"
print(f"{new_review} - Sentiment: {predict_sentiment(new_review)}")

I absolutely love this product - Sentiment: Positive
This was a terrible purchase - Sentiment: Negative
