# Sentimental Analyzing Based on Product Review

In [79]:
import pandas as pd
import numpy as np

# Importing the Data

In [80]:
df = pd.read_csv(r"C:\Users\rajes\Downloads\Produc_review.csv")
# df.head()

In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  900 non-null    object
 1   Liked   900 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 14.2+ KB


# Removing Expression and stop Words

In [82]:
import re 
length = len(df['Review'])

stopwords =set(['the', 'is', 'in', 'and', 'to', 'a', 'of', 'for', 'it', 'not', 'this', 'that', 'with', 'as', 'on', 'at', 'by', 'an', 'be'])

clean_review = []
for i in range(length):
    review = re.sub('[^a-zA-Z]',' ',df['Review'][i])
    review = review.lower()
    review = review.split()
    review = [ word for word in review if word not in stopwords]
    review = ' '.join(review)
    clean_review.append(review)
    
df['Clean review'] = clean_review
df.head(3)

Unnamed: 0,Review,Liked,Clean review
0,Wow... Loved this place.,1,wow loved place
1,Crust is not good.,0,crust good
2,Not tasty and the texture was just nasty.,0,tasty texture was just nasty


# Using naive bayes for probability Classification

In [92]:
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,confusion_matrix,classification_report

X = df['Clean review']
y = df['Liked']

# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

tfidf = TfidfVectorizer()
X_tfidf = tfidf.fit_transform(X).toarray()

X_train,X_test,y_train,y_test = train_test_split(X_tfidf,y, test_size = 0.2,random_state = 42)

nb = GaussianNB()
nb.fit(X_train,y_train)

y_pred = nb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_test)  
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
acc_per  = accuracy * 100
print(f"Accuracy: {acc_per:.2f}")

print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Accuracy: 0.70
Precision: 1.00
Recall: 0.67
F1-score: 0.71
Accuracy: 70.00

Confusion Matrix:
[[60 22]
 [32 66]]

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.73      0.69        82
           1       0.75      0.67      0.71        98

    accuracy                           0.70       180
   macro avg       0.70      0.70      0.70       180
weighted avg       0.71      0.70      0.70       180



# Adding new Reviews for new Sentimental Analyzing

In [97]:
def predict_sentiment(review):
    review_cleaned = re.sub('[^a-zA-Z]', ' ', review).lower().split()
    review_cleaned = ' '.join([word for word in review_cleaned if word not in stopwords])
    review_tfidf = tfidf.transform([review_cleaned]).toarray()
    prediction = nb.predict(review_tfidf)
    return 'Positive' if prediction[0] == 1 else 'Negative'

new_review = "I absolutely love this product"
print(f"{new_review} - Sentiment: {predict_sentiment(new_review)}")

new_review = "This was a terrible purchase"
print(f"{new_review} - Sentiment: {predict_sentiment(new_review)}")

I absolutely love this product - Sentiment: Positive
This was a terrible purchase - Sentiment: Negative
