In [3]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not just the
# last one, so cells in this notebook can show several results at once.
InteractiveShell.ast_node_interactivity = 'all'

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import re
import pickle

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, classification_report


from datetime import datetime
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas and scikit-learn warnings that can
# point at real problems — consider filtering specific categories instead.
warnings.filterwarnings('ignore')

# Plot styling: the matplotlib 'bmh' style sheet is applied first, then the
# seaborn 'darkgrid' style is set on top of it.
plt.style.use('bmh')
sns.set_style('darkgrid')

In [4]:
# Load the raw review dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv("reviews_badminton.csv")
# Preview the first rows to check the columns that were read.
df.head()


Unnamed: 0,Reviewer Name,Review Title,Place of Review,Up Votes,Down Votes,Month,Review text,Ratings
0,Kamal Suresh,Nice product,"Certified Buyer, Chirakkal",889.0,64.0,Feb 2021,"Nice product, good quality, but price is now r...",4
1,Flipkart Customer,Don't waste your money,"Certified Buyer, Hyderabad",109.0,6.0,Feb 2021,They didn't supplied Yonex Mavis 350. Outside ...,1
2,A. S. Raja Srinivasan,Did not meet expectations,"Certified Buyer, Dharmapuri",42.0,3.0,Apr 2021,Worst product. Damaged shuttlecocks packed in ...,1
3,Suresh Narayanasamy,Fair,"Certified Buyer, Chennai",25.0,1.0,,"Quite O. K. , but nowadays the quality of the...",3
4,ASHIK P A,Over priced,,147.0,24.0,Apr 2016,Over pricedJust â?¹620 ..from retailer.I didn'...,1


In [5]:
# Keep only the columns used for modelling: the two text fields and the rating.
# The explicit .copy() makes the result an independent frame, so columns
# added to it in later cells are not written onto a slice of the raw frame
# (the chained-assignment situation pandas warns about).
df = df[["Review Title", "Review text", "Ratings"]].copy()
df.shape


(8518, 3)

In [6]:
def create_sentiment(rating):
    """Map a numeric star rating to a binary sentiment label.

    Parameters
    ----------
    rating : number
        The review's rating value.

    Returns
    -------
    int or float
        ``1`` when ``rating >= 4``, ``0`` when ``rating <= 2``, and
        ``np.nan`` for anything else (for example a rating of 3, or a
        missing rating, for which both comparisons are false).
    """
    # The two ranges are disjoint, so the order of the checks does not matter.
    if rating <= 2:
        label = 0
    elif rating >= 4:
        label = 1
    else:
        label = np.nan
    return label

# Derive the binary target in one chain that returns a fresh frame, instead
# of assigning columns onto the result of an earlier selection / dropna
# (which is the chained-assignment pattern pandas warns about).
df = (
    df
    # 1 = positive, 0 = negative, NaN = neither (see create_sentiment).
    .assign(sentiment=df["Ratings"].apply(create_sentiment))
    # Rows without a sentiment label are excluded from modelling.
    .dropna(subset=["sentiment"])
    # With the NaN rows gone the label can be stored as an integer.
    .astype({"sentiment": int})
)

# Class balance of the remaining rows.
df["sentiment"].value_counts()


sentiment
1    6826
0    1077
Name: count, dtype: int64

In [7]:
# Combine title and body into a single text field. Missing values are
# replaced by empty strings first so the concatenation never yields NaN.
title_part = df["Review Title"].fillna("")
body_part = df["Review text"].fillna("")
df["review"] = title_part + " " + body_part

df[["review", "sentiment"]].head()


Unnamed: 0,review,sentiment
0,"Nice product Nice product, good quality, but p...",1
1,Don't waste your money They didn't supplied Yo...,0
2,Did not meet expectations Worst product. Damag...,0
4,Over priced Over pricedJust â?¹620 ..from reta...,0
5,Mind-blowing purchase Good quality product. De...,1


In [8]:
def clean_text(text):
    """Normalise a raw review string before vectorisation.

    The text is lower-cased, the literal phrase "read more" is removed
    (presumably a truncation marker left in the scraped reviews — confirm
    against the data), every character that is not ``a-z`` or whitespace is
    replaced by a space, and runs of whitespace are collapsed to one space.

    Parameters
    ----------
    text : str or None
        Raw review text. ``None`` and float NaN are treated as an empty
        review; any other non-string value is converted with ``str()``.

    Returns
    -------
    str
        The cleaned text, with no leading or trailing whitespace.
    """
    # Missing values (None / NaN) have no .lower(); treat them as empty text
    # instead of raising AttributeError. NaN is the only float != itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    text = str(text).lower()
    text = re.sub(r"read more", "", text)
    # Digits, punctuation and non-ASCII symbols all become separators.
    text = re.sub(r"[^a-z\s]", " ", text)
    return re.sub(r"\s+", " ", text).strip()

# Apply the text normalisation to every combined review; the result is
# stored alongside the original text in a new column.
df["clean_review"] = df["review"].apply(clean_text)
df["clean_review"].head()


0    nice product nice product good quality but pri...
1    don t waste your money they didn t supplied yo...
2    did not meet expectations worst product damage...
4    over priced over pricedjust from retailer i di...
5    mind blowing purchase good quality product del...
Name: clean_review, dtype: object

In [9]:
# Features are the cleaned review strings; the target is the binary label.
X, y = df["clean_review"], df["sentiment"]

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# ratio the same in both partitions, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

len(X_train), len(X_test)


(6322, 1581)

In [10]:
# TF-IDF features over unigrams and bigrams. Terms that occur in fewer than
# 4 training documents are ignored, and the vocabulary is capped at 6000 terms.
vectorizer = TfidfVectorizer(
    max_features=6000,
    ngram_range=(1, 2),
    min_df=4
)

# The vocabulary and IDF weights are learned from the training split only;
# the test split is transformed with that fitted vocabulary.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [11]:
# Logistic regression with library defaults apart from a raised iteration cap.
# NOTE(review): the labelled data is imbalanced (6826 positive vs 1077
# negative rows before the split) and no class_weight is set — consider
# class_weight="balanced" if recall on negative reviews matters.
model = LogisticRegression(max_iter=1200)
model.fit(X_train_vec, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1200


In [12]:
y_pred = model.predict(X_test_vec)

# f1_score is called with its defaults, so the headline number is the F1 of
# the positive class (label 1) only; the per-class report printed below also
# covers the negative class.
positive_f1 = f1_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("F1 Score:", positive_f1)
print("\nClassification Report:\n")
print(report)


F1 Score: 0.9610853266690468

Classification Report:

              precision    recall  f1-score   support

           0       0.86      0.59      0.70       215
           1       0.94      0.99      0.96      1366

    accuracy                           0.93      1581
   macro avg       0.90      0.79      0.83      1581
weighted avg       0.93      0.93      0.93      1581



In [13]:
# Persist the fitted vectoriser and classifier together as a single
# (vectorizer, model) tuple so they are restored as a matched pair; code that
# loads the file must unpack it in that order.
artifacts = (vectorizer, model)
with open("sentiment_model.pkl", "wb") as model_file:
    pickle.dump(artifacts, model_file)

print("Model saved as sentiment_model.pkl")


Model saved as sentiment_model.pkl


In [14]:
def predict_sentiment(review):
    """Classify a single raw review string.

    The review is cleaned with ``clean_text``, vectorised with the fitted
    module-level ``vectorizer`` and scored by the module-level ``model``.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    str
        ``"Positive"`` when the model predicts label 1, otherwise
        ``"Negative"``.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([clean_text(review)])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"

# Quick sanity check on a clearly negative review.
predict_sentiment("Very bad quality, not worth the money")


'Negative'