In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [26]:
# Read the labelled reviews from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer="womens_clothing_ecommerce_reviews.csv")

# Plain-text preview of the first five rows (review text and sentiment label).
print(df.head(n=5))

                                         Review Text  sentiment
0  Absolutely wonderful - silky and sexy and comf...          1
1  Love this dress!  it's sooo pretty.  i happene...          1
2  I love, love, love this jumpsuit. it's fun, fl...          1
3  This shirt is very flattering to all due to th...          1
4  I love tracy reese dresses, but this one is no...         -1


In [27]:
# Print a structural summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage. Useful for spotting missing values
# before the text is vectorised.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19818 entries, 0 to 19817
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Review Text  19818 non-null  object
 1   sentiment    19818 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 309.8+ KB


In [28]:
# Inputs are the raw review strings; targets are the sentiment labels.
X, y = df['Review Text'], df['sentiment']

# Hold out 20% of the rows for evaluation. `stratify=y` keeps the proportion
# of positive and negative labels the same in the train and test splits, and
# the fixed `random_state` makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print("\nConverting text to numericsl features using Bag-of-words...")

# Bag-of-words token counts with English stop words removed.
vectorizer = CountVectorizer(stop_words='english')

# The vocabulary is learned from the training reviews only; the held-out
# reviews are then mapped onto that same vocabulary, so nothing from the
# test split influences the features.
X_train_bow = vectorizer.fit_transform(X_train)
X_test_bow = vectorizer.transform(X_test)

print("Text successfully converted to feature vectors")




Converting text to numericsl features using Bag-of-words...
Text successfully converted to feature vectors


In [29]:
# Fit a logistic-regression classifier on the bag-of-words training matrix.
# `fit` returns the estimator itself, so construction and training are chained.
# max_iter is raised to 2000 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=2000).fit(X_train_bow, y_train)
print("Model training complete")

Model training complete


In [30]:
# Score the held-out reviews and compare the predictions with the true labels.
y_pred = model.predict(X_test_bow)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy both as a fraction and as a percentage.
print(f"Model Accuracy: {accuracy:.4f} ({accuracy:.2%})")

Model Accuracy: 0.9294 (92.94%)


In [34]:
# Hand-written reviews (two favourable, two unfavourable) used as a quick
# sanity check of the trained classifier on unseen text.
reviews = [
    "I have ordered twice now and both times the clothes exceeded my expectations. The fabric is soft, the colors are vibrant, and the sizing was spot on. I love that I can get trendy outfits without breaking the bank. Will definitely order again!",
    "My dress arrived with a tiny defect, but when I contacted support, they responded quickly and sent me a replacement in just a few days. The new one was perfect. Super impressed with how they handled it!",
    "I ordered a blouse that looked silky and elegant in the pictures, but in person, the material felt cheap and the fit was off. Disappointed, especially since I was really looking forward to wearing it for a special event.",
    "Placed an order over 3 weeks ago and still haven’t received it. I emailed customer service twice with no reply. Very frustrating experience. I won’t be shopping here again unless they improve their communication.",
]

# Reuse the already-fitted vectorizer so the new texts are encoded with the
# same vocabulary the model was trained on, then predict in one call.
new_predictions = model.predict(vectorizer.transform(reviews))

# Show each review next to its predicted label.
for review_text, predicted_label in zip(reviews, new_predictions):
    print(f"Review: {review_text}")
    print(f"predicted sentiment: {predicted_label}\n")

Review: I have ordered twice now and both times the clothes exceeded my expectations. The fabric is soft, the colors are vibrant, and the sizing was spot on. I love that I can get trendy outfits without breaking the bank. Will definitely order again!
predicted sentiment: 1

Review: My dress arrived with a tiny defect, but when I contacted support, they responded quickly and sent me a replacement in just a few days. The new one was perfect. Super impressed with how they handled it!
predicted sentiment: 1

Review: I ordered a blouse that looked silky and elegant in the pictures, but in person, the material felt cheap and the fit was off. Disappointed, especially since I was really looking forward to wearing it for a special event.
predicted sentiment: -1

Review: Placed an order over 3 weeks ago and still haven’t received it. I emailed customer service twice with no reply. Very frustrating experience. I won’t be shopping here again unless they improve their communication.
predicted senti