In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the product reviews CSV from the working directory into a DataFrame
data = pd.read_csv(filepath_or_buffer='product_reviews.csv')

In [3]:
# Preview the first five rows to check the columns that were loaded
data.head(n=5)

Unnamed: 0,ReviewID,ReviewText,Rating,Sentiment
0,R001,I absolutely love this product! It works perfe...,5,Positive
1,R002,"The product is okay, but I've seen better. It ...",3,Neutral
2,R003,I'm really disappointed with this purchase. Th...,1,Negative
3,R004,This is by far the best thing I've bought all ...,5,Positive
4,R005,"The product is decent, but nothing special. I ...",3,Neutral


In [4]:
# Preprocess the data
# For simplicity, we assume the text is already clean (no missing values, etc.)
# Convert text data into numerical features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=500)
X = vectorizer.fit_transform(data['ReviewText'])

In [5]:
# Encode the target variable (Sentiment)
y = data['Sentiment'].map({'Negative': 0, 'Neutral': 1, 'Positive': 2})

In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
# Build the logistic-regression classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(random_state=42).fit(X_train, y_train)
# Leave the fitted estimator as the cell's final expression so it is displayed
model

In [10]:
# Evaluate the model on the held-out test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Pass `labels` explicitly so each display name is tied to its encoded class
# (0 = Negative, 1 = Neutral, 2 = Positive). Without it, scikit-learn infers
# the label set from y_test and y_pred; if a class is absent from both, the
# three target_names no longer match and classification_report raises.
# zero_division=0 reports 0.0 (without a warning) for classes never predicted.
report = classification_report(
    y_test,
    y_pred,
    labels=[0, 1, 2],
    target_names=['Negative', 'Neutral', 'Positive'],
    zero_division=0,
)

In [11]:
# Show the accuracy, a heading, and the per-class report, one item per line
print(f'Accuracy: {accuracy}', 'Classification Report:', report, sep='\n')

Accuracy: 0.16666666666666666
Classification Report:
              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00         3
     Neutral       0.17      1.00      0.29         1
    Positive       0.00      0.00      0.00         2

    accuracy                           0.17         6
   macro avg       0.06      0.33      0.10         6
weighted avg       0.03      0.17      0.05         6



In [12]:
# Unseen example reviews used to try out the trained classifier
new_reviews = [
    "This product is amazing and works perfectly!",
    "I'm not happy with the purchase, it doesn't work as expected.",
    "The product is okay, but not great.",
]

In [13]:
# Encode the new reviews with the already-fitted TF-IDF vectorizer.
# transform() (not fit_transform()) is used so the features line up with the
# ones the vectorizer was fitted on.
new_reviews_transformed = vectorizer.transform(raw_documents=new_reviews)

In [14]:
# Predict sentiment
predictions = model.predict(new_reviews_transformed)

# Decode the numeric classes back to their names with a lookup table (the
# inverse of the 0/1/2 encoding applied to the Sentiment column). Unlike a
# chained conditional, an unexpected class code raises KeyError instead of
# being silently reported as 'Positive'.
sentiment_names = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
predicted_sentiments = [sentiment_names[p] for p in predictions]

In [15]:
# Report each new review next to the sentiment predicted for it
for pair in zip(new_reviews, predicted_sentiments):
    review, sentiment = pair
    print(f'Review: "{review}" - Predicted Sentiment: {sentiment}')

Review: "This product is amazing and works perfectly!" - Predicted Sentiment: Neutral
Review: "I'm not happy with the purchase, it doesn't work as expected." - Predicted Sentiment: Positive
Review: "The product is okay, but not great." - Predicted Sentiment: Neutral
