In [None]:
# Importing essential libraries
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


In [None]:
# Read the Amazon reviews CSV into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
reviews_csv = "amazon_reviews.csv"
df = pd.read_csv(reviews_csv)


In [None]:
# Derive a three-class sentiment label from the star rating:
#   rating >= 4 -> "Positive", rating == 3 -> "Neutral", anything else -> "Negative".
def rating_to_sentiment(rating):
    """Return the sentiment label for a single rating.

    Returns None when the rating is missing, so a missing value is never
    reported as a real class.
    """
    if pd.isna(rating):
        # NaN fails both comparisons below and would otherwise fall through to "Negative".
        return None
    if rating >= 4:
        return "Positive"
    return "Neutral" if rating == 3 else "Negative"


# Reviews without a rating have no ground-truth label; drop them rather than
# silently training on them as "Negative". Re-running this cell is a no-op.
df = df.dropna(subset=["rating"]).copy()
df["sentiment"] = df["rating"].apply(rating_to_sentiment)


In [None]:
# Normalise the review text: lowercase it and keep only ASCII letters and spaces.
#  - Missing / non-string reviews are turned into strings first ("" for NaN);
#    calling re.sub on a NaN float raises TypeError.
#  - Runs of whitespace (newlines, tabs, ...) are collapsed to one space *before*
#    stripping, so "great\nproduct" stays two words instead of "greatproduct".
df["clean"] = (
    df["review_text"]
    .fillna("")
    .astype(str)
    .str.lower()
    .str.replace(r"\s+", " ", regex=True)
    .str.replace(r"[^a-zA-Z ]", "", regex=True)
)


In [None]:
# Build the model inputs: TF-IDF vectors of the cleaned text (X) and the
# sentiment labels (y). English stop words are removed by the vectoriser.
# NOTE(review): the vectoriser is fitted on every row, i.e. before the
# train/test split, so IDF weights also see the held-out reviews.
y = df["sentiment"]

cleaned_reviews = df["clean"]
tfidf = TfidfVectorizer(stop_words="english")
X = tfidf.fit_transform(cleaned_reviews)

In [None]:
# Hold out 20% of the reviews for evaluation.
# A fixed seed makes the split, and therefore the reported accuracy,
# reproducible across "Restart Kernel -> Run All".
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)


In [None]:
# Fit a logistic-regression classifier on the training split.
# The solver is allowed up to 200 iterations.
max_iterations = 200
model = LogisticRegression(max_iter=max_iterations)
model.fit(X=X_train, y=y_train)


In [None]:


# Predict sentiment for the held-out reviews and report plain accuracy.
pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:", accuracy)


In [None]:
# Bar chart of how many reviews fall into each sentiment class.
sentiment_counts = df["sentiment"].value_counts()

fig, ax = plt.subplots()
sentiment_counts.plot(kind="bar", ax=ax)
ax.set_title("Sentiment Count")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Count")
plt.show()
