In [1]:
# 1. Import Libraries
import pandas as pd
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


In [2]:
# 2. Sample Dataset Creation
# Seventeen short product reviews. Order matters: labels are attached to the
# reviews by position, so the groups below are concatenated in their original order.
reviews = (
    # Reviews 1-5 (labelled 1 in `sentiments`)
    [
        "I love this product, it is wonderful and exactly as described",
        "Absolutely fantastic! Best purchase I've made.",
        "This is the greatest thing ever, highly recommend it.",
        "I am very pleased with this product, will buy again!",
        "What an amazing product! Exceeded my expectations.",
    ]
    # Reviews 6-10 (labelled 0)
    + [
        "Terrible. It broke after one use, do not buy this.",
        "I hate this. Worst product ever, complete waste of money.",
        "Very disappointed. The item is defective and low quality.",
        "Awful experience, product didn't work at all.",
        "Not worth the money. I will never buy this again.",
    ]
    # Reviews 11-14 (labelled 1)
    + [
        "Quite good, I'm happy with the service and the product.",
        "The product is okay, nothing special but it works as expected.",
        "Simply outstanding. I love the results this gives me!",
        "Excellent quality, very satisfied with this purchase.",
    ]
    # Reviews 15-17 (labelled 0)
    + [
        "Not worth the hype, returns to sender.",
        "Dreadful quality, not recommended.",
        "One of the worst purchases I have made.",
    ]
)

In [3]:
# Labels aligned with `reviews` by position (1 = positive, 0 = negative):
# five positive, five negative, four positive, three negative.
sentiments = [1] * 5 + [0] * 5 + [1] * 4 + [0] * 3

# One row per review: raw text plus its label.
df = pd.DataFrame(dict(review=reviews, sentiment=sentiments))


In [4]:
# 3. Text Cleaning
# Function words that are dropped during cleaning. Negations such as "not" and
# "never" are not in the set, so they are kept in the cleaned text.
stopwords = set(
    """
    i me my we our you your he him his she her
    it its they them what this that these those
    am is are was were be been being
    have has had having
    do does did doing
    a an the and but if or as
    of at by for with about against
    to from up down in out on off over under
    again further then once here there when where
    why how all any both each few more most
    other some such only own same so than too very
    can will just should now
    """.split()
)


In [5]:
def clean_text(text):
    """Normalize one review for vectorization.

    The text is lowercased, every character in ``string.punctuation`` is
    deleted, the result is split on whitespace, and tokens found in the
    module-level ``stopwords`` set are discarded.

    Args:
        text: The raw review string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Translation table that maps each ASCII punctuation character to "deleted".
    punctuation_remover = str.maketrans('', '', string.punctuation)
    stripped = text.lower().translate(punctuation_remover)
    return " ".join(word for word in stripped.split() if word not in stopwords)

# Store the normalized text next to the raw review so both stay inspectable.
df['cleaned'] = df['review'].map(clean_text)

In [6]:
# 4. TF-IDF Vectorization
tfidf = TfidfVectorizer(max_features=20)
X = tfidf.fit_transform(df['cleaned'])
y = df['sentiment'].values

In [7]:
# 5. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# 6. Model Training
# Logistic regression with scikit-learn's default settings, fitted on the
# training features and labels.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [9]:
# 7. Prediction
# Predicted class labels for the held-out reviews.
y_pred = model.predict(X=X_test)


In [10]:
# 8. Evaluation
# Compare held-out labels with the predictions. precision/recall/F1 are called
# with scikit-learn's defaults (binary averaging, positive class = label 1).
acc, prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)

In [11]:
# Print the scalar metrics to two decimals, then the confusion matrix and the
# per-class classification report.
print("=== Evaluation Metrics ===")
for label, score in (
    ("Accuracy", acc),
    ("Precision", prec),
    ("Recall", rec),
    ("F1-score", f1),
):
    print(f"{label}: {score:.2f}")
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


=== Evaluation Metrics ===
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Confusion Matrix:
 [[2 0]
 [0 2]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



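Conclusion:
We performed sentiment analysis using TF-IDF vectorization and Logistic Regression. After preprocessing the review text (lowercasing, punctuation removal, and stop-word removal), the model classified all 4 held-out reviews correctly, giving accuracy, precision, recall, and F1-score of 1.00 for both classes. This suggests that the approach can be effective for classifying customer reviews as positive or negative; however, a 17-review sample with a 4-review test set is too small to support firm conclusions, so the result should be confirmed on a larger dataset.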