In [2]:
import pandas as pd

# 50 good feedback samples
good_feedback = [
    "Great product quality",
    "Very satisfied with the product",
    "Excellent build and design",
    "Works perfectly as expected",
    "Highly recommended product",
    "Good value for money",
    "Product exceeded expectations",
    "Amazing performance",
    "Easy to use and reliable",
    "Very happy with this purchase",
    "Solid product and well built",
    "Customer support was helpful",
    "Product is durable and strong",
    "Fast delivery and good packaging",
    "The product works great",
    "Nice design and quality",
    "Very useful product",
    "Impressed with the performance",
    "Great features for the price",
    "Product feels premium",
    "No issues so far",
    "Works smoothly",
    "Satisfied customer",
    "Product is worth buying",
    "Good experience overall",
    "Excellent product quality",
    "Highly satisfied",
    "Reliable and efficient",
    "Product works flawlessly",
    "Build quality is excellent",
    "Product is easy to install",
    "Very convenient to use",
    "Amazing quality",
    "Great customer experience",
    "Product performs well",
    "Happy with the purchase",
    "Value for price",
    "Strong and durable",
    "Top quality product",
    "Product is impressive",
    "Good product performance",
    "Exceeded my expectations",
    "Product is awesome",
    "Good finish and quality",
    "Very reliable product",
    "Worth the money",
    "Product meets expectations",
    "Smooth performance",
    "Excellent value",
    "Really good product"
]

# 50 bad feedback samples
bad_feedback = [
    "Poor product quality",
    "Very disappointed with the product",
    "Stopped working after few days",
    "Bad build quality",
    "Not worth the price",
    "Terrible product experience",
    "Product is defective",
    "Very poor performance",
    "Waste of money",
    "Not satisfied at all",
    "Product broke quickly",
    "Cheap material used",
    "Product does not work",
    "Extremely disappointed",
    "Low quality product",
    "Product failed completely",
    "Very bad experience",
    "Unreliable product",
    "Product is useless",
    "Not recommended",
    "Poor design",
    "Product stopped functioning",
    "Very low quality",
    "Worst product ever",
    "Product overheats",
    "Bad customer support",
    "Product arrived damaged",
    "Disappointed with purchase",
    "Product has many issues",
    "Does not meet expectations",
    "Very poor build",
    "Product is faulty",
    "Product feels cheap",
    "Terrible quality",
    "Not working properly",
    "Very bad product",
    "Product performance is poor",
    "Completely dissatisfied",
    "Low durability",
    "Product is problematic",
    "Bad experience overall",
    "Product is disappointing",
    "Poor value for money",
    "Product quality is bad",
    "Not useful at all",
    "Product broke easily",
    "Worst purchase",
    "Product does not last",
    "Unhappy with the product",
    "Product is terrible"
]

# Combine texts and labels
texts = good_feedback + bad_feedback
labels = ["good"] * 50 + ["bad"] * 50

# Create DataFrame
df_feedback = pd.DataFrame({
    "Text": texts,
    "Label": labels
})

print(df_feedback.head())
print("\nTotal samples:", len(df_feedback))


                              Text Label
0            Great product quality  good
1  Very satisfied with the product  good
2       Excellent build and design  good
3      Works perfectly as expected  good
4       Highly recommended product  good

Total samples: 100


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    max_features = 300,
    lowercase = True,
    stop_words="english"  
)
X = vectorizer.fit_transform(df_feedback["Text"])

# inspect results
print("Shape of feature matrix:", X.shape)
print("First 10 feature names:", vectorizer.get_feature_names_out()[:10])

Shape of feature matrix: (100, 95)
First 10 feature names: ['amazing' 'arrived' 'awesome' 'bad' 'broke' 'build' 'built' 'buying'
 'cheap' 'completely']


In [4]:
from sklearn.model_selection import train_test_split

y = df_feedback["Label"]

X_train,X_test,y_train,y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y
)

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

model = LogisticRegression(
        max_iter = 1000,
        class_weight= "balanced"
    ).fit(X_train, y_train)

y_pred = model.predict(X_test)

# Metrics
precision = precision_score(y_test, y_pred, pos_label="good")
recall = recall_score(y_test, y_pred, pos_label="good")
f1 = f1_score(y_test, y_pred, pos_label="good")

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.6363636363636364
Recall: 0.5833333333333334
F1-score: 0.6086956521739131
