In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer # Bag of Words
from sklearn.model_selection import train_test_split # Train-test split
from sklearn.linear_model import LogisticRegression # Logistic Regression model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix # Model evaluation metrics

In [None]:
# Sample dataset of short review-style texts with their sentiment label (122 rows).
# Each row is [text, sentiment]; the sentiment is "positive", "negative" or "neutral".
# NOTE: several rows are exact repeats of earlier rows, so the same text can
# end up on both sides of a train/test split made from this list.
texts = [
    ["I love this product, it's amazing!", "positive"],
    ["This is the worst service I've ever experienced.", "negative"],
    ["It's okay, not great but not terrible either.", "neutral"],
    ["Absolutely fantastic! Highly recommend it to everyone.", "positive"],
    ["I hate waiting in long lines, it's so frustrating.", "negative"],
    ["The food was decent, nothing special.", "neutral"],
    ["Best purchase I've made this year!", "positive"],
    ["Terrible quality, I want a refund!", "negative"],
    ["Mediocre experience, I expected more.", "neutral"],
    ["This is a game changer, I can't believe how good it is!", "positive"],
    ["I will never buy from this brand again, they are awful.", "negative"],
    ["It's just fine, not worth the hype.", "neutral"],
    ["Incredible value for the price, I'm very satisfied.", "positive"],
    ["The customer service was unhelpful and rude.", "negative"],
    ["Average product, I wouldn't recommend it to others.", "neutral"],
    ["This is the best thing I've ever bought!", "positive"],
    ["I'm disappointed with the quality, it broke after one use.", "negative"],
    ["It's a decent option if you're looking for something basic.", "neutral"],
    ["I can't get enough of this, it's so good!", "positive"],
    ["This is a scam, I feel cheated.", "negative"],
    ["It's okay, but I've seen better.", "neutral"],
    ["I'm thrilled with my purchase, it's perfect!", "positive"],
    ["The delivery was late and the package was damaged.", "negative"],
    ["It's a standard product, nothing extraordinary.", "neutral"],
    ["This is a must-have for anyone who loves quality.", "positive"],
    ["I regret buying this, it's a waste of money.", "negative"],
    ["It's a reliable choice, I use it every day.", "neutral"],
    ["I'm so happy with this, it exceeded my expectations!", "positive"],
    ["The product arrived broken, I'm very upset.", "negative"],
    ["It's a simple solution to a common problem.", "neutral"],
    ["This is a fantastic investment, I highly recommend it.", "positive"],
    ["I can't believe how bad this is, I'm returning it.", "negative"],
    ["It's a basic product that does its job well.", "neutral"],
    ["I'm extremely satisfied with this purchase, it's worth every penny!", "positive"],
    ["This is the worst experience I've ever had, I will never shop here again.", "negative"],
    ["It's an average product, nothing to write home about.", "neutral"],
    ["This is a revolutionary product, it has changed my life!", "positive"],
    ["I feel ripped off, this is not worth the price.", "negative"],
    ["It's a decent product, but I've seen better options.", "neutral"],
    ["I'm in love with this product, it's perfect for me!", "positive"],
    ["This is a complete disaster, I want my money back.", "negative"],
    ["It's a good option if you're looking for something affordable.", "neutral"],
    ["This is a great addition to my collection, I'm very pleased!", "positive"],
    ["I can't stand how bad this is, it's a total letdown.", "negative"],
    ["It's an okay product, it gets the job done.", "neutral"],
    ["I'm thrilled with the quality, it's top-notch!", "positive"],
    ["This is the worst purchase I've ever made, I regret it.", "negative"],
    ["It's a standard product, nothing special about it.", "neutral"],
    ["I'm so impressed with this, it's exceeded my expectations!", "positive"],
    ["I can't believe how terrible this is, I'm so disappointed.", "negative"],
    ["It's a basic product, but it works well enough.", "neutral"],
    ["This is a fantastic product, I can't recommend it enough!", "positive"],
    ["I feel cheated, this is not worth the money at all.", "negative"],
    ["It's an average product, it does what it's supposed to.", "neutral"],
    ["I'm very happy with this purchase, it's exactly what I needed!", "positive"],
    ["This is a complete waste of money, I will never buy from this brand again.", "negative"],
    ["It's a decent product, but I've seen better alternatives.", "neutral"],
    ["I'm absolutely in love with this, it's perfect for my needs!", "positive"],
    ["This is a nightmare, I can't believe how bad it is.", "negative"],
    ["It's a reliable product, I use it every day without issues.", "neutral"],
    ["I'm so satisfied with this, it's worth every cent!", "positive"],
    ["I regret buying this, it's a total disappointment.", "negative"],
    ["It's an okay product, but I've seen better quality elsewhere.", "neutral"],
    ["This is a must-have for anyone who values quality and performance!", "positive"],
    ["I can't believe how bad this is, it's a total scam.", "negative"],
    ["It's a basic product, but it serves its purpose well.", "neutral"],
    ["I'm extremely pleased with this purchase, it's fantastic!", "positive"],
    ["This is the worst experience I've ever had with a product, I'm so upset.", "negative"],
    ["It's an average product, it does what it's supposed to do.", "neutral"],
    ["This is a revolutionary product, it has made my life so much easier!", "positive"],
    ["I feel ripped off, this is not worth the hype at all.", "negative"],
    ["It's a decent product, but I've seen better options on the market.", "neutral"],
    ["I'm in love with this product, it's exactly what I was looking for!", "positive"],
    ["This is a complete disaster, I want my money back immediately.", "negative"],
    ["It's a good option if you're looking for something budget-friendly.", "neutral"],
    ["This is a great product, I'm very satisfied with my purchase!", "positive"],
    ["I can't believe how bad this is, it's a total disappointment.", "negative"],
    ["I'm so happy with this, it's exceeded my expectations!", "positive"],
    ["This is the worst product I've ever bought, I regret it completely.", "negative"],
    ["It's a decent product, it does what it's supposed to do.", "neutral"],
    ["I'm thrilled with this, it's perfect for my needs!", "positive"],
    ["I feel cheated, this is not worth the money at all.", "negative"],
    ["It's an okay product, it gets the job done.", "neutral"],
    ["This is a fantastic investment, I highly recommend it!", "positive"],
    ["I can't believe how terrible this is, I'm so disappointed.", "negative"],
    ["It's a basic product, but it works well enough.", "neutral"],
    ["I'm extremely satisfied with this purchase, it's worth every penny!", "positive"],
    ["This is the worst experience I've ever had, I will never shop here again.", "negative"],
    ["It's an average product, nothing to write home about.", "neutral"],
    ["This is a revolutionary product, it has changed my life!", "positive"],
    ["I feel ripped off, this is not worth the price.", "negative"],
    ["It's a decent product, but I've seen better options.", "neutral"],
    ["I'm in love with this product, it's perfect for me!", "positive"],
    ["This is a complete disaster, I want my money back.", "negative"],
    ["It's a good option if you're looking for something affordable.", "neutral"],
    ["This is a great addition to my collection, I'm very pleased!", "positive"],
    ["I can't stand how bad this is, it's a total letdown.", "negative"],
    ["It's an okay product, it gets the job done.", "neutral"],
    ["I'm thrilled with the quality, it's top-notch!", "positive"],
    ["This is the worst purchase I've ever made, I regret it.", "negative"],
    ["It's a standard product, nothing special about it.", "neutral"],
    ["I'm so impressed with this, it's exceeded my expectations!", "positive"],
    ["I can't believe how terrible this is, I'm so disappointed.", "negative"],
    ["It's a basic product, but it works well enough.", "neutral"],
    ["This is a fantastic product, I can't recommend it enough!", "positive"],
    ["I feel cheated, this is not worth the money at all.", "negative"],
    ["It's an average product, it does what it's supposed to.", "neutral"],
    ["I'm very happy with this purchase, it's exactly what I needed!", "positive"],
    ["This is a complete waste of money, I will never buy from this brand again.", "negative"],
    ["It's a decent product, but I've seen better alternatives.", "neutral"],
    ["I'm absolutely in love with this, it's perfect for my needs!", "positive"],
    ["This is a nightmare, I can't believe how bad it is.", "negative"],
    ["It's a reliable product, I use it every day without issues.", "neutral"],
    ["I'm so satisfied with this, it's worth every cent!", "positive"],
    ["I regret buying this, it's a total disappointment.", "negative"],
    ["It's an okay product, but I've seen better quality elsewhere.", "neutral"],
    ["This is a must-have for anyone who values quality and performance!", "positive"],
    ["I can't believe how bad this is, it's a total scam.", "negative"],
    ["It's a basic product, but it serves its purpose well.", "neutral"],
    ["I'm extremely pleased with this purchase, it's fantastic!", "positive"],
    ["This is the worst experience I've ever had with a product, I'm so upset.", "negative"],
    ["It's an average product, it does what it's supposed to do.", "neutral"]
]


In [None]:
# Load the [text, sentiment] pairs into a two-column table.
data = pd.DataFrame(
    data=texts,
    columns=["text", "sentiment"],
)
# Last expression of the cell: shows the resulting table.
data


In [None]:
# Shuffle every row (frac=1 keeps all of them) with a fixed seed so the order
# is the same on each run, then renumber the index from 0.
data = (
    data
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
# Last expression of the cell: shows the shuffled table.
data

In [None]:
# x holds the raw texts (model input), y the sentiment labels (target).
x, y = data["text"], data["sentiment"]

In [None]:
# Bag of Words: learn the vocabulary from the texts and count, per text,
# how often each vocabulary term occurs.
# NOTE: the vocabulary is fitted on all rows of x, i.e. before any rows are
# set aside for testing.
countvec = CountVectorizer()
countvec_fit = countvec.fit_transform(x)
# Expand the sparse count matrix into a dense table with one column per term
# so it is easier to inspect.
bag_of_words = pd.DataFrame(
    data=countvec_fit.toarray(),
    columns=countvec.get_feature_names_out(),
)
# Last expression of the cell: shows the term-count table.
bag_of_words

In [None]:
# Split the features and labels into training (70%) and testing (30%) sets.
# random_state fixes the split so the metrics reported below are reproducible
# from run to run; stratify=y keeps the positive/negative/neutral proportions
# the same in both parts, which matters for a dataset this small.
X_train, X_test, y_train, y_test = train_test_split(
    bag_of_words,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# What is Logistic Regression?

**Logistic Regression** is a supervised machine learning algorithm used for classification tasks. Unlike linear regression, which predicts continuous values, logistic regression predicts the probability that a given input belongs to a particular class (e.g., positive, negative, or neutral sentiment).

- It models the relationship between input features and the probability of a specific outcome using the logistic (sigmoid) function:

  $$
  \sigma(z) = \frac{1}{1 + e^{-z}}
  $$

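- The sigmoid output is always between 0 and 1, so it can be read as the probability of one class in a binary problem. For more than two classes (such as the positive/negative/neutral labels used here), the model is extended to produce one probability per class, for example with the softmax function.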
- Logistic regression is widely used for text classification, spam detection, medical diagnosis, and more due to its simplicity and interpretability.

In [None]:
# random_state=1 pins the estimator's random seed so that repeated runs of
# this cell give the same fitted model.
# (The value 42 seen earlier in the notebook is the seed of the DataFrame
# shuffle; it is unrelated to this estimator.)
# NOTE(review): per the scikit-learn docs the seed only influences some
# solvers ('sag', 'saga', 'liblinear') - confirm for the installed version.
lr = LogisticRegression(random_state=1)
# fit() returns the estimator itself, so lr still refers to the same model.
lr = lr.fit(X_train, y_train)

In [None]:
# Predict a sentiment label for every held-out text with the fitted
# logistic regression model.
y_pred_lr = lr.predict(X_test)
# Share of test rows whose predicted label equals the true label.
# (The bare `accuracy` expression that used to follow was removed: only the
# last expression of a cell is displayed, so it had no effect; the value is
# printed at the end of this cell instead.)
accuracy = accuracy_score(y_test, y_pred_lr)
# Per-class evaluation metrics.
print(classification_report(y_test, y_pred_lr))
# Confusion matrix of true versus predicted labels.
print(confusion_matrix(y_test, y_pred_lr))
# Overall accuracy, rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")

# Naive Bayes

Naive Bayes is another classification algorithm that is often compared with logistic regression. While both can be used for similar tasks, they have different underlying assumptions and mechanisms.

# Multinomial Naive Bayes

**Multinomial Naive Bayes** is a variant of the Naive Bayes algorithm that is specifically designed for classification tasks where the features are discrete counts, such as word counts in text classification problems. It assumes that the features follow a multinomial distribution, which is suitable for modeling the frequency of events or occurrences.


In [None]:
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Train a multinomial Naive Bayes classifier on the training word counts.
nb = MultinomialNB()
nb.fit(X_train, y_train)
# Label the held-out rows and compute the share of correct predictions.
y_pred_nb = nb.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
# Report, in order: the classification report, the confusion matrix and the
# overall accuracy.
print(classification_report(y_test, y_pred_nb))
print(confusion_matrix(y_test, y_pred_nb))
print(f"Accuracy (Naive Bayes): {accuracy_nb:.2f}")

# Linear Support Vector Machine (SVM)

**Linear Support Vector Machine (SVM)** is a powerful classification algorithm that aims to find the optimal hyperplane that separates different classes in the feature space. It works by maximizing the margin between the closest data points of different classes, known as support vectors. The key characteristics of linear SVM include the following:
- **Linear Decision Boundary**: Linear SVM constructs a linear decision boundary (hyperplane) to separate classes in the feature space.
- **Support Vectors**: It focuses on the data points that are closest to the decision boundary, known as support vectors. These points are critical for defining the hyperplane.
- **Margin Maximization**: The algorithm aims to maximize the margin between the support vectors of different classes, ensuring that the decision boundary is as far away from the closest points of each class as possible.

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear SVM trained with stochastic gradient descent.
# loss="hinge" is the SGDClassifier default and is what makes this a linear
# SVM; it is spelled out so the code matches the section heading.
# random_state fixes the data shuffling done during training so the reported
# numbers are reproducible, in line with the logistic regression model, which
# is also seeded with 1.
sgd = SGDClassifier(loss="hinge", random_state=1).fit(X_train, y_train)
# Predict a sentiment label for every held-out text.
y_pred_sgd = sgd.predict(X_test)
# Share of test rows whose predicted label equals the true label.
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
# Per-class evaluation metrics.
print(classification_report(y_test, y_pred_sgd))
# Confusion matrix of true versus predicted labels.
print(confusion_matrix(y_test, y_pred_sgd))
# Overall accuracy, rounded to two decimals.
print(f"Accuracy (SGD): {accuracy_sgd:.2f}")
