In [1]:
import logging
import joblib
import os
import math
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.feature_extraction.text import TfidfVectorizer
from typing import List, Tuple, Set

# Configure logging
logging.basicConfig(
    filename="model_evaluation.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Constants
LOGGING_ENABLED = True
SAVE_MODELS = True

2025-01-08 17:18:28.659033: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load data (Assuming positive-reviews.txt and negative-reviews.txt exist)
def load_data(positive_file: str, negative_file: str) -> Tuple[List[str], List[str]]:
    with open(positive_file, "r") as pos_file:
        positive_reviews = pos_file.readlines()
    with open(negative_file, "r") as neg_file:
        negative_reviews = neg_file.readlines()
    return positive_reviews, negative_reviews


In [3]:
# Feature Extraction
def extract_features(
    reviews: List[str],
    positive_words: Set[str],
    negative_words: Set[str],
    tfidf_vectorizer: TfidfVectorizer,
) -> np.ndarray:
    features = []
    tfidf_matrix = tfidf_vectorizer.transform(reviews)
    for i, review in enumerate(reviews):
        # Count of positive words
        positive_count = sum(1 for word in review.split() if word in positive_words)
        # Count of negative words
        negative_count = sum(1 for word in review.split() if word in negative_words)
        # Presence of the word "no"
        contains_no = int("no" in review.split())
        # Count of first and second pronouns
        pronouns = {"I", "me", "my", "you", "your"}
        pronoun_count = sum(1 for word in review.split() if word in pronouns)
        # Presence of "!"
        contains_I = int("!" in review.split())
        # Logarithm of the length of the review
        review_length_log = math.log(len(review.split()) + 1)
        # TF-IDF features
        tfidf_features = tfidf_matrix[i].toarray()[0]
        # Append all features
        features.append(
            [
                positive_count,
                negative_count,
                contains_no,
                pronoun_count,
                contains_I,
                review_length_log,
                *tfidf_features,
            ]
        )
    return np.array(features)


In [None]:
# Feature Extraction without TF-IDF
def extract_features_without_tfidf(
    reviews: List[str],
    positive_words: Set[str],
    negative_words: Set[str],
) -> np.ndarray:
    features = []
    for review in reviews:
        # Count of positive words
        positive_count = sum(1 for word in review.split() if word in positive_words)
        # Count of negative words
        negative_count = sum(1 for word in review.split() if word in negative_words)
        # Presence of the word "no"
        contains_no = int("no" in review.split())
        # Count of first and second pronouns
        pronouns = {"I", "me", "my", "you", "your"}
        pronoun_count = sum(1 for word in review.split() if word in pronouns)
        # Presence of "!"
        contains_mark = int("!" in review.split())
        # Logarithm of the length of the review
        review_length_log = math.log(len(review.split()) + 1)
        # Append all features
        features.append(
            [
                positive_count,
                negative_count,
                contains_no,
                pronoun_count,
                contains_mark,
                review_length_log,
            ]
        )
    return np.array(features)


In [5]:
# Load positive and negative word lists
def load_word_lists(
    positive_file: str, negative_file: str
) -> Tuple[Set[str], Set[str]]:
    with open(positive_file, "r") as pos_file:
        positive_words = set(pos_file.read().splitlines())
    with open(negative_file, "r") as neg_file:
        negative_words = set(neg_file.read().splitlines())
    return positive_words, negative_words


In [6]:
positive_reviews: List[str]
negative_reviews: List[str]
positive_reviews, negative_reviews = load_data(
    "positive-reviews.txt", "negative-reviews.txt"
)
positive_reviews, negative_reviews

(['Size, Size, ans Size.\n',
  'Excellent quality, speedy printing, low cost\n',
  'Cheap, good quality, small size\n',
  'Attractive design, satisfying features, the backlight!\n',
  'Pretty much has every feature you could possibly need, great look\n',
  'Ease of use, small size and reliability\n',
  'cheap,good print quality\n',
  'takes clear shots\n',
  'Small, Light, Stylish, backlight\n',
  'Absolutely Outstanding Photo Printing Results.  Easy to Use.\n',
  'Great signal, durable, compact\n',
  'Compact, easy to navigate menu,\n',
  'Light, Feature Packed, Customizable, Easily fits in pocket\n',
  'small and lightweight, easy to use\n',
  'Easy to use feature loaded.\n',
  'Excellent quality, user friendly, compact\n',
  'Honestly the best phone I have owned. Voice dialing!\n',
  'Takes great photos\n',
  'very clear photos\n',
  'None to speak of.\n',
  'Colorful, voice recognition\n',
  'None!\n',
  'Style, Features.\n',
  'whitish blue backlight color, animated menus, clear s

In [7]:
positive_words: Set[str]
negative_words: Set[str]
positive_words, negative_words = load_word_lists(
    "positive-words.txt", "negative-words.txt"
)

positive_words, negative_words

({'fast-paced',
  'pamperedness',
  'roomier',
  'terrific',
  'groundbreaking',
  'modern',
  'upgraded',
  'attractive',
  'genius',
  'outstrip',
  'aspire',
  'warmhearted',
  'awesomeness',
  'imaginative',
  'gladly',
  'obsessions',
  'stellar',
  'booming',
  'recover',
  'guiltless',
  'low-cost',
  'flatter',
  'cool',
  'impressive',
  'gladness',
  'ready',
  'gentle',
  'best-known',
  'truthful',
  'liking',
  'willingly',
  'powerful',
  'unfettered',
  'breathlessness',
  'hooray',
  'robust',
  'restructuring',
  'exaltation',
  'exquisitely',
  'reverently',
  'romantic',
  'noteworthy',
  'well-received',
  'thrifty',
  'spellbound',
  'fond',
  'exalt',
  'amazed',
  'crisper',
  'energize',
  'simpler',
  'amicable',
  'soft',
  'reassure',
  'engaging',
  'well-rounded',
  'kid-friendly',
  'reforming',
  'invincibility',
  'suave',
  'humility',
  'gentlest',
  'godsend',
  'masters',
  'gratefully',
  'improvements',
  'revolutionary',
  'authentic',
  'courage'

In [8]:
# Label data (1 for positive, 0 for negative)
positive_labels: List[int]
negative_labels: List[int]
positive_labels = [1] * len(positive_reviews)
negative_labels = [0] * len(negative_reviews)
positive_labels, negative_labels

([1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,


In [9]:
# Combine data and labels
all_reviews: List[str]
all_labels: List[int]
all_reviews = positive_reviews + negative_reviews
all_labels = positive_labels + negative_labels
all_reviews, all_labels

(['Size, Size, ans Size.\n',
  'Excellent quality, speedy printing, low cost\n',
  'Cheap, good quality, small size\n',
  'Attractive design, satisfying features, the backlight!\n',
  'Pretty much has every feature you could possibly need, great look\n',
  'Ease of use, small size and reliability\n',
  'cheap,good print quality\n',
  'takes clear shots\n',
  'Small, Light, Stylish, backlight\n',
  'Absolutely Outstanding Photo Printing Results.  Easy to Use.\n',
  'Great signal, durable, compact\n',
  'Compact, easy to navigate menu,\n',
  'Light, Feature Packed, Customizable, Easily fits in pocket\n',
  'small and lightweight, easy to use\n',
  'Easy to use feature loaded.\n',
  'Excellent quality, user friendly, compact\n',
  'Honestly the best phone I have owned. Voice dialing!\n',
  'Takes great photos\n',
  'very clear photos\n',
  'None to speak of.\n',
  'Colorful, voice recognition\n',
  'None!\n',
  'Style, Features.\n',
  'whitish blue backlight color, animated menus, clear s

In [10]:
# Split into training and testing sets (80% train, 20% test)
X_train: List[str]
X_test: List[str]
y_train: List[int]
y_test: List[int]
X_train, X_test, y_train, y_test = train_test_split(
    all_reviews, all_labels, test_size=0.2, random_state=42
)

X_train, X_test, y_train, y_test

(['5 Megapixel, fully manual or auto, nightshot, 4x optical zoom.\n',
  'Quick, Efficient, and Amazing Printer\n',
  'Print Quality and Conservation of Ink\n',
  'Cheap...easy to set up and use\n',
  'faceplate falls off when dropped\n',
  'Very small size, nice finish, anti-glare screen, easy - just point-and-shoot\n',
  'Smearing\n',
  'Expensive ink\n',
  'Looks great, awesome display, lots of features\n',
  'Tricky shape to squeeze against shoulder, no vibrate option.  #13; #10;\n',
  'Battery Life using the digital viewfinder is extremely low.\n',
  'Compact, nice zoom, sliding cover, and picture quality.\n',
  'Picture quality, ease of use, feel of camera\n',
  'Uses a lot of power\n',
  'Great price, HP reliability\n',
  'Can be expensive to run\n',
  'This phone is HORRIBLE! There are no pros...\n',
  'Eats batteries like a HUMMER! Needs a neck strap! Fragile toggle back button\n',
  'low voice volume, terrible camera, phone locks up and drops calls\n',
  'Gorgeous, hi-resoluti

In [11]:
# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(all_reviews)

In [12]:
# Extract features
X_train_features: np.ndarray
X_test_features: np.ndarray
X_train_features = extract_features(
    X_train, positive_words, negative_words, tfidf_vectorizer
)
X_test_features = extract_features(
    X_test, positive_words, negative_words, tfidf_vectorizer
)

X_train_features, X_test_features

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]))

In [13]:
# Extract features without TF-IDF
X_train_features_no_tfidf = extract_features_without_tfidf(
    X_train, positive_words, negative_words
)
X_test_features_no_tfidf = extract_features_without_tfidf(
    X_test, positive_words, negative_words
)

X_train_features_no_tfidf, X_test_features_no_tfidf

(array([[0.        , 0.        , 0.        , 0.        , 0.        ,
         2.39789527],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         1.79175947],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         1.94591015],
        ...,
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.69314718],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         1.94591015],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         1.60943791]]),
 array([[0.        , 0.        , 0.        , 0.        , 0.        ,
         0.69314718],
        [1.        , 0.        , 0.        , 0.        , 0.        ,
         2.56494936],
        [0.        , 0.        , 1.        , 0.        , 0.        ,
         2.7080502 ],
        ...,
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         2.30258509],
        [0.        , 0.        , 0.        , 0.        , 0.   

In [14]:
# Convert labels to categorical for DNN
y_train_categorical: np.ndarray
y_test_categorical: np.ndarray
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

y_train_categorical, y_test_categorical

(array([[0., 1.],
        [0., 1.],
        [0., 1.],
        ...,
        [1., 0.],
        [0., 1.],
        [0., 1.]], dtype=float32),
 array([[1., 0.],
        [0., 1.],
        [1., 0.],
        ...,
        [0., 1.],
        [0., 1.],
        [1., 0.]], dtype=float32))

In [15]:
# Ensure the models directory exists
os.makedirs("models", exist_ok=True)

In [16]:
# Train and evaluate a Logistic Regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train_features, y_train)
log_reg_predictions = log_reg.predict(X_test_features)
log_reg_accuracy = accuracy_score(y_test, log_reg_predictions)
if LOGGING_ENABLED:
    logging.info(f"Logistic Regression Accuracy: {log_reg_accuracy:.2f}")
if SAVE_MODELS:
    joblib.dump(log_reg, "models/log_reg_model.pkl")

log_reg_predictions, log_reg_accuracy

(array([0, 1, 0, ..., 1, 1, 0]), 0.927375)

In [17]:
# Train and evaluate a Logistic Regression model without TF-IDF
log_reg_no_tfidf = LogisticRegression(max_iter=1000)
log_reg_no_tfidf.fit(X_train_features_no_tfidf, y_train)
log_reg_no_tfidf_predictions = log_reg_no_tfidf.predict(X_test_features_no_tfidf)
log_reg_no_tfidf_accuracy = accuracy_score(y_test, log_reg_no_tfidf_predictions)
if LOGGING_ENABLED:
    logging.info(
        f"Logistic Regression without TF-IDF Accuracy: {log_reg_no_tfidf_accuracy:.2f}"
    )
if SAVE_MODELS:
    joblib.dump(log_reg_no_tfidf, "models/log_reg_model_no_tfidf.pkl")

log_reg_no_tfidf_predictions, log_reg_no_tfidf_accuracy

(array([0, 1, 0, ..., 0, 0, 0]), 0.70275)

In [18]:
# Train and evaluate a Naive Bayes model
nb_model = MultinomialNB()
nb_model.fit(X_train_features, y_train)
nb_predictions = nb_model.predict(X_test_features)
nb_accuracy = accuracy_score(y_test, nb_predictions)
if LOGGING_ENABLED:
    logging.info(f"Naive Bayes Accuracy: {nb_accuracy:.2f}")
if SAVE_MODELS:
    joblib.dump(nb_model, "models/nb_model.pkl")

nb_predictions, nb_accuracy

(array([0, 1, 0, ..., 1, 1, 0]), 0.901625)

In [19]:
# Train and evaluate a Naive Bayes model without TF-IDF
nb_model_no_tfidf = MultinomialNB()
nb_model_no_tfidf.fit(X_train_features_no_tfidf, y_train)
nb_no_tfidf_predictions = nb_model_no_tfidf.predict(X_test_features_no_tfidf)
nb_no_tfidf_accuracy = accuracy_score(y_test, nb_no_tfidf_predictions)
if LOGGING_ENABLED:
    logging.info(f"Naive Bayes without TF-IDF Accuracy: {nb_no_tfidf_accuracy:.2f}")
if SAVE_MODELS:
    joblib.dump(nb_model_no_tfidf, "models/nb_model_no_tfidf.pkl")

nb_no_tfidf_predictions, nb_no_tfidf_accuracy

(array([0, 1, 0, ..., 0, 0, 0]), 0.70225)

In [20]:
# Train and evaluate a Random Forest model
rf_model = RandomForestClassifier()
rf_model.fit(X_train_features, y_train)
rf_predictions = rf_model.predict(X_test_features)
rf_accuracy = accuracy_score(y_test, rf_predictions)
if LOGGING_ENABLED:
    logging.info(f"Random Forest Accuracy: {rf_accuracy:.2f}")
if SAVE_MODELS:
    joblib.dump(rf_model, "models/rf_model.pkl")

rf_predictions, rf_accuracy

(array([0, 1, 0, ..., 1, 1, 0]), 0.926)

In [21]:
# Train and evaluate a Random Forest model without TF-IDF
rf_model_no_tfidf = RandomForestClassifier()
rf_model_no_tfidf.fit(X_train_features_no_tfidf, y_train)
rf_no_tfidf_predictions = rf_model_no_tfidf.predict(X_test_features_no_tfidf)
rf_no_tfidf_accuracy = accuracy_score(y_test, rf_no_tfidf_predictions)
if LOGGING_ENABLED:
    logging.info(f"Random Forest without TF-IDF Accuracy: {rf_no_tfidf_accuracy:.2f}")
if SAVE_MODELS:
    joblib.dump(rf_model_no_tfidf, "models/rf_model_no_tfidf.pkl")

rf_no_tfidf_predictions, rf_no_tfidf_accuracy

(array([0, 1, 0, ..., 1, 0, 0]), 0.70725)

In [22]:
# Define and compile the DNN model
dnn_model = Sequential()
dnn_model.add(Dense(64, input_dim=X_train_features.shape[1], activation="relu"))
dnn_model.add(Dense(32, activation="relu"))
dnn_model.add(Dense(2, activation="softmax"))

dnn_model.compile(
    loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
)

# Train the DNN model
dnn_model.fit(
    X_train_features, y_train_categorical, epochs=10, batch_size=32, verbose=0
)

# Evaluate the DNN model
dnn_loss, dnn_accuracy = dnn_model.evaluate(
    X_test_features, y_test_categorical, verbose=0
)
if LOGGING_ENABLED:
    logging.info(f"DNN Accuracy: {dnn_accuracy:.2f}")
if SAVE_MODELS:
    dnn_model.save("models/dnn_model.keras")

dnn_loss, dnn_accuracy

(0.28837355971336365, 0.9333750009536743)

In [23]:
# Define and compile the DNN model without TF-IDF
dnn_model_no_tfidf = Sequential()
dnn_model_no_tfidf.add(
    Dense(64, input_dim=X_train_features_no_tfidf.shape[1], activation="relu")
)
dnn_model_no_tfidf.add(Dense(32, activation="relu"))
dnn_model_no_tfidf.add(Dense(2, activation="softmax"))

dnn_model_no_tfidf.compile(
    loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
)

# Train the DNN model without TF-IDF
dnn_model_no_tfidf.fit(
    X_train_features_no_tfidf, y_train_categorical, epochs=10, batch_size=32, verbose=0
)

# Evaluate the DNN model without TF-IDF
dnn_no_tfidf_loss, dnn_no_tfidf_accuracy = dnn_model_no_tfidf.evaluate(
    X_test_features_no_tfidf, y_test_categorical, verbose=0
)
if LOGGING_ENABLED:
    logging.info(f"DNN without TF-IDF Accuracy: {dnn_no_tfidf_accuracy:.2f}")
if SAVE_MODELS:
    dnn_model_no_tfidf.save("models/dnn_model_no_tfidf.keras")

dnn_no_tfidf_loss, dnn_no_tfidf_accuracy

(0.5216898918151855, 0.7041249871253967)

In [24]:
def load_model_and_test(
    model_path: str,
    test_file: str,
    positive_words: Set[str],
    negative_words: Set[str],
    tfidf_vectorizer: TfidfVectorizer = None,
    use_tfidf: bool = True,
) -> None:
    # Load the model
    if model_path.endswith(".pkl"):
        model = joblib.load(model_path)
    elif model_path.endswith(".keras"):
        model = tf.keras.models.load_model(model_path)
    else:
        raise ValueError("Unsupported model format")

    # Load test data
    with open(test_file, "r") as file:
        test_reviews = file.readlines()

    # Extract features
    if use_tfidf:
        test_features = extract_features(
            test_reviews, positive_words, negative_words, tfidf_vectorizer
        )
    else:
        test_features = extract_features_without_tfidf(
            test_reviews, positive_words, negative_words
        )

    # Predict
    if model_path.endswith(".keras"):
        predictions = model.predict(test_features)
        predictions = np.argmax(predictions, axis=1)
    else:
        predictions = model.predict(test_features)

    # Print predictions
    for review, prediction in zip(test_reviews, predictions):
        print(
            f"Review: {review.strip()}\nPrediction: {'Positive' if prediction == 1 else 'Negative'}\n"
        )


In [25]:
# Test the Logistic Regression model
load_model_and_test(
    "models/log_reg_model.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    tfidf_vectorizer,
    use_tfidf=True,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Negative

Review: Amazing value for the price, very satisfied.
Prediction: Positive

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [26]:
# Test the Logistic Regression model without TF-IDF
load_model_and_test(
    "models/log_reg_model_no_tfidf.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    use_tfidf=False,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Positive

Review: Amazing value for the price, very satisfied.
Prediction: Negative

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [27]:

# Test the Naive Bayes model
load_model_and_test(
    "models/nb_model.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    tfidf_vectorizer,
    use_tfidf=True,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Positive

Review: Amazing value for the price, very satisfied.
Prediction: Positive

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [28]:

# Test the Naive Bayes model without TF-IDF
load_model_and_test(
    "models/nb_model_no_tfidf.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    use_tfidf=False,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Positive

Review: Amazing value for the price, very satisfied.
Prediction: Negative

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [29]:

# Test the Random Forest model
load_model_and_test(
    "models/rf_model.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    tfidf_vectorizer,
    use_tfidf=True,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Negative

Review: Amazing value for the price, very satisfied.
Prediction: Positive

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [30]:

# Test the Random Forest model without TF-IDF
load_model_and_test(
    "models/rf_model_no_tfidf.pkl",
    "test-reviews.txt",
    positive_words,
    negative_words,
    use_tfidf=False,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Positive

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Positive

Review: Amazing value for the price, very satisfied.
Prediction: Negative

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [31]:

# Test the DNN model
load_model_and_test(
    "models/dnn_model.keras",
    "test-reviews.txt",
    positive_words,
    negative_words,
    tfidf_vectorizer,
    use_tfidf=True,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Negative

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Negative

Review: Amazing value for the price, very satisfied.
Prediction: Positive

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative



In [32]:

# Test the DNN model without TF-IDF
load_model_and_test(
    "models/dnn_model_no_tfidf.keras",
    "test-reviews.txt",
    positive_words,
    negative_words,
    use_tfidf=False,
)


Review: Great product! Highly recommend it.
Prediction: Positive

Review: Terrible experience, would not buy again.
Prediction: Positive

Review: Absolutely loved it, exceeded my expectations.
Prediction: Positive

Review: The quality was poor and it broke after a week.
Prediction: Negative

Review: Fantastic service and fast delivery.
Prediction: Positive

Review: Very disappointed, not worth the money.
Prediction: Positive

Review: Amazing value for the price, very satisfied.
Prediction: Positive

Review: The item arrived damaged and customer service was unhelpful.
Prediction: Negative

Review: Exceeded my expectations, will definitely purchase again.
Prediction: Negative

Review: Not as described, very unhappy with the purchase.
Prediction: Negative

