In [1]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer


Dropped 845 rows with missing 'Review Text' values.
Logistic Regression Model:
Accuracy: 0.89
Confusion Matrix:
[[ 428  384]
 [ 132 3585]]
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.53      0.62       812
           1       0.90      0.96      0.93      3717

    accuracy                           0.89      4529
   macro avg       0.83      0.75      0.78      4529
weighted avg       0.88      0.89      0.88      4529


Decision Tree Model:
Accuracy: 0.81
Confusion Matrix:
[[ 389  423]
 [ 425 3292]]
Classification Report:
              precision    recall  f1-score   support

           0       0.48      0.48      0.48       812
           1       0.89      0.89      0.89      3717

    accuracy                           0.81      4529
   macro avg       0.68      0.68      0.68      4529
weighted avg       0.81      0.81      0.81      4529



In [None]:

# Load data
df = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')
df.head()


In [None]:

# Check for missing values in the 'Review Text' column
missing_values = df['Review Text'].isnull().sum()
if missing_values > 0:
    df = df.dropna(subset=['Review Text'])  # Drop rows with missing values in 'Review Text'
    print(f"Dropped {missing_values} rows with missing 'Review Text' values.")


In [None]:

# Define target variables
X = df['Review Text']
y = df['Recommended IND']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

# Use TfidfVectorizer to convert text data into numerical features
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:

# Initialize and train a logistic regression model
logistic_model = LogisticRegression()
logistic_model.fit(X_train_tfidf, y_train)

# Initialize and train a decision tree model
decision_tree_model = DecisionTreeClassifier(random_state=42)
decision_tree_model.fit(X_train_tfidf, y_train)


In [None]:

# Make predictions on the test set for both models
logistic_predictions = logistic_model.predict(X_test_tfidf)
decision_tree_predictions = decision_tree_model.predict(X_test_tfidf)

# Evaluate the logistic regression model
logistic_accuracy = accuracy_score(y_test, logistic_predictions)
logistic_conf_matrix = confusion_matrix(y_test, logistic_predictions)
logistic_class_report = classification_report(y_test, logistic_predictions)


In [None]:

print("Logistic Regression Model:")
print(f'Accuracy: {logistic_accuracy:.2f}')
print(f'Confusion Matrix:\n{logistic_conf_matrix}')
print(f'Classification Report:\n{logistic_class_report}')

# Evaluate the decision tree model
decision_tree_accuracy = accuracy_score(y_test, decision_tree_predictions)
decision_tree_conf_matrix = confusion_matrix(y_test, decision_tree_predictions)
decision_tree_class_report = classification_report(y_test, decision_tree_predictions)

print("\nDecision Tree Model:")
print(f'Accuracy: {decision_tree_accuracy:.2f}')
print(f'Confusion Matrix:\n{decision_tree_conf_matrix}')
print(f'Classification Report:\n{decision_tree_class_report}')


In [2]:
# Example prompt for testing the models
prompt = ["This is a great product! I highly recommend it."]

# Transform the prompt using the TfidfVectorizer
prompt_tfidf = vectorizer.transform(prompt)

# Make predictions for both models
logistic_prediction = logistic_model.predict(prompt_tfidf)
decision_tree_prediction = decision_tree_model.predict(prompt_tfidf)

# Print the predictions
print("Logistic Regression Model Prediction:", logistic_prediction[0])
print("Decision Tree Model Prediction:", decision_tree_prediction[0])


Logistic Regression Model Prediction: 1
Decision Tree Model Prediction: 1
