In [None]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer


In [None]:

# Read the reviews dataset. The path is relative, so the CSV is looked up
# in the kernel's current working directory.
REVIEWS_CSV = 'Womens Clothing E-Commerce Reviews.csv'
df = pd.read_csv(REVIEWS_CSV)

# Preview the first rows as the cell output to confirm the file loaded.
df.head()

In [None]:
# Rows whose 'Review Text' is missing are removed from df before it is used
# further; the number of removed rows is reported. Nothing happens (and nothing
# is printed) when no values are missing.
review_is_missing = df['Review Text'].isna()
missing_values = review_is_missing.sum()
if missing_values:
    # Keep only the rows that actually have a value in 'Review Text'.
    df = df.loc[~review_is_missing]
    print(f"Dropped {missing_values} rows with missing 'Review Text' values.")

In [None]:

# Separate the model input from the label:
#   X - the 'Review Text' column (input to the vectorizer)
#   y - the 'Recommended IND' column (the value the classifier predicts)
X = df['Review Text']
y = df['Recommended IND']

# Hold out 20% of the rows for evaluation.
#   random_state=42 pins the shuffle so the split is reproducible across runs.
#   stratify=y keeps the label proportions the same in the training and test
#   partitions, so the reported metrics are not distorted by an uneven split.
# NOTE: stratification requires every label value to occur at least twice;
# train_test_split raises ValueError otherwise.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:

# Turn the raw text into TF-IDF features.
# The vocabulary is capped at 1000 terms and the built-in English stop-word
# list is applied.
tfidf_options = {'max_features': 1000, 'stop_words': 'english'}
vectorizer = TfidfVectorizer(**tfidf_options)

# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:

# Fit a logistic regression classifier (library default settings) on the
# TF-IDF training features. fit() returns the estimator itself, so creation
# and training can be chained.
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Show the fitted estimator as the cell output.
model


In [None]:

# Predict a label for every row of the held-out TF-IDF features.
predictions = model.predict(X=X_test_tfidf)


In [None]:

# Compare the held-out labels with the model's predictions.
# Every metric takes the same (true labels, predicted labels) pair.
truth_and_prediction = (y_test, predictions)

accuracy = accuracy_score(*truth_and_prediction)
conf_matrix = confusion_matrix(*truth_and_prediction)
class_report = classification_report(*truth_and_prediction)


In [None]:

# Print the three evaluation results, one after another, each starting on
# its own line. Accuracy is shown with two decimal places.
print(
    f'Accuracy: {accuracy:.2f}',
    f'Confusion Matrix:\n{conf_matrix}',
    f'Classification Report:\n{class_report}',
    sep='\n',
)
