In [16]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import joblib

# Load dataset
data_path = r"C:\Users\spoorthi\Downloads\DT-10-Fake-Review-Detection-in-E-Commerce-1\fake reviews dataset.csv"
df = pd.read_csv(data_path)

# Print column names to verify
print("Columns in dataset:", df.columns)

# Ensure correct column names
text_column = 'text' if 'text' in df.columns else df.columns[0]
label_column = 'label' if 'label' in df.columns else df.columns[1]

# Extract text and labels
texts = df[text_column].astype(str).values
labels = df[label_column].values

# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Convert text to TF-IDF features
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X = tfidf_vectorizer.fit_transform(texts)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy:.4f}')

# Ensure directory exists
save_dir = 'saved_models'
os.makedirs(save_dir, exist_ok=True)

# Save model
joblib.dump(model, os.path.join(save_dir, 'naive_bayes_model.pkl'))
joblib.dump(tfidf_vectorizer, os.path.join(save_dir, 'tfidf_vectorizer.pkl'))

print("Model training complete and saved in 'saved_models' directory")


Columns in dataset: Index(['category', 'rating', 'label', 'text_'], dtype='object')
Model Accuracy: 0.4891
Model training complete and saved in 'saved_models' directory
