In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# Load the dataset and discard every row whose 'text' entry is missing
df = pd.read_csv('WELFake_Dataset.csv').dropna(subset=['text'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: English stop words are removed and terms that occur in
# more than 70% of the documents are ignored
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# Learn the vocabulary and IDF weights from the training texts only, then
# reuse them for the test texts (both are cast to unicode strings first)
tfidf_train = tfidf_vectorizer.fit_transform(X_train.astype('U'))
tfidf_test = tfidf_vectorizer.transform(X_test.astype('U'))

# Fit three classifiers on the same TF-IDF training matrix and keep each
# model's predictions for the held-out test matrix.

# Multinomial Naive Bayes
nb_classifier = MultinomialNB()
nb_classifier.fit(tfidf_train, y_train)
nb_pred = nb_classifier.predict(tfidf_test)

# Logistic Regression (iteration cap raised to 5000)
lr_classifier = LogisticRegression(max_iter=5000)
lr_classifier.fit(tfidf_train, y_train)
lr_pred = lr_classifier.predict(tfidf_test)

# Decision Tree. scikit-learn permutes the candidate features at every split,
# so an unseeded tree can differ from run to run when several splits tie.
# Seeding it with the same value as the train/test split keeps the reported
# metrics and the comparison chart reproducible.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(tfidf_train, y_train)
dt_pred = dt_classifier.predict(tfidf_test)

# Per-model classification reports, returned as nested dictionaries
nb_report = classification_report(y_test, nb_pred, output_dict=True)
lr_report = classification_report(y_test, lr_pred, output_dict=True)
dt_report = classification_report(y_test, dt_pred, output_dict=True)

# Collect the weighted-average scores in a fixed model order:
# Naive Bayes, Logistic Regression, Decision Tree
weighted_rows = [report['weighted avg'] for report in (nb_report, lr_report, dt_report)]
precision = [row['precision'] for row in weighted_rows]
recall = [row['recall'] for row in weighted_rows]
f1_score = [row['f1-score'] for row in weighted_rows]

# Accuracy is computed directly from each model's predictions, same order
accuracy = [accuracy_score(y_test, predicted) for predicted in (nb_pred, lr_pred, dt_pred)]

# Comparison graph - grouped bar plot with one group per technique and one
# bar per metric (precision, recall, F1-score, accuracy)
labels = ['Multinomial NB', 'Logistic Regression', 'Decision Tree']

# One x position per technique; this is the centre of each group of bars
bar_positions = np.arange(len(labels))
bar_width = 0.2

# Colors for the bars, one per metric
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

# Plotting the metrics as bar plots with custom colors.
# The four bars are placed symmetrically around each group centre
# (-1.5w, -0.5w, +0.5w, +1.5w) so that the tick label drawn at
# bar_positions sits under the middle of its group rather than under a
# single bar.
plt.figure(figsize=(6, 6))
plt.bar(bar_positions - 1.5 * bar_width, precision, width=bar_width, label='Precision', color=colors[0])
plt.bar(bar_positions - 0.5 * bar_width, recall, width=bar_width, label='Recall', color=colors[1])
plt.bar(bar_positions + 0.5 * bar_width, f1_score, width=bar_width, label='F1-Score', color=colors[2])
plt.bar(bar_positions + 1.5 * bar_width, accuracy, width=bar_width, label='Accuracy', color=colors[3])
plt.xlabel('Technique')
plt.ylabel('Score')
plt.title('Classification Metrics Comparison')
plt.xticks(bar_positions, labels)
plt.legend()
plt.show()
