In [2]:
import os

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB


In [3]:
# Load the labelled text dataset and pull out the feature and target columns.
DATASET_PATH = "/content/drive/MyDrive/MINI PROJECT/DATASET/augmented_dataset1.csv"  # Replace with your dataset path

# The cell that mounts Google Drive sits further down this notebook, so on a
# fresh kernel ("Restart & Run All") the CSV is not reachable when this cell
# runs. Mount lazily here so loading no longer depends on a later cell having
# been executed by hand first.
if not os.path.exists(DATASET_PATH):
    # Colab-only module: imported lazily so this cell still runs wherever the
    # file is already present on disk.
    from google.colab import drive
    drive.mount('/content/drive')

df = pd.read_csv(DATASET_PATH)
X = df['Text data']  # Replace with your text column name
y = df['Label']      # Replace with your label column name



In [4]:
# Hold out part of the data for evaluation: 80% of the rows go to training,
# the remainder to testing. random_state is fixed so the split is repeatable.
_split_parts = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)
X_train, X_test, y_train, y_test = _split_parts



In [5]:
# Count Vectorizer with Naive Bayes
# The vectorizer is fitted on the training texts only; the held-out texts are
# then encoded with that already-fitted vectorizer (no refit on test data).
count_vectorizer = CountVectorizer()
X_train_counts, X_test_counts = (
    count_vectorizer.fit_transform(X_train),  # evaluated first: fits, then encodes train
    count_vectorizer.transform(X_test),       # evaluated second: encodes test only
)

In [6]:
# Train a multinomial Naive Bayes classifier on the count features, then
# predict labels for the held-out documents.
# fit() returns the estimator itself, so construction and training are chained.
naive_bayes_count = MultinomialNB().fit(X_train_counts, y_train)
y_pred_count = naive_bayes_count.predict(X_test_counts)

In [7]:

# Evaluate the Count Vectorizer model on the held-out split.
# Precision, recall and F1 are computed with average='weighted'.
accuracy_count = accuracy_score(y_test, y_pred_count)
precision_count = precision_score(y_test, y_pred_count, average='weighted')
recall_count = recall_score(y_test, y_pred_count, average='weighted')
f1_score_count = f1_score(y_test, y_pred_count, average='weighted')
classification_report_count = classification_report(y_test, y_pred_count)

print("Count Vectorizer with Naive Bayes")
print("------------------------------------------------")
# Emit each metric as "<label> <value>", in the same order as the summary above.
for _metric_label, _metric_value in (
    ("Accuracy:", accuracy_count),
    ("Precision:", precision_count),
    ("Recall:", recall_count),
    ("F1 Score:", f1_score_count),
    ("Classification Report:\n", classification_report_count),
):
    print(_metric_label, _metric_value)

Count Vectorizer with Naive Bayes
------------------------------------------------
Accuracy: 0.8078626799557033
Precision: 0.8502973940375035
Recall: 0.8078626799557033
F1 Score: 0.8069723445047041
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.84      0.86      1194
           1       0.68      0.95      0.79      1236
           2       0.99      0.62      0.77      1182

    accuracy                           0.81      3612
   macro avg       0.85      0.81      0.81      3612
weighted avg       0.85      0.81      0.81      3612



In [1]:
# Mount Google Drive so files under /content/drive become readable.
# NOTE(review): this cell carries execution count 1 but is positioned after
# the cells that read from /content/drive; on a fresh kernel it has to run
# before the dataset is loaded.
from google.colab import drive

_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [8]:

# TF-IDF Vectorizer with Naive Bayes
# As with the count features, the vectorizer is fitted on the training texts
# only and then reused unchanged to encode the held-out texts.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf, X_test_tfidf = (
    tfidf_vectorizer.fit_transform(X_train),  # evaluated first: fits, then encodes train
    tfidf_vectorizer.transform(X_test),       # evaluated second: encodes test only
)


In [9]:
# Train a second multinomial Naive Bayes classifier, this time on the TF-IDF
# features, and predict labels for the held-out documents.
# fit() returns the estimator itself, so construction and training are chained.
naive_bayes_tfidf = MultinomialNB().fit(X_train_tfidf, y_train)
y_pred_tfidf = naive_bayes_tfidf.predict(X_test_tfidf)

In [10]:
# Evaluate the TF-IDF Vectorizer model on the held-out split.
# Precision, recall and F1 are computed with average='weighted'.
accuracy_tfidf = accuracy_score(y_test, y_pred_tfidf)
precision_tfidf = precision_score(y_test, y_pred_tfidf, average='weighted')
recall_tfidf = recall_score(y_test, y_pred_tfidf, average='weighted')
f1_score_tfidf = f1_score(y_test, y_pred_tfidf, average='weighted')
classification_report_tfidf = classification_report(y_test, y_pred_tfidf)

print("\nTF-IDF Vectorizer with Naive Bayes")
print("------------------------------------------------")
# Emit each metric as "<label> <value>", in the same order as the summary above.
for _metric_label, _metric_value in (
    ("Accuracy:", accuracy_tfidf),
    ("Precision:", precision_tfidf),
    ("Recall:", recall_tfidf),
    ("F1 Score:", f1_score_tfidf),
    ("Classification Report:\n", classification_report_tfidf),
):
    print(_metric_label, _metric_value)


TF-IDF Vectorizer with Naive Bayes
------------------------------------------------
Accuracy: 0.9086378737541528
Precision: 0.9111489983753304
Recall: 0.9086378737541528
F1 Score: 0.9091354359319005
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.88      0.90      1194
           1       0.85      0.92      0.89      1236
           2       0.96      0.92      0.94      1182

    accuracy                           0.91      3612
   macro avg       0.91      0.91      0.91      3612
weighted avg       0.91      0.91      0.91      3612

