In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
data = pd.read_csv("Embedded_HateSpeechDetection.csv")
X=data.drop(columns=['Label','Text','Tokens'])
y=data['Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Naive Bayes Model:
Naive Bayes models typically have fewer hyperparameters to tune compared to other models like SVM or XGBoost. However, it is still beneficial to tune these hyperparameters to achieve optimal performance.

In [10]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score, confusion_matrix, precision_recall_curve, auc
# Train the model
gnb_clf = GaussianNB()
gnb_clf.fit(X_train, y_train)

# Make predictions
y_pred = gnb_clf.predict(X_test)

# Evaluate the classifier
print("Gaussian Naive Bayes")
# Precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

# Recall
recall = recall_score(y_test, y_pred)
print("Recall:", recall)


# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# ROC-AUC
roc_auc = roc_auc_score(y_test, y_pred)
print("ROC-AUC Score:", roc_auc)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Gaussian Naive Bayes
Precision: 0.5166112956810631
Recall: 0.6542776998597476
Accuracy: 0.6119318181818182
ROC-AUC Score: 0.6186861278668365
Confusion Matrix:
 [[1221  873]
 [ 493  933]]


In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB

# Define the hyperparameter grid
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(GaussianNB(), param_grid, cv=5, scoring='accuracy')

# Fit the model
grid_search.fit(X_train, y_train)

# Get the best model
best_model = grid_search.best_estimator_

# Make predictions
y_pred = best_model.predict(X_test)

# Evaluate the classifier
print("Best Gaussian Naive Bayes using Grid Search")
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Best Gaussian Naive Bayes using Grid Search
Precision: 0.5166112956810631
Recall: 0.6542776998597476
Accuracy: 0.6119318181818182
ROC-AUC Score: 0.6186861278668365
Confusion Matrix:
 [[1221  873]
 [ 493  933]]
