## Importing the Training and Testing Split Datasets and Performing TF-IDF Transformation on Them for Use in the ML Model

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the pre-split datasets (training file first, then the test file).
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('Train_data.csv', 'Test_data.csv')
)

# Separate the text feature column from the binary target column for each split.
X_train, y_train = train_df['clean_text'], train_df['IsHatespeech']
X_test, y_test = test_df['clean_text'], test_df['IsHatespeech']

# The vocabulary (capped at 2500 features) and IDF weights are learned from the
# training text only; the test text is transformed with the already-fitted
# vectorizer so no information from the test split leaks into the features.
tfidf_vectorizer = TfidfVectorizer(max_features=2500)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

### Support Vector Model
#### This model was used to set a standard benchmark score for the dataset. The model's hyperparameters were tuned: candidate parameter values were tested, and the parameters that performed best were used to train the model. We considered the Accuracy and F1 score to evaluate the performance of the model.

In [27]:
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,  # was missing: the cell raised NameError on a fresh kernel
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# RBF-kernel SVM trained on the TF-IDF features.
# probability=True is required so that predict_proba is available for the
# AUC-ROC computation below; random_state is fixed for reproducibility.
svm_model = SVC(C=1, gamma='scale', kernel='rbf', random_state=42, probability=True)

svm_model.fit(X_train_tfidf, y_train)

# Hard class predictions feed the threshold-based metrics; the probability of
# the second class (column 1, i.e. label 1) feeds the ranking metric (AUC-ROC).
y_pred_svm = svm_model.predict(X_test_tfidf)
y_pred_svm_proba = svm_model.predict_proba(X_test_tfidf)[:, 1]

print("SVM Performance with TF-IDF:")
print("Classification Report:\n", classification_report(y_test, y_pred_svm))
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("AUC-ROC:", roc_auc_score(y_test, y_pred_svm_proba))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("F1 Score:", f1_score(y_test, y_pred_svm))
print("Precision:", precision_score(y_test, y_pred_svm))
print("Recall:", recall_score(y_test, y_pred_svm))

SVM Performance with TF-IDF:
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.73      0.76       944
           1       0.74      0.80      0.77       918

    accuracy                           0.76      1862
   macro avg       0.77      0.76      0.76      1862
weighted avg       0.77      0.76      0.76      1862

Accuracy: 0.7636949516648764
AUC-ROC: 0.8351375272331156
Confusion Matrix:
 [[686 258]
 [182 736]]
F1 Score: 0.7698744769874477
Precision: 0.7404426559356136
Recall: 0.8017429193899782
