In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

import joblib  # kept at the head of the cell so every dependency is visible before any work runs

# --- Configuration: everything a reader might want to tune lives here ---
DATA_PATH = '/content/modified_dataset.csv'       # CSV holding the tweets and their labels
TEXT_COLUMN = 'tweet_text'                        # column with the raw tweet text
LABEL_COLUMN = 'label'                            # column with the class label
CLASSIFIER_PATH = 'cyberbullying_classifier.pkl'  # fitted Naive Bayes model
VECTORIZER_PATH = 'tfidf_vectorizer.pkl'          # fitted TF-IDF vocabulary / IDF weights
TEST_SIZE = 0.2
RANDOM_STATE = 42
MAX_FEATURES = 10000

# Load the dataset (point DATA_PATH at your own CSV file).
# Rows missing the text or the label are dropped up front: TfidfVectorizer
# raises on NaN documents, so this keeps a single bad row from aborting the run.
data = pd.read_csv(DATA_PATH).dropna(subset=[TEXT_COLUMN, LABEL_COLUMN])

# Split the data into training and testing sets.
# NOTE(review): the recorded run shows low recall on the minority class;
# consider stratify=data[LABEL_COLUMN] and/or a class-imbalance-aware model.
# Left unchanged here so the recorded metrics remain reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data[TEXT_COLUMN],
    data[LABEL_COLUMN],
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Text preprocessing using TF-IDF vectorization.
# The vectorizer is fitted on the training split only and then applied to the
# test split, so no test-set vocabulary leaks into training.
tfidf_vectorizer = TfidfVectorizer(max_features=MAX_FEATURES, stop_words='english')
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Create and train a simple Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

# Make predictions
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))

# Persist BOTH fitted artefacts. The classifier alone cannot be used after a
# kernel restart: new text must be transformed with the exact same fitted
# vectorizer (same vocabulary and IDF weights) that produced the training matrix.
joblib.dump(classifier, CLASSIFIER_PATH)
joblib.dump(tfidf_vectorizer, VECTORIZER_PATH)

# To make predictions in the future, load both artefacts and use them like this.
# Only load pickle/joblib files you created yourself or otherwise trust.
loaded_classifier = joblib.load(CLASSIFIER_PATH)
loaded_vectorizer = joblib.load(VECTORIZER_PATH)
new_text = ["Some text to classify as cyberbullying or not"]
new_text_tfidf = loaded_vectorizer.transform(new_text)
prediction = loaded_classifier.predict(new_text_tfidf)


Accuracy: 0.8591047279589056
                   precision    recall  f1-score   support

   cyber_bullying       0.86      0.99      0.92      7915
not_cyberbullying       0.78      0.24      0.37      1624

         accuracy                           0.86      9539
        macro avg       0.82      0.61      0.64      9539
     weighted avg       0.85      0.86      0.83      9539



In [22]:
# Sample tweet to classify (a direct threat).
new_text = ["I will kill you little boy"]

# Restore the persisted model from disk.
loaded_classifier = joblib.load('cyberbullying_classifier.pkl')

# Vectorize with the already-fitted TF-IDF vectorizer from the training cell,
# then predict and show the label.
new_text_tfidf = tfidf_vectorizer.transform(new_text)
prediction = loaded_classifier.predict(new_text_tfidf)
print(prediction)

['cyber_bullying']


In [19]:
# Sample tweet to classify (a product complaint, not aimed at a person).
new_text = ["In other words #mcdonalds your food was crap"]

# Restore the persisted model from disk.
loaded_classifier = joblib.load('cyberbullying_classifier.pkl')

# Vectorize with the already-fitted TF-IDF vectorizer from the training cell,
# then predict and show the label.
new_text_tfidf = tfidf_vectorizer.transform(new_text)
prediction = loaded_classifier.predict(new_text_tfidf)
print(prediction)

['not_cyberbullying']


In [21]:
# Sample tweet to classify (a benign, positive statement).
new_text = ["I love eating, yayyyy"]

# Restore the persisted model from disk.
loaded_classifier = joblib.load('cyberbullying_classifier.pkl')

# Vectorize with the already-fitted TF-IDF vectorizer from the training cell,
# then predict and show the label.
new_text_tfidf = tfidf_vectorizer.transform(new_text)
prediction = loaded_classifier.predict(new_text_tfidf)
print(prediction)

['cyber_bullying']
