In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score
import tkinter as tk
from tkinter import messagebox

# Step 1: Load the Dataset from CSV
df = pd.read_csv('SwahiliHateSpeech.csv')

# Check for and handle NaN values
if df.isnull().values.any():
    print("Dataset contains NaN values. Handling them...")
    df = df.dropna()  # Drop rows with NaN values

# Prepare features and labels
X = df['Text']  # Feature: Text column
y = df['Label']  # Label: Hate Speech or Non-Hate

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Create and Train the Model
model = make_pipeline(CountVectorizer(), MultinomialNB())
model.fit(X_train, y_train)

# Step 3: Evaluate the Model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 4: Develop Input Interface Using Tkinter
def classify_text():
    user_input = text_entry.get("1.0", "end-1c")  # Get text from entry widget
    prediction = model.predict([user_input])  # Make prediction
    messagebox.showinfo("Prediction Result", f"Matokeo: {prediction[0]}")  # Display result in a message box

# Create the main window
root = tk.Tk()
root.title("Kiswahili Hate Speech Detection")

# Create a label
label = tk.Label(root, text="Andika ujumbe wako hapa:")
label.pack()

# Create a text entry widget
text_entry = tk.Text(root, height=10, width=50)
text_entry.pack()

# Create a button to submit input
submit_button = tk.Button(root, text="Tathmini", command=classify_text)
submit_button.pack()

# Run the application
root.mainloop()

Accuracy: 0.62

Classification Report:
               precision    recall  f1-score   support

"Hate Speech"       0.00      0.00      0.00         3
   "Non-Hate"       0.11      0.50      0.18         2
  Hate Speech       0.77      0.91      0.83        11
     Non-Hate       0.91      0.56      0.69        18

     accuracy                           0.62        34
    macro avg       0.45      0.49      0.43        34
 weighted avg       0.74      0.62      0.65        34

