# In [1]:
import tkinter as tk
from tkinter import Text, Button, Label, messagebox
import pandas as pd
import regex as re
import joblib

# In [8]:
def lowercasing(text):
    """Return ``text`` converted to lower case.

    Values that are not strings are returned unchanged.
    """
    return text.lower() if isinstance(text, str) else text

def data_deidentification(text):
    """Remove ``@handle`` mentions (plus a colon directly after them).

    Values that are not strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    mention_pattern = r'@\w+\:?'
    return re.sub(mention_pattern, '', text)

def remove_hashtags(text):
    """Remove ``#hashtag`` tokens from ``text``.

    Values that are not strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    hashtag_pattern = r'#\w+'
    return re.sub(hashtag_pattern, '', text)

def remove_urls(text):
    """Remove ``http://`` and ``https://`` URLs from ``text``.

    A URL runs up to the next whitespace character. Values that are not
    strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    url_pattern = r'https?://\S+'
    return re.sub(url_pattern, '', text)

def remove_numbers(text):
    """Remove every run of digits from ``text``.

    Values that are not strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    digits_pattern = r'\d+'
    return re.sub(digits_pattern, '', text)

def remove_extra_spaces(text):
    """Trim ``text`` and collapse each whitespace run into one space.

    Values that are not strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    trimmed = text.strip()
    return re.sub(r'\s+', ' ', trimmed)

def contraction_expansion(text):
    """Expand common English contractions (and ``%``) in ``text``.

    Replacements are plain substring substitutions applied in the order
    listed below, so specific forms such as "won't" must stay ahead of the
    generic suffixes such as "n't".

    Args:
        text: The text to expand. Expected to be lower-cased already, since
            the lookups are case-sensitive.

    Returns:
        The expanded string. Values that are not strings are returned
        unchanged, matching the other preprocessing helpers.
    """
    if not isinstance(text, str):
        return text

    # Order matters: whole-word contractions first, generic suffixes last.
    contractions = (
        ("won't", "will not"),
        ("'cause", "because"),
        ("can't", "cannot"),
        ("what's", "what is"),
        ("don't", "do not"),
        ("aren't", "are not"),
        ("isn't", "is not"),
        ("%", " percent"),
        ("that's", "that is"),
        ("doesn't", "does not"),
        ("he's", "he is"),
        ("she's", "she is"),
        ("it's", "it is"),
        ("n't", " not"),
        ("'ve", " have"),
        ("'s", " is"),
        # Typographic apostrophe: dropped entirely rather than expanded.
        ("’s", ""),
        ("'re", " are"),
        ("'d", " would"),
        ("'ll", " will"),
        ("'m", " am"),
    )
    for contraction, replacement in contractions:
        text = text.replace(contraction, replacement)
    return text

def punctuations_and_abbreviations(text):
    """Expand chat abbreviations and replace punctuation with spaces.

    The rules are regular-expression substitutions applied strictly in the
    order listed, so earlier rules can change what later rules see.

    Args:
        text: The text to normalise.

    Returns:
        The normalised string. Values that are not strings are returned
        unchanged, matching the other preprocessing helpers.
    """
    if not isinstance(text, str):
        return text

    # NOTE(review): the "w/o" fix and the final character class change the
    # output for inputs containing "w/o" or "<"; confirm the persisted models
    # were trained on text that is compatible with this.
    library = [
        # "w/o" must be tried before "w/", otherwise "w/" consumes its prefix
        # and "w/o" can never match. The word boundary keeps "w/out" on the
        # "w/" rule.
        (r"w/o\b", "without"),
        (r"w/", " with "),
        (r"(\d+)(k)", r"\g<1>000"),
        (r":", " : "),
        (r" u s ", " american "),
        # NOTE(review): "\0" is the NUL escape, so this only matches a NUL
        # character followed by "s"; the intended pattern is unclear.
        (r"\0s", "0"),
        (r" 9 11 ", "911"),
        (r"e - mail", "email"),
        (r"j k", "jk"),
        (r"\s{2,}", " "),
        (r"amp;", "and"),
        (r"g2g", "gtg"),
        (r"2moro", "tomorrow"),
        (r"b4", "before"),
        (r"2nite", "tonight"),
        (r"2day", "today"),
        (r"4U", "for you"),
        (r"4get", "forget"),
        (r"2morrow", "tomorrow"),
        (r"2be", "because"),
        (r"l8r", "later"),
        (r",", " "),
        (r"\.", " "),
        (r"!", " "),
        (r";", " "),
        (r"-", " "),
        (r":", " "),
        (r"\/", " "),
        (r"%", " "),
        (r"&", " "),
        (r"\^", " ^ "),
        (r"\+", " + "),
        # No effect in practice: every "-" was already replaced above.
        (r"\-", " - "),
        (r"\=", " = "),
        # The hyphen is escaped so "+-=" is three literals rather than the
        # range "+" to "=", which also let "<" through.
        (r"[^A-Za-z0-9^,!.\/+\-=]", " "),
    ]
    for pattern, replacement in library:
        text = re.sub(pattern, replacement, text)
    return text
def name_removal(text):
    """Replace candidate names, nicknames and titles with a single space.

    Each entry is matched as a whole word (regex word boundaries on both
    sides) and is case-sensitive, so the text is expected to be lower-cased
    beforehand.

    Args:
        text: The text to scrub.

    Returns:
        The scrubbed string. Values that are not strings are returned
        unchanged, matching the other preprocessing helpers.
    """
    if not isinstance(text, str):
        return text

    candidate = (
        # presidential candidates
        "president",
        "rodrigo",
        "'roa",
        "duterte",
        "du30",
        "prrd",
        "rody",
        "digong",
        "binay",
        "jojo",
        "jejo",
        "jejomar",
        "b1nay",
        "mar",
        "roxas",
        "grace",
        "poe",
        "miriam",
        "defensor",
        "santiago",
        # vice presidential candidates
        "alan",
        "peter",
        "cayetano",
        "apc",  # abbreviation for alan peter cayetano
        "leni",
        "robredo",
        "francis",
        "escudero",
        "chiz",
        "honasan",
        "gringo",
        "gregorio",
        "bongbong",
        "ferdinand",
        "marcos",
        "bbm",
        "antonio",
        "trillanes",
        "vice",
        "vp",
        "villar",
        "erap",
        "alma",
        "moreno",
        "djp",
        "senator",
        "daniel",
        "padilla",
        "abi",
        "abby",
        "zapanta",
        "mds",
    )
    # Every name is replaced by one space so neighbouring words stay apart.
    for name in candidate:
        text = re.sub(r'\b' + re.escape(name) + r'\b', " ", text)
    return text

def remove_rt_and_single_char(text):
    """Delete the stand-alone word ``rt`` and stand-alone lowercase letters.

    The text is stripped of leading and trailing whitespace first; the
    whitespace that surrounded a deleted token is left in place. Values that
    are not strings are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    trimmed = text.strip()
    return re.sub(r'\brt\b|\b[a-z]\b', '', trimmed)

    
# Load the persisted artifacts once at start-up so classify_text can reuse
# them on every call. The file names are relative, so they are resolved
# against the current working directory.
# The four base estimators are reported in the GUI as DT, KNN, GNB and SVM
# (presumably decision tree, k-nearest neighbours, Gaussian naive Bayes and
# support vector machine - confirm against the training code); meta_model is
# reported as "LR Metamodel".
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts from a trusted source.
model_1 = joblib.load("DT.pkl")
model_2 = joblib.load("KNN.pkl")   
model_3 = joblib.load("GNB.pkl")   
model_4 = joblib.load("SVM.pkl")   
meta_model = joblib.load("LR-metamodel.pkl")   
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")   
    
    

def classify_text(event=None):
    """Classify the text typed into the input box and display every verdict.

    The text is read from ``text_input``, run through the preprocessing
    helpers, vectorised with ``tfidf_vectorizer`` and scored by the four base
    models. Their predictions are appended to the TF-IDF features and passed
    to ``meta_model``. Results are written to ``result_label``; a warning
    dialog is shown instead when there is nothing to analyse. A truthy
    prediction is displayed as "Hate", a falsy one as "Not Hate".

    Args:
        event: Optional Tk event, accepted so the function can be used
            directly as a key binding. It is not inspected.

    Returns:
        None.
    """
    input_text = text_input.get("1.0", "end-1c")
    if not input_text:
        messagebox.showwarning("Warning", "Please enter text for analysis")
        return

    # Preprocess the input text; the steps run in the order listed.
    # NOTE(review): data_deidentification is defined in this file but is not
    # applied here - confirm this matches how the models were trained.
    preprocessing_steps = (
        lowercasing,
        remove_hashtags,
        remove_urls,
        remove_numbers,
        remove_extra_spaces,
        contraction_expansion,
        punctuations_and_abbreviations,
        name_removal,
        remove_rt_and_single_char,
    )
    preprocessed_text = input_text
    for step in preprocessing_steps:
        preprocessed_text = step(preprocessed_text)

    # Preprocessing can strip everything (e.g. input made only of URLs).
    if not preprocessed_text.strip():
        messagebox.showwarning("Warning", "Please enter text for analysis")
        return

    # TF-IDF vectorization for the preprocessed text.
    input_tfidf = tfidf_vectorizer.transform([preprocessed_text])
    # scikit-learn deprecated get_feature_names() in 1.0 and removed it in
    # 1.2; prefer get_feature_names_out() and fall back for old versions.
    get_names = getattr(tfidf_vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = tfidf_vectorizer.get_feature_names
    input_df = pd.DataFrame(input_tfidf.toarray(), columns=get_names())

    # Make predictions using the base models, in display order.
    base_models = (
        ("DT", model_1),
        ("KNN", model_2),
        ("GNB", model_3),
        ("SVM", model_4),
    )
    predictions = [(label, model.predict(input_df)) for label, model in base_models]

    # Display predictions of the base models.
    base_model_results = "".join(
        f"{label}: {'Hate' if pred[0] else 'Not Hate'}\n"
        for label, pred in predictions
    )
    result_label.config(text=base_model_results)

    # The meta-model input is the TF-IDF features followed by one column per
    # base-model prediction, in the same order as above.
    input_blend = pd.concat(
        [input_df] + [pd.DataFrame(pred) for _, pred in predictions],
        axis=1,
    )
    meta_pred = meta_model.predict(input_blend.values)

    # Display the final classification result below the base-model results.
    final_result = f"LR Metamodel : {'Hate' if meta_pred[0] else 'Not Hate'}"
    result_label.config(text=result_label.cget("text") + "\n\n" + final_result)


# Create GUI window
root = tk.Tk()
root.title("Hate Detector GUI")

# Text Input
text_input_label = Label(root, text="Enter Text:", font=("Helvetica", 18))  # Adjust font size here
text_input_label.pack()
text_input = Text(root, height=20, width=70, font=("Helvetica", 20), wrap="word")
text_input.pack()

# Analyze Button
analyze_button = Button(root, text="Analyze", command=classify_text, font=("Helvetica", 15))  # Adjust font size here
analyze_button.pack()

# Result Label
result_label = Label(root, text="Base Model Predictions:", font=("Helvetica", 18))  # Adjust font size here
result_label.pack()


def on_enter(event):
    """Run the classification when Return is pressed.

    Returning 'break' tells Tk to stop processing further bindings for this
    event.
    """
    classify_text()
    return 'break'


# Tk runs a widget's own bindings before its class bindings and the toplevel
# bindings. Binding on the Text widget itself lets 'break' suppress the
# default newline insertion; a binding on root alone runs too late for that.
text_input.bind('<Return>', on_enter)
# Keep Return working when the keyboard focus is elsewhere in the window.
root.bind('<Return>', on_enter)

# Run the GUI
root.mainloop()