# Inappropriate Comment Scanner 
Rudainah Khaled and Omair Hamd alla - Natural Language Processing - IAU

# The Libraries

In [None]:
from customtkinter import *
import pandas as pd
from better_profanity import profanity

# Filtering the Comments 

In [None]:
def filteringCommnet(data, text_column, text_list):
    """Censor profanity in one column and export the result to Excel.

    Adds a ``filtered_text`` column to *data* (in place) holding the output
    of ``profanity.censor`` for every value of *text_column*, then writes the
    whole frame to ``filtered_comments.xlsx`` (a relative path) without the
    index.

    Args:
        data: DataFrame holding the comments; modified in place.
        text_column: Label of the column whose values are censored.
        text_list: Not used by this function; kept so existing callers
            keep working.
    """
    # No extra words are supplied: the list handed to the loader is empty.
    # NOTE(review): presumably this leaves the library's default word list
    # in effect -- confirm against the installed better_profanity version.
    extra_words = []
    profanity.load_censor_words(extra_words)

    censored = data[text_column].apply(profanity.censor)
    data["filtered_text"] = censored
    data.to_excel("filtered_comments.xlsx", index=False)

    print("Filtering completed and it has been exported to a new file!")

# Comments Toxicity 

In [None]:
def is_toxic(text):
    """Return True when *text* contains profanity or a trigger word.

    A comment counts as toxic if ``profanity.contains_profanity`` flags it or
    if it contains one of the trigger words ``"offensive"`` / ``"hateful"``
    (matched as substrings, ignoring letter case).

    Args:
        text: The comment to inspect. Usually a string; missing values
            (``None``/NaN, e.g. empty spreadsheet cells) and other
            non-string cell values are accepted as well.

    Returns:
        bool: ``True`` if the comment is considered toxic, else ``False``.
    """
    if not isinstance(text, str):
        # An empty cell carries no comment, so it cannot be toxic.
        # NOTE(review): better_profanity appears to compare the raw value
        # with its censored *string* form, which would report every
        # non-string as profane -- hence the explicit normalisation here.
        if pd.isna(text):
            return False
        text = str(text)

    if profanity.contains_profanity(text):
        return True

    # Lower-case once so "Offensive" at the start of a sentence is caught too.
    lowered = text.lower()
    return any(trigger_word in lowered for trigger_word in ("offensive", "hateful"))

In [None]:
def toxicity(data, text_column, is_toxic):
    """Score every comment for toxicity and export the result to Excel.

    Two columns are added to *data* in place:

    * ``toxicity`` -- ``1`` where *is_toxic* returned ``True`` for the
      comment and ``0`` where it returned ``False``.
    * ``toxic_word_count`` -- the value of ``count_toxic_words`` for the
      comment.

    The frame is then written to ``comments_toxicity.xlsx`` (a relative
    path) without the index.

    Args:
        data: DataFrame holding the comments; modified in place.
        text_column: Label of the column that holds the comment text.
        is_toxic: Callable applied to each comment; expected to return a
            bool.
    """
    # Translate the predicate's True/False into the 1/0 stored in the sheet.
    flag_to_int = {True: 1, False: 0}
    data["toxicity"] = data[text_column].apply(is_toxic).map(flag_to_int)

    data["toxic_word_count"] = data[text_column].apply(count_toxic_words)

    data.to_excel("comments_toxicity.xlsx", index=False)

    print("Toxicity analysis completed and it has been exported to a new file!")

# Toxic Words Counter in Each Comment

In [None]:
def count_toxic_words(text):
    """Count the whitespace-separated words of *text* flagged as profane.

    Each word produced by ``str.split()`` is checked on its own with
    ``profanity.contains_profanity``.

    Args:
        text: The comment to inspect. Missing values (``None``/NaN, e.g.
            empty spreadsheet cells) count as having no toxic words; any
            other non-string value is converted with ``str()`` first.

    Returns:
        int: Number of words flagged as profane.
    """
    if not isinstance(text, str):
        # Non-string cells have no .split(); an empty cell has no words.
        if pd.isna(text):
            return 0
        text = str(text)

    return sum(1 for word in text.split() if profanity.contains_profanity(word))

# Toxic Comments Counter

In [None]:
def count_toxic_comments(data):
    """Print a summary of toxic versus non-toxic comments.

    Args:
        data: DataFrame with a ``toxicity`` column; the column's sum is
            reported as the number of toxic comments, and the remaining
            rows are reported as non-toxic.
    """
    total = len(data)
    toxic = data["toxicity"].sum()

    print(f"Number of comments in this file: {total}")
    print(f"Number of toxic comments: {toxic}")
    print(f"Number of non-toxic comments: {total - toxic}")

# Open the file 

In [None]:
def openFile():
    """Ask the user for a dataset file and run the analysis on it.

    Opens a file-selection dialog, prints the chosen path and passes it to
    ``activateFunctions``. Nothing happens when the dialog is dismissed
    without choosing a file.
    """
    filepath = filedialog.askopenfilename(
        title="Choose Excel file:",
        filetypes=(
            # Every pattern needs the "*." prefix to match by extension.
            ("Excel files", ("*.csv", "*.xlsx")),
            # "*" is the wildcard that matches any file.
            ("all files", "*"),
        ),
    )

    # A cancelled dialog yields an empty value; there is nothing to load.
    if not filepath:
        return

    print(f"\nThe Dataset File Path {filepath}")

    activateFunctions(filepath)

# Activate the Functions

In [None]:
def activateFunctions(filepath):
    """Load the chosen dataset and run every analysis step on it.

    The file is read into a DataFrame, its first column is taken as the
    comment text, and the frame is passed in turn to ``filteringCommnet``,
    ``toxicity`` and ``count_toxic_comments``.

    Args:
        filepath: Path of the dataset. Files ending in ``.csv`` (any letter
            case) are read with ``pd.read_csv``; everything else is read
            with ``pd.read_excel``.
    """
    # The file dialog offers CSV as well as Excel files, and read_excel
    # cannot parse CSV text, so pick the reader from the extension.
    if str(filepath).lower().endswith(".csv"):
        data = pd.read_csv(filepath)
    else:
        data = pd.read_excel(filepath)

    # The first column is treated as the comment text.
    text_column = data.columns[0]
    text_list = data[text_column].tolist()

    filteringCommnet(data, text_column, text_list)
    toxicity(data, text_column, is_toxic)
    count_toxic_comments(data)

# User Interface

In [None]:
def about():
    """Show the "About" window, creating it on first use.

    The window is stored in the module-level ``infoWindow`` so that a
    second click raises the existing window instead of opening another one.
    """
    global infoWindow

    # Reuse a window that is still open rather than stacking up duplicates.
    if infoWindow and infoWindow.winfo_exists():
        infoWindow.lift()
        return

    infoWindow = CTkToplevel(root)
    infoWindow.title("About")
    infoWindow.geometry("420x200")
    infoWindow.columnconfigure((1, 3), weight=1)
    infoWindow.columnconfigure(2, weight=2)
    infoWindow.rowconfigure((1, 2, 4, 6, 7), weight=2)
    infoWindow.rowconfigure((3, 5), weight=1)
    infoWindow.resizable(False, False)
    infoWindow.attributes('-topmost', True)

    # Project description and authors ("scan inappropriate" was previously
    # run together as one word in the displayed text).
    names_Label = CTkLabel(infoWindow,
                           text="This is a Project for NLP course\nMade to scan inappropriate words\nIt was done by:\n\t• Omair Hamd Alla\n\t• Rudainah Khaled",
                           justify='left')
    names_Label.configure(font=("", 16, "normal"))
    names_Label.grid(row=3, column=2, sticky='ew')

    # Credits for the libraries the tool is built on.
    libraries_Label = CTkLabel(infoWindow,
                               text="With the help of these libraries:\n\t• better_profanity : for profanity checking\n\t• pandas : for Excel handling\n\t• customtkinter : for GUI",
                               justify='left')
    libraries_Label.configure(font=("", 16, "normal"))
    libraries_Label.grid(row=4, column=2, sticky='ew')
    infoWindow.lift()

In [None]:
# Main window: fixed-size 420x200 frame.
root = CTk()
root.geometry("420x200")
root.resizable(False, False)
root.title("NLP - Inappropriate Comment Scanner")

# Grid weights: the widgets live in column 3, rows 3, 5 and 6.
root.columnconfigure((1, 2, 4, 5), weight=1)
root.columnconfigure(3, weight=2)
root.rowconfigure((1, 2, 4, 7, 8), weight=2)
root.rowconfigure((3, 5, 6), weight=1)

# Create the label first and place it afterwards: .grid() returns None, so
# chaining it onto the constructor would leave `label` bound to None.
label = CTkLabel(root, text="Welcome to Inappropriate Comment Scanner!\nAdd a CSV file below with \"Comments\" column\nAnd a new file will be created\nIt will be in the same directory as this code.")
label.grid(row=3, column=3)

button = CTkButton(master=root, text="Open File", command=openFile)
button.grid(row=5, column=3, sticky='ew')

# Handle to the "About" window; about() creates it on demand and reuses it.
infoWindow = None
info = CTkButton(master=root, text="About", command=about)
info.grid(row=6, column=3, sticky='ew')

# Blocks until the main window is closed.
root.mainloop()

# Copyright

In [None]:
def copyright():
    """Print the project's copyright notice to standard output."""
    print("© 2023 Rudainah Khaled and Omair Hamd-alla. All rights reserved.")