In [2]:
import json
import re
import os
import pandas as pd
import numpy as np
import glob
import string

In [3]:
# Vocabulary used by the profanity check. Entries are matched as whole
# words/phrases, ignoring case (see check_profanity).
profane_words = [
    "damn",
    "hell",
    "crap",
    "suck",
    "idiot",
    "stupid",
    "fool",
    "jerk",
    "fuck",
    "shit",
    "ass",
    "bitch",
    "dick",
    "piss",
    "bastard",
    "slut",
    "douche",
    "cock",
    "cunt",
    "motherfucker",
    "twat",
    "asshole",
    "bullshit",
    "horseshit",
    "goddamn",
    "son of a bitch",
]

In [7]:
# Read each JSON file in All_Conversations/ one at a time and
# check the text of every message for the presence of profane words.
# Compiled patterns keyed by the tuple of words they were built from, so the
# regex is assembled once per distinct word list instead of once per message.
_PROFANITY_PATTERN_CACHE = {}


def _get_profanity_pattern(words):
    """Return a compiled whole-word, case-insensitive regex for ``words``.

    Returns None when ``words`` contains no non-empty entries.
    """
    # Drop empty entries: an empty alternative would match at every word
    # boundary and flag virtually any text.
    key = tuple(word for word in words if word)
    if key not in _PROFANITY_PATTERN_CACHE:
        if key:
            alternation = '|'.join(re.escape(word) for word in key)
            compiled = re.compile(r'\b(?:' + alternation + r')\b', re.IGNORECASE)
        else:
            compiled = None
        _PROFANITY_PATTERN_CACHE[key] = compiled
    return _PROFANITY_PATTERN_CACHE[key]


def check_profanity(text, words=None):
    """Check if the text contains any profane words.

    Matching is case-insensitive and anchored on word boundaries, so "hell"
    matches "What the HELL" but not "hello" or "shell".

    Args:
        text: The message text to scan. Anything that is not a non-empty
            string (for example ``None`` from a JSON ``null``) is treated as
            containing no profanity.
        words: Optional iterable of words/phrases to look for. Defaults to the
            module-level ``profane_words`` list, read at call time.

    Returns:
        True if at least one word or phrase occurs in ``text``, else False.
    """
    if words is None:
        words = profane_words
    if not isinstance(text, str) or not text:
        return False
    pattern = _get_profanity_pattern(words)
    return pattern is not None and pattern.search(text) is not None


# Each JSON file holds one conversation: a list of messages structured as follows:
# [
#     {
#         "speaker": "Agent",
#         "text": "Hello, is this Mr. Johnson? This is Lisa calling from XYZ Collections. How are you today?",
#         "stime": 0,
#         "etime": 7
#     },
#     {
#         "speaker": "Customer",
#         "text": "I'm sorry, but I think you have the wrong person. My name is Sarah.",
#         "stime": 6.5,
#         "etime": 12
#     },
#     {
#         "speaker": "Agent",
#         "text": "Oh, I apologize for the confusion, Sarah. I'm reaching out about a debt related to an outstanding balance with Definite Bank.",
#         "stime": 11,
#         "etime": 19
#     },
#     {
#         "speaker": "Customer",
#         "text": "I don't have any account with Definite Bank. You might want to check your records.",
#         "stime": 18,
#         "etime": 24
#     },
#     {
#         "speaker": "Agent",
#         "text": "Thank you for letting me know. I will make a note to update our records.",
#         "stime": 23,
#         "etime": 30
#     },
#     {
#         "speaker": "Customer",
#         "text": "I appreciate that. Is there anything else I need to do?",
#         "stime": 29,
#         "etime": 34
#     },
#     {
#         "speaker": "Agent",
#         "text": "No, that's all. I'm sorry for any inconvenience caused. Have a great day!",
#         "stime": 33,
#         "etime": 40
#     },
#     {
#         "speaker": "Customer",
#         "text": "Thank you, you too!",
#         "stime": 39,
#         "etime": 42
#     }
# ]

def process_json_file(file_path):
    """Load one conversation file and flag each message for profanity.

    Args:
        file_path: Path to a JSON file containing a list of message dicts,
            each with at least 'speaker' and 'text' keys.

    Returns:
        A list with one dict per message, holding 'speaker', 'text',
        'contains_profanity' (result of check_profanity on the text) and
        'file_name' (base name of ``file_path``).
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        conversation = json.load(handle)

    def annotate(entry):
        # The text is read and checked first, then the row is assembled.
        body = entry['text']
        flagged = check_profanity(body)
        return {
            'speaker': entry['speaker'],
            'text': body,
            'contains_profanity': flagged,
            'file_name': os.path.basename(file_path),
        }

    return [annotate(entry) for entry in conversation]


def main():
    """Scan every conversation in All_Conversations/ and print flagged messages.

    Each message whose 'contains_profanity' flag is set is printed with its
    file name, speaker and text, followed by a separator line. Files are
    processed in sorted path order so the report is reproducible. Returns
    None.
    """
    # glob.glob makes no ordering guarantee (it depends on the file system),
    # so sort the paths to get the same report order on every run.
    json_files = sorted(glob.glob('All_Conversations/*.json'))

    if not json_files:
        # Without this notice, a wrong working directory looks exactly like
        # "no profanity found".
        print("No JSON files found in All_Conversations/")
        return

    # Collect only the flagged messages; all files are processed before
    # anything is printed.
    flagged_results = []
    for json_file in json_files:
        flagged_results.extend(
            result
            for result in process_json_file(json_file)
            if result['contains_profanity']
        )

    # Print the messages with profanity along with the file name
    for result in flagged_results:
        print(f"File: {result['file_name']}")
        print(f"Speaker: {result['speaker']}, Text: {result['text']}")
        print("-" * 80)
        print()


# Run the scan over All_Conversations/*.json; flagged messages are printed below.
main()

File: 04bec80f-8614-484b-8ba2-831ff9dd03ef.json
Speaker: Agent, Text: Hello, this is Kyle from XYZ Collections. How the hell are you?
--------------------------------------------------------------------------------

File: 04bec80f-8614-484b-8ba2-831ff9dd03ef.json
Speaker: Agent, Text: Alright, that checks out. Now, you owe us a shitload of money. What the hell are you planning to do about it?
--------------------------------------------------------------------------------

File: 04bec80f-8614-484b-8ba2-831ff9dd03ef.json
Speaker: Agent, Text: Options? Don't give me that crap. You need to pay up, plain and simple!
--------------------------------------------------------------------------------

File: 04bec80f-8614-484b-8ba2-831ff9dd03ef.json
Speaker: Agent, Text: Payment plan? Who the hell do you think you are? Just pay the damn balance!
--------------------------------------------------------------------------------

File: 20690906-b8d4-40c5-8474-9127c47b1299.json
Speaker: Customer, Tex