# In [None]:
import os
import pandas as pd
import datetime
import re

# A gap between two consecutive messages from the same sender is reported
# as a delay only when it is strictly greater than this many minutes.
DELAY_THRESHOLD = 15  # minutes in delay threshold

# Regex pattern for message start (adjust based on your chat format).
# Matches a line beginning with "dd/dd/dd, h:mm am -" or "dd/dd/dd, h:mm pm -"
# (two-digit date parts, one- or two-digit hour, lowercase am/pm).
regex_pattern = r'^\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2}\s[ap]m -'


def read_chat_file(file_path):
    """Return the message-start lines of a chat export file.

    Only lines that match ``regex_pattern`` are kept. Every other line,
    including the continuation lines of a multi-line message, is ignored.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The matching lines in file order, each with leading and trailing
        whitespace stripped.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also discards a leading
    # byte-order mark. With plain 'utf-8' the BOM stays attached to the first
    # line, which then fails the '^'-anchored pattern and is silently dropped.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return [line.strip() for line in file if re.match(regex_pattern, line)]

# Helper function to extract timestamp and sender from a message
def extract_timestamp_and_sender(message):
    """Split one chat line into its parsed timestamp and its sender.

    The line is cut at the first ' - '. The left part is parsed as a
    day/month/two-digit-year date with a 12-hour clock; the sender is
    whatever precedes the first ':' in the right part (the whole right part
    when it contains no ':').

    Args:
        message: A chat line of the form "<timestamp> - <sender>: <text>".

    Returns:
        A ``(timestamp, sender)`` tuple. ``timestamp`` is ``NaT`` when the
        left part does not match the expected format.

    Raises:
        ValueError: If ``message`` contains no ' - ' separator.
    """
    head, body = message.split(' - ', 1)
    parsed_time = pd.to_datetime(
        head,
        format='%d/%m/%y, %I:%M %p',
        errors='coerce',  # unparseable timestamps become NaT instead of raising
    )
    author, _, _ = body.partition(':')
    return parsed_time, author

# Function to aggregate messages by date
def _sorted_subdirs(path):
    """Return the paths of the immediate subdirectories of *path*, sorted by name."""
    candidates = (os.path.join(path, name) for name in sorted(os.listdir(path)))
    return [candidate for candidate in candidates if os.path.isdir(candidate)]


def aggregate_messages_by_date(root_directory):
    """Collect the chat messages found under each date folder.

    The expected layout is ``root/<date>/<team>/<person>/<file>.txt``. Entries
    that are not directories at the date, team or person level are skipped
    rather than raising ``NotADirectoryError``. Listings are sorted so the
    result is the same on every run and platform.

    Args:
        root_directory: Directory whose immediate subdirectories are the
            per-date folders.

    Returns:
        A dict mapping each date folder name to the list of message lines
        read from every ``.txt`` file below it.
    """
    aggregated_messages = {}
    for date_path in _sorted_subdirs(root_directory):
        messages_for_date = []
        for team_path in _sorted_subdirs(date_path):
            for person_path in _sorted_subdirs(team_path):
                for file_name in sorted(os.listdir(person_path)):
                    chat_file_path = os.path.join(person_path, file_name)
                    # isfile() keeps a directory that happens to be named
                    # "something.txt" from being opened as a chat file.
                    if file_name.endswith('.txt') and os.path.isfile(chat_file_path):
                        messages_for_date.extend(read_chat_file(chat_file_path))
        # NOTE: messages from different files are concatenated in traversal
        # order; they are not re-sorted by timestamp here.
        aggregated_messages[os.path.basename(date_path)] = messages_for_date
    return aggregated_messages

# Function to check for delay and calculate delay time
def check_for_delay_and_calculate_time(messages):
    """Find gaps longer than ``DELAY_THRESHOLD`` between adjacent messages.

    Every pair of adjacent messages is inspected, including the pairs at the
    end of the list. A pair counts as a delay when both messages have the
    same sender and the second one is more than ``DELAY_THRESHOLD`` minutes
    after the first.

    Args:
        messages: Chat lines accepted by ``extract_timestamp_and_sender``.

    Returns:
        A list of ``(index, minutes)`` tuples, where ``index`` is the
        position of the first message of the pair and ``minutes`` is the gap
        as a float.
    """
    # Parse each message once; the pairwise loop would otherwise parse every
    # message twice (first as "next", then as "current").
    parsed = [extract_timestamp_and_sender(message) for message in messages]

    delays = []
    for index, (current, following) in enumerate(zip(parsed, parsed[1:])):
        current_timestamp, current_sender = current
        next_timestamp, next_sender = following
        if current_sender != next_sender:
            continue  # only gaps between messages of the same sender count
        # If either timestamp failed to parse (NaT) the difference is NaN,
        # and NaN > threshold is False, so such pairs are never reported.
        time_diff = (next_timestamp - current_timestamp).total_seconds() / 60
        if time_diff > DELAY_THRESHOLD:
            delays.append((index, time_diff))

    return delays

# Main analysis process
def main_analysis(root_directory):
    """Build a table of detected delays for every date folder.

    Args:
        root_directory: Root folder passed to ``aggregate_messages_by_date``.

    Returns:
        A DataFrame with the columns 'Date', 'Person', 'Delay Detected',
        'Delay Time' and 'Last 7 Messages', one row per detected delay. The
        frame is empty (but has these columns) when no delay is found.
    """
    columns = ['Date', 'Person', 'Delay Detected', 'Delay Time', 'Last 7 Messages']
    aggregated_messages = aggregate_messages_by_date(root_directory)

    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    for date, messages in aggregated_messages.items():
        for delay_index, delay_time in check_for_delay_and_calculate_time(messages):
            # Clamp at 0: a negative slice start would count from the end of
            # the list and give an empty or unrelated context for early delays.
            window_start = max(0, delay_index - 6)
            last_7_messages = ' | '.join(messages[window_start:delay_index + 1])
            sender = extract_timestamp_and_sender(messages[delay_index])[1]
            rows.append({
                'Date': date,
                'Person': sender,
                'Delay Detected': True,
                'Delay Time': delay_time,
                'Last 7 Messages': last_7_messages,
            })

    return pd.DataFrame(rows, columns=columns)

# Run the analysis
# NOTE: these statements sit at module level, so the analysis runs (and the
# table is printed) whenever this file is executed or imported.
root_directory = 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Test\\filtered_chats'  # Replace with the actual path
# One row per detected delay; see main_analysis for the column layout.
delay_analysis_table = main_analysis(root_directory)
print(delay_analysis_table)
