In [None]:
import os
import pandas as pd
import re

# Function to list all chat files in all date folders within the base directory
def list_chat_files(base_directory):
    """Collect the ``.txt`` files found three directory levels below *base_directory*.

    The expected layout is ``base/<date>/<team>/<person>/<file>.txt``.
    Anything that is not a directory at the date, team or person level is
    ignored, as are entries in a person folder whose name does not end in
    ``.txt``.

    Args:
        base_directory: Root folder that contains the date folders.

    Returns:
        dict mapping each date folder name to the list of full paths of the
        ``.txt`` entries found beneath it. Date folders that contain no
        ``.txt`` entry do not appear in the result.
    """
    def child_dirs(parent):
        # Yield (name, full path) for every sub-directory directly under parent.
        for name in os.listdir(parent):
            path = os.path.join(parent, name)
            if os.path.isdir(path):
                yield name, path

    found = {}
    for date_name, date_dir in child_dirs(base_directory):
        for _team_name, team_dir in child_dirs(date_dir):
            for _person_name, person_dir in child_dirs(team_dir):
                for entry in os.listdir(person_dir):
                    if not entry.endswith('.txt'):
                        continue
                    found.setdefault(date_name, []).append(
                        os.path.join(person_dir, entry)
                    )
    return found

# Function to parse a chat file and find start and end times for each person
def parse_chat_file(file_path):
    """Return the earliest and latest message time for each sender in a chat file.

    Only lines shaped like ``12/01/23, 9:05 PM - Sender: text`` are used
    (day/month/two-digit year, 12-hour clock, optional whitespace before
    AM/PM). Lines without a leading timestamp, and timestamped lines that
    have no ``Sender:`` part, are skipped.

    Args:
        file_path: Path to a UTF-8 encoded chat export.

    Returns:
        dict mapping sender name to ``{'start': Timestamp, 'end': Timestamp}``.
        A sender whose timestamps all fail to parse keeps ``NaT`` for both
        values; otherwise unparseable timestamps are ignored.
    """
    # One pattern both recognises a message line and extracts its parts, so a
    # timestamped line without "Sender:" simply does not match.
    message_pattern = re.compile(
        r'^(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2})\s?([APMapm]{2}) - (.*?):'
    )
    time_data = {}

    # 'utf-8-sig' reads plain UTF-8 as well, but strips a leading BOM that
    # would otherwise stop the first line from matching the pattern.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            match = message_pattern.match(line)
            if match is None:
                continue
            clock, meridiem, sender = match.groups()
            # Rebuild the stamp with exactly one plain space before AM/PM so
            # "9:05PM" and "9:05 PM" both satisfy the strptime format.
            date_time = pd.to_datetime(
                f'{clock} {meridiem}',
                format='%d/%m/%y, %I:%M %p',
                errors='coerce',
            )
            entry = time_data.setdefault(
                sender, {'start': date_time, 'end': date_time}
            )
            if pd.isna(date_time):
                # Never let an unparseable stamp replace a valid one.
                continue
            if pd.isna(entry['start']) or date_time < entry['start']:
                entry['start'] = date_time
            if pd.isna(entry['end']) or date_time > entry['end']:
                entry['end'] = date_time
    return time_data

# Main script
# Scans every chat file under base_directory, merges the per-file results into
# one row per (date folder, person) and writes them to chat_times.csv.
base_directory = "F:\\Github-mauriceyeng\\Chat-Analyzer-V2\\date-wise"  # Update this path

all_chat_files = list_chat_files(base_directory)
all_chat_data = {}

for date_folder, files in all_chat_files.items():
    for file_path in files:
        chat_data = parse_chat_file(file_path)
        for person, times in chat_data.items():
            key = (date_folder, person)
            if key not in all_chat_data:
                # Copy so later merges never mutate the parser's own dict.
                all_chat_data[key] = dict(times)
                continue
            merged = all_chat_data[key]
            # A person can have several files under one date folder: keep the
            # earliest start and the latest end across all of them. Explicit
            # comparisons are used instead of min()/max() because every
            # comparison with NaT is False, which makes min()/max() return
            # whichever argument happens to come first.
            if pd.isna(merged['start']) or times['start'] < merged['start']:
                merged['start'] = times['start']
            if pd.isna(merged['end']) or times['end'] > merged['end']:
                merged['end'] = times['end']

# Creating a DataFrame for the CSV export
df = pd.DataFrame(
    [
        (date, person, times['start'], times['end'])
        for (date, person), times in all_chat_data.items()
    ],
    columns=['Date', 'Person', 'Start Time', 'End Time'],
)

# Exporting to CSV (written next to the date folders; the scan ignores it
# because it is not a directory)
csv_file_path = os.path.join(base_directory, 'chat_times.csv')
df.to_csv(csv_file_path, index=False)
print(f"Chat times saved to {csv_file_path}")
