# 1st code

In [None]:
import os
import pandas as pd
import re
import csv
from datetime import datetime, timedelta

def list_chat_files(base_directory):
    """Collect SALES chat transcripts grouped by date folder.

    Walks ``base_directory/<date>/SALES/<person>/`` and gathers every file
    whose name ends in ``.txt``. Team folders not named exactly ``SALES``
    are ignored. Progress is reported on stdout as the tree is walked.

    Args:
        base_directory: Root directory containing one sub-folder per date.

    Returns:
        Dict mapping each date folder name to the list of full paths of the
        ``.txt`` files found under its SALES person folders. Date folders
        without any matching file get no entry.
    """
    files_by_date = {}
    for date_name in os.listdir(base_directory):
        date_dir = os.path.join(base_directory, date_name)
        if not os.path.isdir(date_dir):
            continue
        print(f"Processing date folder: {date_name}")

        for team_name in os.listdir(date_dir):
            # Only the 'SALES' team is analysed.
            if team_name != "SALES":
                continue
            team_dir = os.path.join(date_dir, team_name)
            if not os.path.isdir(team_dir):
                continue
            print(f"Processing team folder: {team_name}")

            for person_name in os.listdir(team_dir):
                # Reported before the directory check, so stray files inside
                # the team folder show up in the log as well.
                print(f"Found person folder: {person_name}")
                person_dir = os.path.join(team_dir, person_name)
                if not os.path.isdir(person_dir):
                    continue

                for entry in os.listdir(person_dir):
                    if not entry.endswith('.txt'):
                        continue
                    chat_path = os.path.join(person_dir, entry)
                    files_by_date.setdefault(date_name, []).append(chat_path)
                    print(f"Added chat file for analysis: {chat_path}")
    return files_by_date

def parse_chat_file(file_path, expected_date_minus_one, date_formats):
    """Extract (file name, sender, timestamp) rows for one day from a chat export.

    A line counts as a message header when it starts with
    ``<d>/<m>/<y>, <h>:<mm> AM|PM - `` followed by ``<sender>:``. Lines whose
    sender contains anything other than ASCII letters and whitespace are
    dropped. The timestamp text is tried against ``date_formats`` in order
    and the first format that parses is used.

    Args:
        file_path: Path of the UTF-8 chat transcript to read.
        expected_date_minus_one: ``datetime.date`` a message must fall on to
            be kept.
        date_formats: Sequence of ``strptime``-style format strings.

    Returns:
        List of ``(basename of file_path, sender, parsed timestamp)`` tuples,
        in file order.
    """
    header_re = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2}\s?[APMapm]{2} - ')
    source_name = os.path.basename(file_path)
    records = []

    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            if header_re.match(raw_line) is None:
                continue
            parts = re.match(r'(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2}\s?[APMapm]{2}) - (.*?):', raw_line)
            if parts is None:
                continue
            stamp_text = parts.group(1)
            author = parts.group(2)

            # Keep only senders made of letters and whitespace.
            if re.match(r'^[A-Za-z\s]+$', author) is None:
                continue

            parsed = None
            for fmt in date_formats:
                try:
                    parsed = pd.to_datetime(stamp_text, format=fmt, errors='raise')
                except ValueError:
                    continue
                else:
                    break

            if parsed and parsed.date() == expected_date_minus_one:
                records.append((source_name, author, parsed))
    return records

# Root of the date-wise export tree: <base>/<date>/SALES/<person>/*.txt
base_directory = "F:\\Github-mauriceyeng\\Chat-Analyzer-V2\\date-wise"

all_chat_files = list_chat_files(base_directory)

# Candidate timestamp layouts, tried in order by parse_chat_file; the first
# one that parses wins, so day-first layouts take precedence over month-first.
date_formats = ['%d/%m/%y, %I:%M %p', '%d/%m/%Y, %I:%M %p', 
                '%m/%d/%y, %I:%M %p', '%m/%d/%Y, %I:%M %p',
                '%y/%m/%d, %I:%M %p', '%Y/%m/%d, %I:%M %p']

# Rows of (date folder, chat file name, sender, timestamp).
chat_times_data = []

for date_folder, files in all_chat_files.items():
    # Each date folder is analysed for messages sent on the previous day.
    folder_date = pd.to_datetime(date_folder).date()
    expected_date_minus_one = folder_date - timedelta(days=1)

    for file_path in files:
        print(f"Analyzing file: {file_path}")
        chat_data = parse_chat_file(file_path, expected_date_minus_one, date_formats)
        # parse_chat_file returns (file name, sender, timestamp) triples.
        # Prefix the date folder so every row has the four fields that the
        # CSV writer below unpacks; without it the unpacking raises ValueError.
        chat_times_data.extend((date_folder, *row) for row in chat_data)

# Saving data to CSV file
csv_file_path = os.path.join(base_directory, 'all_chat_times.csv')
with open(csv_file_path, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Date', 'Chat File Name', 'Sender', 'Timestamp'])
    for date_folder, file_name, sender, timestamp in chat_times_data:
        writer.writerow([date_folder, file_name, sender, timestamp.strftime('%Y-%m-%d %H:%M:%S')])

print(f"All chat times saved to {csv_file_path}")


# V2

In [4]:
import os
import pandas as pd
import re
from datetime import timedelta

def list_chat_files(base_directory):
    """Collect SALES chat transcripts grouped by (date folder, person folder).

    Walks ``base_directory/<date>/SALES/<person>/`` and gathers every file
    whose name ends in ``.txt``. Team folders not named exactly ``SALES``
    are ignored.

    Args:
        base_directory: Root directory containing one sub-folder per date.

    Returns:
        Dict mapping ``(date folder name, person folder name)`` to the list
        of full paths of that person's ``.txt`` files. Person folders without
        any matching file get no entry.
    """
    found = {}
    for date_name in os.listdir(base_directory):
        date_dir = os.path.join(base_directory, date_name)
        if not os.path.isdir(date_dir):
            continue

        for team_name in os.listdir(date_dir):
            team_dir = os.path.join(date_dir, team_name)
            if team_name != 'SALES' or not os.path.isdir(team_dir):
                continue

            for person_name in os.listdir(team_dir):
                person_dir = os.path.join(team_dir, person_name)
                if not os.path.isdir(person_dir):
                    continue

                transcripts = [
                    os.path.join(person_dir, entry)
                    for entry in os.listdir(person_dir)
                    if entry.endswith('.txt')
                ]
                # Only create the key when there is something to store.
                if transcripts:
                    found.setdefault((date_name, person_name), []).extend(transcripts)
    return found

def process_timestamps(timestamps, target_date):
    """Return the first contiguous activity window on ``target_date``.

    The timestamps that fall on ``target_date`` are ordered chronologically.
    The window starts at the earliest one and is extended for as long as each
    following timestamp is at most two hours after the previous one; the
    first larger gap ends the window.

    Missing values (``NaT``/``None``) are discarded before sorting. ``NaT``
    compares False against every timestamp, so sorting a list that contains
    it can leave the valid entries out of order. The caller's list is left
    unmodified.

    Args:
        timestamps: Iterable of datetime-like objects (may contain ``NaT``).
        target_date: ``datetime.date`` the window must lie on.

    Returns:
        ``(start_time, end_time)`` of the window, or ``(None, None)`` when no
        timestamp falls on ``target_date``.
    """
    if not timestamps:
        return None, None

    # Filter first, then sort a fresh list: keeps NaT out of the comparison
    # and avoids mutating the argument.
    same_day = sorted(
        ts for ts in timestamps
        if not pd.isna(ts) and ts.date() == target_date
    )
    if not same_day:
        return None, None

    start_time = end_time = same_day[0]
    for current in same_day[1:]:
        if current - end_time > timedelta(hours=2):
            break  # gap too large: the first session is over
        end_time = current
    return start_time, end_time

def parse_chat_file(file_path, person_name, target_date):
    """Find one person's first activity window on ``target_date`` in a chat export.

    A line counts as a message header when it starts with
    ``<d>/<m>/<yy>, <h>:<mm> AM|PM - <sender>:``. Only headers whose sender
    equals ``person_name`` exactly are considered, and their timestamps are
    parsed as day/month/two-digit-year.

    Args:
        file_path: Path of the UTF-8 chat transcript to read.
        person_name: Sender name to keep.
        target_date: ``datetime.date`` passed on to ``process_timestamps``.

    Returns:
        Whatever ``process_timestamps`` returns for the collected timestamps:
        a ``(start_time, end_time)`` pair, or ``(None, None)``.
    """
    # One pattern covers both the header check and the field extraction.
    line_pattern = re.compile(r'(\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}\s?[APMapm]{2}) - (.*?):')
    timestamps = []

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = line_pattern.match(line)
            if match is None:
                continue
            date_time_str, sender = match.groups()
            if sender != person_name:
                continue
            date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p', errors='coerce')
            # errors='coerce' yields NaT for text that matches the regex but
            # not the format (e.g. an impossible date). NaT does not order
            # against real timestamps, so keep it out of the list.
            if pd.isna(date_time):
                continue
            timestamps.append(date_time)
    return process_timestamps(timestamps, target_date)

# Root of the date-wise export tree: <base>/<date>/SALES/<person>/*.txt
base_directory = "F:\\Github-mauriceyeng\\Chat-Analyzer-V2\\date-wise"

all_chat_files = list_chat_files(base_directory)
# Maps (date folder, person) -> {'start': ..., 'end': ...}
all_chat_data = {}

for (date_folder, person_name), files in all_chat_files.items():
    # Calculate the target date (date - 1 day)
    target_date = (pd.to_datetime(date_folder) - timedelta(days=1)).date()

    for file_path in files:
        start_time, end_time = parse_chat_file(file_path, person_name, target_date)
        if not (start_time and end_time):
            continue
        key = (date_folder, person_name)
        window = all_chat_data.get(key)
        if window is None:
            all_chat_data[key] = {'start': start_time, 'end': end_time}
        else:
            # Merge across files in both directions. Files arrive in
            # os.listdir order, so the first file seen is not necessarily
            # the one with the earliest message.
            window['start'] = min(window['start'], start_time)
            window['end'] = max(window['end'], end_time)

df = pd.DataFrame(
    [(date, person, times['start'], times['end']) for (date, person), times in all_chat_data.items()],
    columns=['Date', 'Person', 'Start Time', 'End Time'],
)

csv_file_path = os.path.join(base_directory, 'chat_times_v6.csv')
df.to_csv(csv_file_path, index=False)
print(f"Chat times saved to {csv_file_path}")


Chat times saved to F:\Github-mauriceyeng\Chat-Analyzer-V2\date-wise\chat_times_v6.csv
