In [2]:
import os
import re
import csv
import time
import pyautogui
import keyboard
import pandas as pd
# Switch the working directory to the parent folder; the relative paths used
# by later cells ('./achd_data', 'coordinates.csv') are resolved from there.
# NOTE(review): this is relative and not idempotent - every re-run of this
# cell moves one more level up. Restart the kernel before re-running.
os.chdir('..')

# Function to extract unique intersection numbers from filenames
def get_unique_intersections(directory):
    """Return the sorted, de-duplicated filename prefixes of event CSV files.

    A directory entry is considered when its name contains ``Events`` and
    ends in ``.csv``.  The prefix is the text before the first underscore
    (the whole name when it contains no underscore).

    Args:
        directory: Path of the folder whose entries are listed.

    Returns:
        list[str]: The unique prefixes in ascending string order.
    """
    event_csv = re.compile(r'.*Events.*\.csv$')
    prefixes = {
        name.split('_')[0]
        for name in os.listdir(directory)
        if event_csv.match(name)
    }
    return sorted(prefixes)

# Function to collect coordinates with hover-spacebar workflow
def collect_coordinates(prompts, output_file):
    """Interactively record one screen position per prompt and save them.

    For every prompt the user is asked to hover over the named target; the
    call blocks until the spacebar is pressed and then stores the current
    mouse position.  All positions are written to ``output_file`` as CSV
    with the header ``Action,X,Y`` (the file is overwritten).

    Args:
        prompts: Iterable of labels to record, in the order they are asked.
        output_file: Path of the CSV file to write.

    Returns:
        dict: Maps each prompt to its recorded ``(x, y)`` tuple.
    """
    recorded = {}

    for prompt in prompts:
        print(f"Hover over {prompt} and press spacebar")
        keyboard.wait('space')  # blocks until the user confirms the hover
        x, y = pyautogui.position()
        print(f"Recorded {prompt} at ({x}, {y})")
        recorded[prompt] = (x, y)

    header = ["Action", "X", "Y"]
    body = [[label, px, py] for label, (px, py) in recorded.items()]
    with open(output_file, 'w', newline='') as handle:
        csv.writer(handle).writerows([header] + body)

    print(f"Coordinates saved to {output_file}")
    return recorded

# Function to load saved coordinates from CSV
def load_coordinates(file_path):
    """Load click coordinates from a CSV with an ``Action,X,Y`` header.

    The first row is treated as the header and skipped.  Every following
    non-empty row contributes one entry: column 0 is the key, columns 1 and
    2 are parsed as integers (negative values are accepted).

    An empty file yields an empty mapping, and completely blank rows are
    ignored.  Rows that are present but malformed (too few columns,
    non-integer values) still raise, so a corrupted file is not silently
    accepted.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        dict: Maps each action name to an ``(x, y)`` tuple of ints.  Empty
        when the file does not exist (a hint is printed in that case).

    Raises:
        IndexError: A non-empty row has fewer than three columns.
        ValueError: A coordinate column is not an integer.
    """
    coordinates = {}
    try:
        # newline='' lets the csv module handle line endings itself, as the
        # csv documentation requires for file objects passed to csv.reader.
        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file)
            next(reader, None)  # Skip header; the default tolerates an empty file
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing newline).
                    continue
                coordinates[row[0]] = (int(row[1]), int(row[2]))
    except FileNotFoundError:
        print("Coordinate file not found. Please collect coordinates first.")
    return coordinates

# Function to collect calendar selection coordinates
def collect_calendar_coordinates(months, output_file):
    """Interactively record the date-picker positions for each month.

    For every month the user is prompted for five targets ("1st", "15th",
    "16th", "Last Day", "Apply"), stored under the key ``"<month> <target>"``.
    Afterwards three month-independent targets are recorded ("Date Selector",
    "Next Month", "Previous Month").  Each prompt blocks until the spacebar
    is pressed and then stores the current mouse position.  The result is
    written to ``output_file`` as CSV with the header ``Action,X,Y``
    (the file is overwritten).

    Args:
        months: Iterable of month labels, used verbatim in keys and prompts.
        output_file: Path of the CSV file to write.

    Returns:
        dict: Maps every recorded key to its ``(x, y)`` tuple.
    """
    day_targets = ("1st", "15th", "16th", "Last Day", "Apply")
    navigation_targets = ("Date Selector", "Next Month", "Previous Month")
    captured = {}

    for month in months:
        print(f"Collecting coordinates for {month}")
        for day in day_targets:
            print(f"Hover over {month} {day} and press spacebar")
            keyboard.wait('space')  # blocks until the user confirms the hover
            x, y = pyautogui.position()
            print(f"Recorded {month} {day} at ({x}, {y})")
            captured[f"{month} {day}"] = (x, y)

    # Collect additional navigation buttons
    for prompt in navigation_targets:
        print(f"Hover over {prompt} and press spacebar")
        keyboard.wait('space')
        x, y = pyautogui.position()
        print(f"Recorded {prompt} at ({x}, {y})")
        captured[prompt] = (x, y)

    header = ["Action", "X", "Y"]
    body = [[label, px, py] for label, (px, py) in captured.items()]
    with open(output_file, 'w', newline='') as handle:
        csv.writer(handle).writerows([header] + body)

    print(f"Calendar coordinates saved to {output_file}")
    return captured

def process_intersections(intersections, coordinates, month=None, day=None, download_dir=None, missing_df=None):
    """Drive the GUI to export one file per intersection.

    For each intersection the function clicks the "Intersection Selector",
    types the intersection and presses Enter, waits 10 seconds, then clicks
    the "Export Excel Button".  The mouse is moved back to where it was
    before each click.

    When ``month`` and ``day`` are given, a date stamp ``YYYYMMDD`` is built
    from them.  It is used for two things:

    * skipping intersections that ``missing_df`` already marks ``'Present'``
      for that date, and
    * (if ``download_dir`` is also given) waiting up to two minutes for
      ``<intersection>_Events_<YYYYMMDD>T0000.csv`` to appear there.

    Without ``month``/``day``/``download_dir`` the function simply sleeps
    30 seconds after each export.

    Args:
        intersections: Iterable of intersection identifiers (strings; they
            are typed into the UI and used as ``missing_df`` column labels).
        coordinates: Mapping containing at least the keys
            ``"Intersection Selector"`` and ``"Export Excel Button"``.
        month: Label of the form ``"<English month name> <year>"``,
            e.g. ``"August 2025"``.
        day: Integer day of month used in the date stamp.
        download_dir: Folder watched for the exported file.
        missing_df: Optional table indexed by ``YYYYMMDD`` strings with one
            column per intersection and ``'Present'`` marking existing files.
            Dates or intersections absent from the table are treated as not
            yet downloaded.

    Raises:
        KeyError: ``month`` does not start with an English month name, or a
            required key is missing from ``coordinates``.
        IndexError: ``month`` has no year part.
    """
    if month and day:
        month_key = {"January": "01", "February": "02", "March": "03", "April": "04", "May": "05", "June": "06", "July": "07", "August": "08", "September": "09", "October": "10", "November": "11", "December": "12"}
        # Whitespace-tolerant split; the dict lookup keeps the stamp independent
        # of the process locale (strptime's %B uses the locale's month names).
        month_parts = month.split()
        year = month_parts[1]
        month_num = month_key[month_parts[0]]
        date = f'{year}{month_num}{day:02d}'
    else:
        date = None

    for intersection in intersections:

        # Skip only when the table positively says the file is there.  The
        # column check avoids a KeyError for intersections the table does
        # not know about.
        if (
            missing_df is not None
            and date
            and date in missing_df.index
            and intersection in missing_df.columns
            and missing_df.at[date, intersection] == 'Present'
        ):
            print(f'{intersection} {month} already downloaded')
            continue

        print(f"Processing intersection {intersection}")

        # Click Intersection Selector (remember the pointer so it can be restored)
        (x, y) = pyautogui.position()
        pyautogui.click(coordinates["Intersection Selector"])
        time.sleep(1)

        # Type Intersection Number and Press Enter
        pyautogui.write(intersection)
        pyautogui.press('enter')
        pyautogui.moveTo(x, y)
        time.sleep(10)  # fixed wait after selecting the intersection

        # Click Export Excel Button
        (x, y) = pyautogui.position()
        pyautogui.click(coordinates["Export Excel Button"])
        pyautogui.moveTo(x, y)

        if month and day and download_dir:
            # Construct the expected filename from the same stamp used for
            # the missing_df lookup, so both always agree.
            filename = f"{intersection}_Events_{date}T0000.csv"
            file_path = os.path.join(download_dir, filename)

            # Wait for the file to appear in the download directory
            start_time = time.time()
            while not os.path.exists(file_path):
                if time.time() - start_time > 120:  # 2 minutes
                    print(f"File {filename} not found within 2 minutes. Continuing...")
                    break
                time.sleep(1)
        else:
            # No file to watch for: fall back to a fixed delay.
            time.sleep(30)

    print("Processing complete.")

# Function to automate full month-wise processing
def process_monthly_intersections(intersections, months, coordinates, download_dir=None, missing_df=None):
    """Export both half-month windows for every month in ``months``.

    For each month the function:

    1. runs ``process_intersections`` with ``day=1`` (the date range that is
       selected in the UI when the function starts / after step 4),
    2. opens the date selector and picks the 16th through the last day,
    3. runs ``process_intersections`` with ``day=16``,
    4. unless this is the last entry, opens the date selector, clicks
       "Next Month" once and picks the 1st through 15th of the next entry.

    Position in the list, not the month label, decides what "last" and
    "next" mean, so a label that occurs more than once is handled correctly.

    NOTE(review): "Next Month" is clicked exactly once per step, so
    ``months`` is presumably expected to hold consecutive calendar months -
    confirm against the UI.

    Args:
        intersections: Passed through to ``process_intersections``.
        months: Sequence of month labels such as ``"February 2025"``.
        coordinates: Mapping with the keys ``"Date Selector"``,
            ``"Next Month"`` and, per month, ``"<month> 1st"``,
            ``"<month> 15th"``, ``"<month> 16th"``, ``"<month> Last Day"``
            and ``"<month> Apply"`` (plus whatever
            ``process_intersections`` needs).
        download_dir: Passed through to ``process_intersections``.
        missing_df: Passed through to ``process_intersections``.

    Raises:
        KeyError: A required coordinate key is missing.
    """
    def click_and_pause(key, seconds):
        # One UI step: click the stored position, then give the page time to react.
        pyautogui.click(coordinates[key])
        time.sleep(seconds)

    last_index = len(months) - 1
    for index, month in enumerate(months):
        print(f"Processing month: {month}")

        # Process 1st-15th
        process_intersections(intersections, coordinates, month=month, day=1, download_dir=download_dir, missing_df=missing_df)

        # Select 16th-last day
        click_and_pause("Date Selector", 1)
        click_and_pause(f"{month} 16th", 1)
        click_and_pause(f"{month} Last Day", 1)
        click_and_pause(f"{month} Apply", 5)

        # Process 16th-last day
        process_intersections(intersections, coordinates, month=month, day=16, download_dir=download_dir, missing_df=missing_df)

        # Move to next month
        if index < last_index:
            next_month = months[index + 1]
            click_and_pause("Date Selector", 1)
            click_and_pause("Next Month", 1)
            click_and_pause(f"{next_month} 1st", 1)
            click_and_pause(f"{next_month} 15th", 1)
            # The picker now shows next_month, so use the Apply position that
            # was recorded for that month, consistent with the day clicks above.
            click_and_pause(f"{next_month} Apply", 5)

    print("Monthly processing complete.")

def find_missing_files(directory, intersections):
    """Tabulate which ``<id>_Events_<YYYYMMDD>T0000.csv`` files exist.

    Only names that start with a three-digit id followed by
    ``_Events_<8 digits>T0000.csv`` are considered, and only when the id is
    one of ``intersections``.  Every date seen for at least one requested
    intersection becomes a row of the table.

    Args:
        directory: Folder whose entries are listed.
        intersections: Intersection ids (strings) to report on; they become
            the table's columns in the given order.

    Returns:
        tuple: ``(df, missing_files)`` where ``df`` has the sorted date
        strings as index and holds ``'Present'`` or ``'Missing'`` per cell,
        and ``missing_files`` is a list of ``(intersection, date)`` pairs for
        the ``'Missing'`` cells, ordered by intersection and then date.
    """
    name_re = re.compile(r'(\d{3})_Events_(\d{8})T0000\.csv$')

    # Dates found on disk, grouped by requested intersection.
    found = {key: set() for key in intersections}
    seen_dates = set()
    for entry in os.listdir(directory):
        hit = name_re.match(entry)
        if not hit:
            continue
        key, stamp = hit.groups()
        if key not in found:
            continue  # file belongs to an intersection nobody asked about
        found[key].add(stamp)
        seen_dates.add(stamp)

    ordered_dates = sorted(seen_dates)

    # Rows are dates, columns are intersections; every cell is filled below.
    df = pd.DataFrame(index=ordered_dates, columns=intersections)
    for key in intersections:
        on_disk = found[key]
        for stamp in ordered_dates:
            df.at[stamp, key] = 'Present' if stamp in on_disk else 'Missing'

    # Read the gaps back from the finished table.
    missing_files = []
    for key in intersections:
        for stamp in ordered_dates:
            if df.at[stamp, key] == 'Missing':
                missing_files.append((key, stamp))

    return df, missing_files


In [2]:

# Example usage
# Interactive cell: records the date-picker click positions for the listed
# months. Each prompt blocks until the spacebar is pressed, so keep the
# target application visible while this runs. The other steps are disabled.
if __name__ == "__main__":
    # Collect coordinates
    # (disabled) capture of the two main-page buttons into coordinates.csv
    #coordinate_prompts = ["Intersection Selector", "Export Excel Button"]
    #coord_file = "coordinates.csv"
    
    #coordinates = collect_coordinates(coordinate_prompts, coord_file)
    
    # Collect calendar coordinates
    # Five positions per month plus three navigation buttons are requested;
    # the result is written to calendar_file (overwriting it).
    months = ["April 2023", "January 2024"]
    calendar_file = "calendar_coordinates_423-124.csv"
    collect_calendar_coordinates(months, calendar_file)
    
    # Get intersection numbers
    # (disabled) discover intersection ids from the event files on disk
    #directory = './achd_data'#input("Enter directory containing event files: ")
    #intersections = get_unique_intersections(directory)
    
    # Process intersections
    # (disabled) export the currently selected date range for every intersection
    #process_intersections(intersections, coordinates)


Collecting coordinates for April 2023
Hover over April 2023 1st and press spacebar
Recorded April 2023 1st at (1650, -535)
Hover over April 2023 15th and press spacebar
Recorded April 2023 15th at (1648, -448)
Hover over April 2023 16th and press spacebar
Recorded April 2023 16th at (1695, -446)
Hover over April 2023 Last Day and press spacebar
Recorded April 2023 Last Day at (1694, -360)
Hover over April 2023 Apply and press spacebar
Recorded April 2023 Apply at (1980, -276)
Collecting coordinates for January 2024
Hover over January 2024 1st and press spacebar
Recorded January 2024 1st at (1429, -534)
Hover over January 2024 15th and press spacebar
Recorded January 2024 15th at (1427, -447)
Hover over January 2024 16th and press spacebar
Recorded January 2024 16th at (1474, -447)
Hover over January 2024 Last Day and press spacebar
Recorded January 2024 Last Day at (1520, -357)
Hover over January 2024 Apply and press spacebar
Recorded January 2024 Apply at (1981, -278)
Hover over Date 

In [4]:
# Export the currently selected date range for a fixed list of intersections
# and wait for each "<id>_Events_20250816T0000.csv" to land in download_dir.
directory = './achd_data'
coord_file = 'coordinates.csv'
# The explicit list below is what gets processed. Discovering ids from
# `directory` was immediately overwritten, so that call is left disabled.
#intersections = get_unique_intersections(directory)
#intersections = ['323', '325', '339', '341', '361', '392', '423', '430']
intersections = ['201','213', '250', '269', '270', '271', '272', '323', '325', '339', '341', '361', '392', '423', '430']
coordinates = load_coordinates(coord_file)
# Resolve the Downloads folder from the current user's home directory instead
# of hard-coding one user's profile path.
# NOTE(review): assumes the browser saves to "<home>/Downloads" - adjust if
# the download location has been changed.
download_dir = os.path.join(os.path.expanduser("~"), "Downloads")
process_intersections(intersections,coordinates, month = "August 2025", day = 16, download_dir = download_dir)

Processing intersection 201
Processing intersection 213
File 213_Events_20250816T0000.csv not found within 2 minutes. Continuing...
Processing intersection 250
Processing intersection 269
Processing intersection 270
Processing intersection 271
Processing intersection 272
Processing intersection 323
Processing intersection 325
Processing intersection 339
Processing intersection 341
Processing intersection 361
Processing intersection 392
Processing intersection 423
Processing intersection 430
Processing complete.


In [12]:
# Example usage
# Build the Present/Missing table for the listed intersections from the
# files currently in `directory`.
directory = './achd_data'
intersections = ['201', '213', '250', '269', '270', '271', '272', '323', '325', '339', '341', '361', '392', '423', '430']
# df: dates as rows, intersections as columns.
# missing_files: (intersection, date) pairs for every 'Missing' cell.
df, missing_files = find_missing_files(directory, intersections)
print(df)
print(missing_files)

              201      213      250      269      270      271      272  \
20250501  Present  Present  Present  Present  Present  Present  Present   
20250516  Present  Present  Present  Present  Present  Present  Present   
20250601  Present  Present  Present  Present  Present  Present  Present   
20250616  Present  Present  Present  Present  Present  Present  Present   
20250701  Present  Present  Present  Present  Present  Present  Present   
20250716  Present  Present  Present  Present  Present  Present  Present   

              323      325      339      341      361      392      423  \
20250501  Present  Present  Present  Present  Present  Present  Present   
20250516  Present  Present  Present  Present  Present  Present  Present   
20250601  Missing  Present  Present  Present  Present  Present  Present   
20250616  Present  Present  Present  Present  Present  Present  Present   
20250701  Present  Present  Present  Present  Present  Present  Present   
20250716  Present  Prese

In [2]:
# With df set to None no missing-file table is passed on, so every
# intersection is exported (process_intersections only skips entries when
# missing_df is not None).
df=None

# Example usage
if __name__ == "__main__":
    # Load coordinates: main-page buttons plus the calendar positions,
    # merged into one lookup (calendar entries win on duplicate keys).
    coord_file = "coordinates.csv"
    calendar_file = "calendar_coordinates_1-2.csv"
    coordinates = load_coordinates(coord_file)
    coordinates.update(load_coordinates(calendar_file))
    
    # Get intersection numbers
    directory = './achd_data'#input("Enter directory containing event files: ")
    #intersections = get_unique_intersections(directory)
    intersections = ['201', '213', '250', '269', '270', '271', '272', '323', '325', '339', '341', '361', '392', '423', '430']
    
    # Define months
    # Every label needs matching "<month> ..." entries in the calendar file.
    months = ["February 2025"]
    
    # Resolve the Downloads folder from the current user's home directory
    # instead of hard-coding one user's profile path.
    # NOTE(review): assumes the browser saves to "<home>/Downloads" - adjust
    # if the download location has been changed.
    download_dir = os.path.join(os.path.expanduser("~"), "Downloads")

    # Process intersections month-wise
    process_monthly_intersections(intersections, months, coordinates, download_dir=download_dir, missing_df=df)

Processing month: February 2025
Processing intersection 201
Processing intersection 213
Processing intersection 250
Processing intersection 269
Processing intersection 270
Processing intersection 271
Processing intersection 272
Processing intersection 323
Processing intersection 325
Processing intersection 339
Processing intersection 341
Processing intersection 361
Processing intersection 392
Processing intersection 423
Processing intersection 430
Processing complete.
Processing intersection 201
Processing intersection 213
Processing intersection 250
Processing intersection 269
Processing intersection 270
Processing intersection 271
Processing intersection 272
Processing intersection 323
Processing intersection 325
Processing intersection 339
Processing intersection 341
Processing intersection 361
Processing intersection 392
Processing intersection 423
Processing intersection 430
Processing complete.
Monthly processing complete.
