In [1]:
import os
import re
from collections import defaultdict

def aggregate_speeches(source_dir=None, output_dir_name='Aggregated_Speeches'):
    """Merge per-year speech files into one chronological file per president.

    Looks (non-recursively) in ``source_dir`` for files named
    ``<President>_<YYYY>.txt``, groups their stripped contents by the
    ``<President>`` part of the filename, and writes one
    ``<President>.txt`` per group into ``<source_dir>/<output_dir_name>``.
    Inside each output file the speeches are ordered by year, each one
    preceded by a ``"<year>:"`` header line and followed by a blank line.

    Files ending in ``.txt`` that do not match the naming pattern are
    reported and skipped; other files are ignored silently. A file that
    cannot be read or decoded as UTF-8 is reported and skipped, and a
    failure to write one president's file is reported without stopping
    the remaining presidents.

    Args:
        source_dir: Directory containing the speech files. ``None`` (the
            default) means the current working directory.
        output_dir_name: Name of the sub-directory of ``source_dir`` that
            receives the aggregated files; created if it does not exist.

    Returns:
        None. Progress and error messages are printed to stdout.

    Raises:
        OSError: If ``source_dir`` cannot be listed or the output
            directory cannot be created.
    """
    current_dir = os.getcwd() if source_dir is None else source_dir

    # Filenames look like 'President_Year.txt'.
    pattern = re.compile(r'^(.+?)_(\d{4})\.txt$')

    # president name -> list of (year, speech text) tuples
    presidents = defaultdict(list)

    for filename in os.listdir(current_dir):
        if not filename.endswith('.txt'):
            continue

        match = pattern.match(filename)
        if match is None:
            print(f"Filename '{filename}' does not match the pattern and will be skipped.")
            continue

        president, year = match.groups()
        file_path = os.path.join(current_dir, filename)

        # Only the read itself is guarded, so an unexpected programming
        # error in the bookkeeping below is not mistaken for an I/O failure.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read().strip()
        except (OSError, UnicodeError) as e:
            print(f"Error reading {filename}: {e}")
            continue

        presidents[president].append((int(year), content))

    # The output directory is created even when no speech file was found.
    output_dir = os.path.join(current_dir, output_dir_name)
    os.makedirs(output_dir, exist_ok=True)

    for president, speeches in presidents.items():
        sorted_speeches = sorted(speeches, key=lambda x: x[0])
        output_file = os.path.join(output_dir, f"{president}.txt")

        try:
            with open(output_file, 'w', encoding='utf-8') as outfile:
                # Trailing blank line separates consecutive speeches.
                outfile.writelines(
                    f"{year}:\n{speech}\n\n" for year, speech in sorted_speeches
                )
        except (OSError, UnicodeError) as e:
            print(f"Error writing to {output_file}: {e}")
        else:
            print(f"Aggregated speeches for {president} written to '{output_file}'.")

# Run the aggregation only when executed directly (script or notebook cell),
# so importing this file does not touch the file system as a side effect.
if __name__ == "__main__":
    aggregate_speeches()

Aggregated speeches for Reagan written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Reagan.txt'.
Aggregated speeches for Roosevelt written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Roosevelt.txt'.
Aggregated speeches for Wilson written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Wilson.txt'.
Aggregated speeches for Taft written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Taft.txt'.
Aggregated speeches for Madison written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Madison.txt'.
Aggregated speeches for Polk written to '/Users/joshstrupp/Documents/Working/Educational/MSDV/DVIA/DVIA Working/Text and Qual/sotu/Aggregated_Speeches/Polk.txt'.
Aggr