In [None]:
# Imports

import os
import json
from typing import Dict, List
import openai
from tqdm import tqdm
from pydantic import BaseModel
from dotenv import load_dotenv
from collections import Counter
import random

load_dotenv()
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [49]:
# JSON schema for gpt output (structured outputs)

class Speech(BaseModel):
    speech_id: str
    topics: List[str]
    
class Speeches(BaseModel):
    speeches: List[Speech]

In [50]:
def read_speech_file(file_path: str) -> Dict[str, str]:
    """
    Reads a speech file and returns a dictionary mapping speech_ids to speeches.
    Only includes speeches with more than 35 words and less than 400 words.
    Handles and logs problematic lines without stopping the process.

    Args:
        file_path (str): Path to the speech file

    Returns:
        Dict[str, str]: Dictionary mapping speech_ids to speeches
    """
    speeches = {}
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        # Skip header line
        next(file)
        for line_number, line in enumerate(file, start=2):  # Start at 2 to account for header
            try:
                # Replace unknown characters with space
                cleaned_line = line.replace('�', ' ')
                # Split on pipe character - column delimmiter
                parts = line.strip().split('|')
                if len(parts) == 2:
                    speech_id, speech = parts
                    # Only add speech if it has more than 35 words and less than 400 words
                    word_count = len(speech.split())
                    if 35 < word_count < 400:
                        speeches[speech_id] = speech
            except Exception as e:
                # Log the problematic line and continue
                print(f"Error processing line {line_number}: {line.strip()}\nException: {e}")
    return speeches

In [51]:
def chunk_speeches(speeches: Dict[str, str], max_chunk_size: int = 20000) -> List[Dict[str, str]]:
    """
    Splits speeches into chunks while keeping individual speeches intact.

    Args:
        speeches (Dict[str, str]): Dictionary mapping speech_ids to speeches
        max_chunk_size (int): Maximum size of each chunk in characters

    Returns:
        List[Dict[str, str]]: List of dictionaries mapping speech_ids to speechess
            such that the total size of speeches in each dictionary is less than or
            equal to max_chunk_size
    """
    chunks = []
    current_chunk = {}
    current_size = 0
    
    for speech_id, speech in speeches.items():
        speech_size = len(speech)
        
        # If adding this speech would exceed max size and we already have speeches,
        # start a new chunk
        if current_size + speech_size > max_chunk_size and current_chunk:
            chunks.append(current_chunk)
            current_chunk = {}
            current_size = 0
            
        current_chunk[speech_id] = speech
        current_size += speech_size
    
    # Add the last chunk if it's not empty
    if current_chunk:
        chunks.append(current_chunk)
    
    return chunks

In [52]:
def extract_speech_topics(speeches: Dict[str, str]):
    """
    Uses OpenAI's API to extract topics from the given speeches.

    Args:
        speeches (Dict[str, str]): Dictionary mapping speech_ids to speeches

    Returns:
        JSON Schema formatted output. Key is "speeches" and value is a list of objects
            where each object has a "speech_id" and a "topics" list.
    """
    # Prepare the speeches for analysis
    speeches_text = "\n\n".join([f"Speech ID: {id}\nContent: {text}" for id, text in speeches.items()])
    
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": """
                    Analyze each speech and identify its relevant topics. Topics should be chosen from this fixed list:
                    - Governance and Democracy
                    - Economy and Jobs
                    - Health and Social Services
                    - Education and Innovation
                    - Environment and Energy
                    - Defense and Security
                    - Immigration and Border Policy
                    - Justice and Civil Rights
                    - Infrastructure and Transportation
                    - Budget and Fiscal Responsibility

                    For each speech, assign one or more topics that best match its content.
                    Output should be in JSON format containing a list of objects, each with a speech_id and its corresponding topics list.
                    Be precise and thorough in topic assignment. Choose ONLY from the provided list of topics.
                    """
                },
                {
                    "role": "user",
                    "content": speeches_text
                }
            ],
            response_format={
                "type": "json_schema",
                "json_schema": 
                    {
                        "name":"_", 
                        "schema": Speeches.model_json_schema()
                    }
            },
            temperature=0.3,
            timeout=600  # 10 minute timeout
        )
        
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error during API call: {str(e)}")
        raise

In [53]:
def process_speech_file(file_path: str, output_dir: str, max_chunks: int=None):
    """
    Process a speech file and save the results.

    Args:
        file_path (str): Path to the speech file
        output_dir (str): Directory to save the results
        max_chunks (int): Maximum number of chunks to process

    Returns:
        None
    """
    print(f"Processing file: {file_path}")
    
    # Read speeches - > turn into dictionary of speech_id: speech_text
    speeches = read_speech_file(file_path)
    print(f"Found {len(speeches)} speeches")
    
    # Split into chunks
    chunks = chunk_speeches(speeches)
    print(f"Split into {len(chunks)} chunks")
    
    # Process each chunk and combine results
    all_results = {}

    # don't process chunks sequentially, instead loop through a
    # random order of indices to process the chunks in a random order
    chunk_indices = list(range(len(chunks)))
    random.shuffle(chunk_indices)
    chunk_count = 0
    for i in tqdm(chunk_indices, desc="Processing chunks"):
        chunk_count += 1
        chunk = chunks[i]
        try:
            # Get topics for speeches in this chunk
            topics_json = extract_speech_topics(chunk)
            topics_results = json.loads(topics_json)
            
            # Combine speech text with topics
            for speech in topics_results["speeches"]:
                speech_id = speech["speech_id"]
                all_results[speech_id] = {
                    "speech": speeches[speech_id],
                    "topics": speech["topics"]
                }
            
            # print for every 10 chunks we process
            if (chunk_count) % 10 == 0:
                print(f"Successfully processed chunk {chunk_count}")

            # Stop if we've reached the maximum number of chunks (if we set a limit)
            if max_chunks and chunk_count >= max_chunks:
                break
        except Exception as e:
            print(f"Error processing chunk {i}: {str(e)}")
            continue

    # Save results
    output_file = os.path.join(output_dir, os.path.basename(file_path).replace('.txt', '_gpt_topic_labels.json'))
    os.makedirs(output_dir, exist_ok=True)
    
    with open(output_file, 'w') as f:
        json.dump(all_results, f, indent=2)
    
    print(f"\nResults saved to {output_file}")

In [None]:
def analyze_topic_frequencies(input_dir: str, suffix:str='_gpt_topic_labels.json') -> Dict[str, int]:
    """
    Analyze JSON files in the input directory and count topic frequencies.
    
    Args:
        input_dir: Directory containing the speech analysis JSON files
        suffix: Suffix of the JSON files to analyze
        
    Returns:
        Dictionary mapping topics to their frequencies
    """
    # Initialize counter for topics
    topic_counter = Counter()
    
    # Get all JSON files in directory
    json_files = [f for f in os.listdir(input_dir) if f.endswith(suffix)]
    
    # Process each file
    for filename in tqdm(json_files, desc="Processing files"):
        file_path = os.path.join(input_dir, filename)
        
        with open(file_path, 'r') as f:
            data = json.load(f)
            
            # Go through each speech
            for speech_id, speech_data in data.items():
                # Add each topic to our counter
                topic_counter.update(speech_data['topics'])
    
    # Convert Counter to dictionary and sort by frequency
    topic_frequencies = dict(sorted(topic_counter.items(), key=lambda x: x[1], reverse=True))
    
    return topic_frequencies

In [55]:
def print_topic_statistics(topic_frequencies: Dict[str, int]):
    """
    Print formatted statistics about topic frequencies.

    Args:
        topic_frequencies: Dictionary mapping topics to their frequencie
    
    Returns:
        None
    """
    print("\nTopic Frequencies:")
    print("-" * 40)
    
    # Find the longest topic name for formatting
    max_topic_length = max(len(topic) for topic in topic_frequencies.keys())
    
    # Print each topic and its count
    for topic, count in topic_frequencies.items():
        print(f"{topic:<{max_topic_length}} : {count:>6}")
    
    print("-" * 40)
    print(f"Total topics mentioned: {sum(topic_frequencies.values())}")

In [None]:
def process_speech_files(speech_files: List[str], input_dir: str, output_dir: str, max_chunks: int=None):
    """
    Process multiple speech files.

    Args:
        speech_files: List of speech file names
        input_dir: Directory containing the speech files
        output_dir: Directory to save the results
        max_chunks: Maximum number of chunks to process for each file

    Returns:
        None
    """
    for file_name in tqdm(speech_files, desc="Processing files"):
        file_path = os.path.join(input_dir, file_name)
        # get the specific file number, like for example 80 from 'speeches_080.txt'
        file_num = file_name.split('_')[1].split('.')[0]
        process_speech_file(file_path, output_dir, max_chunks)
        print(f"topic frequency graph for {file_num+'_gpt_topic_labels.json'}:")
        # print topic frequencies for each file as we process them
        topic_frequencies_interim = analyze_topic_frequencies(output_dir, suffix=file_num+'_gpt_topic_labels.json')
        print_topic_statistics(topic_frequencies_interim)

In [None]:
if __name__ == "__main__":
    # first, process the hein-bound files (they only go up to 111)
    input_dir = "../hein-bound"
    # save outputs to 'outputs' directory
    output_dir = "outputs"
    speech_files = [f for f in os.listdir(input_dir) 
                    if f.startswith("speeches_") and f.endswith(".txt") 
                    and f[9:-4].isdigit() and 79 <= int(f[9:-4]) <= 111]
    # sort the speech files
    speech_files.sort()
    process_speech_files(speech_files, input_dir, output_dir, max_chunks=100)

    # then, process the hein-daily files (they go from 112 to 114, the congresses not covered by hein-bound)
    input_dir = "../hein-daily"
    speech_files = [f for f in os.listdir(input_dir) 
                    if f.startswith("speeches_") and f.endswith(".txt") 
                    and f[9:-4].isdigit() and 112 <= int(f[9:-4]) <= 114]
    process_speech_files(speech_files, input_dir, output_dir, max_chunks=100)

    # Analyze topic frequencies for all files at the very end
    input_dir_freq = "outputs"
    topic_frequencies = analyze_topic_frequencies(input_dir_freq)
    print_topic_statistics(topic_frequencies)

Processing files:   0%|          | 0/33 [00:00<?, ?it/s]

Processing file: ../hein-bound/speeches_079.txt
Found 77145 speeches
Split into 2493 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   4%|▍         | 99/2493 [09:25<3:47:43,  5.71s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_079_gpt_topic_labels.json





topic frequency graph for 079_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 110.43it/s]
Processing files:   3%|▎         | 1/33 [09:26<5:02:07, 566.48s/it]


Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    853
Governance and Democracy          :    691
Budget and Fiscal Responsibility  :    561
Justice and Civil Rights          :    502
Defense and Security              :    438
Health and Social Services        :    184
Infrastructure and Transportation :    141
Education and Innovation          :     99
Immigration and Border Policy     :     70
Environment and Energy            :     29
Labor Relations                   :     25
Trade Agreements                  :     17
Agriculture                       :      9
Legislative Process               :      3
International Relations           :      2
Agricultural Policy               :      1
Price Control                     :      1
----------------------------------------
Total topics mentioned: 3626
Processing file: ../hein-bound/speeches_080.txt
Found 64251 speeches
Split into 2151 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Error processing chunk 180: Unterminated string starting at: line 1 column 62545 (char 62544)




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   5%|▍         | 99/2151 [11:53<4:06:36,  7.21s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_080_gpt_topic_labels.json





topic frequency graph for 080_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 155.77it/s]
Processing files:   6%|▌         | 2/33 [21:21<5:37:49, 653.86s/it]


Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    668
Governance and Democracy          :    646
Budget and Fiscal Responsibility  :    530
Justice and Civil Rights          :    509
Defense and Security              :    410
Health and Social Services        :    250
Infrastructure and Transportation :    238
Immigration and Border Policy     :    103
Housing and Urban Development     :    101
Education and Innovation          :     67
Environment and Energy            :     45
Housing and Home Finance Agency   :      6
Housing and Social Services       :      4
Housing                           :      3
Foreign Policy                    :      1
Agriculture                       :      1
----------------------------------------
Total topics mentioned: 3582
Processing file: ../hein-bound/speeches_081.txt
Found 101954 speeches
Split into 3258 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Error processing chunk 413: '810043881'


Processing chunks:   3%|▎         | 99/3258 [09:58<5:18:29,  6.05s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_081_gpt_topic_labels.json





topic frequency graph for 081_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 130.33it/s]
Processing files:   9%|▉         | 3/33 [31:22<5:14:46, 629.54s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    752
Budget and Fiscal Responsibility  :    683
Economy and Jobs                  :    616
Justice and Civil Rights          :    567
Defense and Security              :    375
Infrastructure and Transportation :    196
Health and Social Services        :    146
Immigration and Border Policy     :     89
Education and Innovation          :     63
Agriculture                       :     56
Housing and Social Services       :     40
Environment and Energy            :     27
Housing                           :      2
Housing and Rent Control          :      1
----------------------------------------
Total topics mentioned: 3613
Processing file: ../hein-bound/speeches_082.txt
Found 77584 speeches
Split into 2538 chunks




Error processing chunk 934: '820096691'




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   4%|▍         | 99/2538 [09:09<3:45:28,  5.55s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_082_gpt_topic_labels.json





topic frequency graph for 082_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 148.88it/s]
Processing files:  12%|█▏        | 4/33 [40:32<4:49:11, 598.34s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    701
Budget and Fiscal Responsibility  :    693
Economy and Jobs                  :    604
Defense and Security              :    592
Justice and Civil Rights          :    279
Health and Social Services        :    113
Education and Innovation          :    112
Environment and Energy            :     74
Infrastructure and Transportation :     63
Immigration and Border Policy     :     54
Agriculture                       :     25
Trade                             :      5
Agriculture and Forestry          :      2
Housing and Rent Act              :      2
----------------------------------------
Total topics mentioned: 3319
Processing file: ../hein-bound/speeches_083.txt
Found 87764 speeches
Split into 2971 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/2971 [09:15<4:28:32,  5.61s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_083_gpt_topic_labels.json





topic frequency graph for 083_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 153.42it/s]
Processing files:  15%|█▌        | 5/33 [49:49<4:32:16, 583.43s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    786
Economy and Jobs                  :    676
Budget and Fiscal Responsibility  :    526
Justice and Civil Rights          :    526
Defense and Security              :    330
Health and Social Services        :    156
Infrastructure and Transportation :    155
Environment and Energy            :    123
Education and Innovation          :     97
Immigration and Border Policy     :     60
Agriculture                       :     37
Foreign Policy                    :      1
----------------------------------------
Total topics mentioned: 3473
Processing file: ../hein-bound/speeches_084.txt
Found 77004 speeches
Split into 2740 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   4%|▎         | 99/2740 [08:51<3:56:18,  5.37s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_084_gpt_topic_labels.json





topic frequency graph for 084_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 133.90it/s]
Processing files:  18%|█▊        | 6/33 [58:42<4:14:51, 566.35s/it]


Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    669
Governance and Democracy          :    557
Justice and Civil Rights          :    525
Budget and Fiscal Responsibility  :    342
Defense and Security              :    268
Infrastructure and Transportation :    251
Environment and Energy            :    197
Health and Social Services        :    177
Education and Innovation          :     72
Immigration and Border Policy     :     42
Agriculture                       :     15
Agriculture and Rural Development :      3
Housing                           :      1
Energy                            :      1
----------------------------------------
Total topics mentioned: 3120
Processing file: ../hein-bound/speeches_085.txt
Found 100594 speeches
Split into 3735 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90




Error processing chunk 3252: '850249988'


Processing chunks:   3%|▎         | 99/3735 [09:01<5:31:23,  5.47s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_085_gpt_topic_labels.json





topic frequency graph for 085_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 112.12it/s]
Processing files:  21%|██        | 7/33 [1:07:46<4:02:11, 558.89s/it]


Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    634
Justice and Civil Rights          :    594
Budget and Fiscal Responsibility  :    496
Defense and Security              :    413
Governance and Democracy          :    401
Health and Social Services        :    156
Infrastructure and Transportation :    141
Education and Innovation          :    107
Environment and Energy            :     63
Immigration and Border Policy     :     31
Agriculture                       :     28
Housing and Social Services       :     13
Trade                             :      2
Labor                             :      1
Housing and Urban Development     :      1
Culture and Society               :      1
Foreign Policy                    :      1
----------------------------------------
Total topics mentioned: 3083
Processing file: ../hein-bound/speeches_086.txt
Found 102324 speeches
Split into 3872 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Error processing chunk 2352: '860175167'




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3872 [08:34<5:26:47,  5.20s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_086_gpt_topic_labels.json





topic frequency graph for 086_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 157.85it/s]
Processing files:  24%|██▍       | 8/33 [1:16:23<3:47:16, 545.47s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    521
Governance and Democracy          :    521
Justice and Civil Rights          :    430
Economy and Jobs                  :    403
Defense and Security              :    349
Health and Social Services        :    178
Infrastructure and Transportation :    130
Education and Innovation          :    119
Environment and Energy            :     35
Immigration and Border Policy     :     24
Agriculture                       :      4
Labor and Public Welfare          :      1
Water and Natural Resources       :      1
----------------------------------------
Total topics mentioned: 2716
Processing file: ../hein-bound/speeches_087.txt
Found 106156 speeches
Split into 4138 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Error processing chunk 1873: '870134626'




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4138 [08:32<5:48:43,  5.18s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_087_gpt_topic_labels.json





topic frequency graph for 087_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 165.78it/s]
Processing files:  27%|██▋       | 9/33 [1:24:58<3:34:23, 535.98s/it]


Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    495
Governance and Democracy          :    482
Budget and Fiscal Responsibility  :    463
Defense and Security              :    406
Justice and Civil Rights          :    390
Education and Innovation          :    223
Health and Social Services        :    156
Infrastructure and Transportation :    120
Immigration and Border Policy     :     61
Environment and Energy            :     47
Agriculture                       :     13
Agriculture and Rural Affairs     :     10
Media and Communications          :      9
Labor and Employment              :      2
Energy and Environment            :      1
Housing and Urban Development     :      1
Foreign Policy                    :      1
International Relations           :      1
Youth Engagement                  :      1
Housing                           :      1
Culture and Society               :      1
------------------------------------



Successfully processed chunk 10




Error processing chunk 2068: '880141648'




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4133 [08:05<5:29:50,  4.91s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_088_gpt_topic_labels.json





topic frequency graph for 088_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 156.08it/s]
Processing files:  30%|███       | 10/33 [1:33:06<3:19:45, 521.12s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    680
Governance and Democracy          :    435
Economy and Jobs                  :    362
Budget and Fiscal Responsibility  :    325
Defense and Security              :    312
Education and Innovation          :    228
Health and Social Services        :    141
Infrastructure and Transportation :     78
Environment and Energy            :     37
Immigration and Border Policy     :     26
Agriculture                       :      7
Trade                             :      6
International Relations           :      6
Culture and Arts                  :      4
Housing and Urban Development     :      3
Water and Infrastructure          :      1
Foreign Aid                       :      1
Housing                           :      1
Agriculture and Food Security     :      1
Energy and Environment            :      1
----------------------------------------
Total topics mentioned: 2655
Processin



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Error processing chunk 265: '890016197'




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4507 [08:15<6:07:26,  5.00s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_089_gpt_topic_labels.json





topic frequency graph for 089_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 105.78it/s]
Processing files:  33%|███▎      | 11/33 [1:41:23<3:08:26, 513.95s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    517
Governance and Democracy          :    413
Justice and Civil Rights          :    406
Economy and Jobs                  :    397
Defense and Security              :    310
Health and Social Services        :    268
Education and Innovation          :    228
Infrastructure and Transportation :    146
Immigration and Border Policy     :     57
Environment and Energy            :     44
Housing and Urban Development     :      3
Culture and Arts                  :      1
Technology and Innovation         :      1
Community Development             :      1
Science and Technology            :      1
Energy and Environment            :      1
Foreign Policy                    :      1
Foreign Aid                       :      1
Housing and Social Services       :      1
----------------------------------------
Total topics mentioned: 2797
Processing file: ../hein-bound/speeches_090.txt
Foun



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4596 [07:45<5:52:26,  4.70s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_090_gpt_topic_labels.json





topic frequency graph for 090_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 169.08it/s]
Processing files:  36%|███▋      | 12/33 [1:49:11<2:54:58, 499.94s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    435
Governance and Democracy          :    420
Defense and Security              :    381
Budget and Fiscal Responsibility  :    362
Economy and Jobs                  :    336
Health and Social Services        :    265
Education and Innovation          :    160
Infrastructure and Transportation :    151
Environment and Energy            :    135
Immigration and Border Policy     :     48
Housing and Urban Development     :     30
Foreign Policy                    :     10
Culture and Community             :      2
Housing and Social Services       :      2
Agriculture                       :      2
Trade and Tariffs                 :      1
International Relations           :      1
Energy and Environment            :      1
Tourism                           :      1
----------------------------------------
Total topics mentioned: 2743
Processing file: ../hein-bound/speeches_091.txt
Foun



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Error processing chunk 2735: '910167696'




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/5231 [07:57<6:52:46,  4.83s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_091_gpt_topic_labels.json
topic frequency graph for 091_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 176.59it/s]
Processing files:  39%|███▉      | 13/33 [1:57:11<2:44:38, 493.93s/it]


Topic Frequencies:
----------------------------------------
Defense and Security                 :    532
Justice and Civil Rights             :    460
Budget and Fiscal Responsibility     :    448
Governance and Democracy             :    395
Economy and Jobs                     :    330
Education and Innovation             :    237
Health and Social Services           :    213
Infrastructure and Transportation    :    148
Environment and Energy               :     89
Immigration and Border Policy        :     13
International Relations              :      3
Energy and Environment               :      1
Labor and Employment                 :      1
Foreign Policy                       :      1
Housing and Urban Development        :      1
Population Growth and Urban Planning :      1
Agriculture                          :      1
Trade and Tariffs                    :      1
Free Speech                          :      1
Housing                              :      1
-------------------



Error processing chunk 2080: '920135900'




Successfully processed chunk 10




Successfully processed chunk 20




Error processing chunk 2782: '920185690'




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Error processing chunk 1449: '920092379'




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4683 [07:27<5:45:26,  4.52s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_092_gpt_topic_labels.json





topic frequency graph for 092_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 185.12it/s]
Processing files:  42%|████▏     | 14/33 [2:04:41<2:32:12, 480.66s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    443
Defense and Security              :    409
Justice and Civil Rights          :    398
Governance and Democracy          :    371
Health and Social Services        :    286
Economy and Jobs                  :    241
Infrastructure and Transportation :    160
Environment and Energy            :    157
Education and Innovation          :    154
Immigration and Border Policy     :     34
Housing and Urban Development     :     23
Agriculture                       :      8
Housing                           :      1
Culture and Community             :      1
----------------------------------------
Total topics mentioned: 2686
Processing file: ../hein-bound/speeches_093.txt
Found 117171 speeches
Split into 5055 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/5055 [07:50<6:32:08,  4.75s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_093_gpt_topic_labels.json





topic frequency graph for 093_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 126.23it/s]
Processing files:  45%|████▌     | 15/33 [2:12:34<2:23:27, 478.19s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    555
Justice and Civil Rights          :    437
Budget and Fiscal Responsibility  :    431
Economy and Jobs                  :    354
Health and Social Services        :    265
Defense and Security              :    231
Education and Innovation          :    160
Infrastructure and Transportation :    145
Environment and Energy            :    143
Immigration and Border Policy     :     22
Agriculture                       :      6
Technology and Innovation         :      1
Housing                           :      1
Housing and Urban Development     :      1
Agriculture and Food Security     :      1
----------------------------------------
Total topics mentioned: 2753
Processing file: ../hein-bound/speeches_094.txt
Found 117164 speeches
Split into 4917 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Error processing chunk 1035: '940073703'




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Error processing chunk 4017: '940309100'




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4917 [07:39<6:12:38,  4.64s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_094_gpt_topic_labels.json





topic frequency graph for 094_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 183.18it/s]
Processing files:  48%|████▊     | 16/33 [2:20:15<2:14:05, 473.25s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    617
Justice and Civil Rights          :    435
Economy and Jobs                  :    427
Budget and Fiscal Responsibility  :    363
Defense and Security              :    266
Health and Social Services        :    251
Environment and Energy            :    195
Education and Innovation          :    110
Infrastructure and Transportation :     85
Immigration and Border Policy     :     23
Housing and Community Development :     14
Culture and Community             :      4
Energy and Environment            :      1
Foreign Policy                    :      1
Housing and Urban Development     :      1
----------------------------------------
Total topics mentioned: 2793
Processing file: ../hein-bound/speeches_095.txt
Found 120409 speeches
Split into 4996 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4996 [07:41<6:20:44,  4.67s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_095_gpt_topic_labels.json
topic frequency graph for 095_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 139.82it/s]
Processing files:  52%|█████▏    | 17/33 [2:27:59<2:05:27, 470.48s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    558
Governance and Democracy          :    533
Economy and Jobs                  :    470
Budget and Fiscal Responsibility  :    393
Defense and Security              :    252
Environment and Energy            :    198
Health and Social Services        :    191
Education and Innovation          :     86
Infrastructure and Transportation :     64
Immigration and Border Policy     :     36
Energy and Environment            :     22
Housing and Urban Affairs         :      1
Agriculture                       :      1
----------------------------------------
Total topics mentioned: 2805
Processing file: ../hein-bound/speeches_096.txt
Found 107326 speeches
Split into 4658 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4658 [07:32<5:47:18,  4.57s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_096_gpt_topic_labels.json





topic frequency graph for 096_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 178.06it/s]
Processing files:  55%|█████▍    | 18/33 [2:35:34<1:56:26, 465.79s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    523
Economy and Jobs                  :    393
Defense and Security              :    389
Environment and Energy            :    363
Governance and Democracy          :    332
Justice and Civil Rights          :    307
Health and Social Services        :    179
Education and Innovation          :    107
Infrastructure and Transportation :     85
Immigration and Border Policy     :     45
Agriculture                       :      2
Energy and Environment            :      1
----------------------------------------
Total topics mentioned: 2726
Processing file: ../hein-bound/speeches_097.txt
Found 89302 speeches
Split into 4087 chunks




Successfully processed chunk 10




Error processing chunk 1292: '970083919'




Error processing chunk 1263: '970082008'




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4087 [07:27<5:00:11,  4.52s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_097_gpt_topic_labels.json





topic frequency graph for 097_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 171.93it/s]
Processing files:  58%|█████▊    | 19/33 [2:43:04<1:47:31, 460.82s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    606
Economy and Jobs                  :    370
Defense and Security              :    361
Justice and Civil Rights          :    343
Governance and Democracy          :    330
Health and Social Services        :    235
Environment and Energy            :    143
Education and Innovation          :    116
Infrastructure and Transportation :     78
Immigration and Border Policy     :     50
Community Engagement              :      3
Housing Policy                    :      1
Energy                            :      1
Agriculture and Food              :      1
Agriculture                       :      1
----------------------------------------
Total topics mentioned: 2639
Processing file: ../hein-bound/speeches_098.txt
Found 91014 speeches
Split into 4245 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Error processing chunk 2120: '980142048'




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Error processing chunk 2996: '980194090'




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4245 [07:03<4:55:50,  4.28s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_098_gpt_topic_labels.json





topic frequency graph for 098_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 148.17it/s]
Processing files:  61%|██████    | 20/33 [2:50:10<1:37:34, 450.35s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    407
Budget and Fiscal Responsibility  :    400
Defense and Security              :    390
Justice and Civil Rights          :    346
Economy and Jobs                  :    291
Health and Social Services        :    175
Education and Innovation          :    129
Environment and Energy            :    103
Infrastructure and Transportation :     93
Immigration and Border Policy     :     56
Agriculture                       :     15
Housing and Urban Development     :      6
Agriculture and Food Policy       :      5
Community Service                 :      3
Culture and Arts                  :      2
Water Rights                      :      2
Community Development             :      1
Innovation and Technology         :      1
Housing                           :      1
Culture and Society               :      1
----------------------------------------
Total topics mentioned: 2427
Processin



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4306 [07:23<5:14:08,  4.48s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_099_gpt_topic_labels.json
topic frequency graph for 099_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 183.86it/s]
Processing files:  64%|██████▎   | 21/33 [2:57:35<1:29:46, 448.92s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    666
Justice and Civil Rights          :    369
Defense and Security              :    340
Economy and Jobs                  :    320
Governance and Democracy          :    275
Health and Social Services        :    220
Environment and Energy            :    139
Infrastructure and Transportation :    107
Education and Innovation          :     88
Immigration and Border Policy     :     31
Housing and Urban Development     :     16
Agriculture                       :      2
Trade and Foreign Policy          :      1
Housing and Community Development :      1
Trade Policy                      :      1
Labor and Employment              :      1
----------------------------------------
Total topics mentioned: 2577
Processing file: ../hein-bound/speeches_100.txt
Found 85604 speeches
Split into 4154 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Error processing chunk 2364: '1000154296'




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4154 [07:30<5:07:26,  4.55s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_100_gpt_topic_labels.json





topic frequency graph for 100_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 186.49it/s]
Processing files:  67%|██████▋   | 22/33 [3:05:08<1:22:30, 450.01s/it]


Topic Frequencies:
----------------------------------------
Defense and Security              :    394
Governance and Democracy          :    363
Justice and Civil Rights          :    347
Budget and Fiscal Responsibility  :    305
Health and Social Services        :    246
Economy and Jobs                  :    242
Environment and Energy            :    137
Education and Innovation          :    127
Infrastructure and Transportation :     69
Immigration and Border Policy     :     26
Housing and Community Development :      1
Energy and Environment            :      1
----------------------------------------
Total topics mentioned: 2258
Processing file: ../hein-bound/speeches_101.txt
Found 78132 speeches
Split into 3920 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3920 [06:56<4:27:48,  4.21s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_101_gpt_topic_labels.json





topic frequency graph for 101_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 68.96it/s]
Processing files:  70%|██████▉   | 23/33 [3:12:06<1:13:25, 440.55s/it]


Topic Frequencies:
----------------------------------------
Governance and Democracy          :    362
Justice and Civil Rights          :    346
Budget and Fiscal Responsibility  :    304
Health and Social Services        :    291
Defense and Security              :    265
Economy and Jobs                  :    213
Education and Innovation          :    164
Environment and Energy            :    116
Infrastructure and Transportation :     68
Immigration and Border Policy     :     35
Housing and Community Development :      7
Culture and Arts                  :      3
Community Engagement              :      3
Community and Social Services     :      2
Labor and Jobs                    :      1
Housing                           :      1
Culture and Community             :      1
Foreign Policy                    :      1
----------------------------------------
Total topics mentioned: 2183
Processing file: ../hein-bound/speeches_102.txt
Found 76048 speeches
Split into 3903 chunks




Error processing chunk 685: '1020033889'




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Error processing chunk 1489: '1020088069'




Successfully processed chunk 70




Error processing chunk 3604: '1020220697'




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3903 [07:29<4:47:55,  4.54s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_102_gpt_topic_labels.json





topic frequency graph for 102_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 148.48it/s]
Processing files:  73%|███████▎  | 24/33 [3:19:38<1:06:35, 443.90s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    387
Governance and Democracy          :    376
Economy and Jobs                  :    339
Defense and Security              :    299
Budget and Fiscal Responsibility  :    297
Health and Social Services        :    259
Environment and Energy            :    146
Education and Innovation          :    145
Infrastructure and Transportation :     73
Immigration and Border Policy     :     23
Culture and Heritage              :      2
Community Service                 :      2
Community Development             :      2
Trade and Tariffs                 :      2
Agriculture                       :      2
Agriculture and Rural Development :      1
Energy and Environment            :      1
Disaster Relief                   :      1
Housing and Urban Development     :      1
Trade                             :      1
----------------------------------------
Total topics mentioned: 2359
Processin



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Error processing chunk 2884: '1030184705'




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3645 [06:38<3:57:36,  4.02s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_103_gpt_topic_labels.json
topic frequency graph for 103_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 158.13it/s]
Processing files:  76%|███████▌  | 25/33 [3:26:18<57:25, 430.66s/it]  


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    464
Economy and Jobs                  :    378
Justice and Civil Rights          :    295
Governance and Democracy          :    284
Defense and Security              :    251
Health and Social Services        :    209
Education and Innovation          :    180
Environment and Energy            :     87
Infrastructure and Transportation :     81
Immigration and Border Policy     :     23
Housing and Urban Development     :      2
Trade and Foreign Policy          :      1
----------------------------------------
Total topics mentioned: 2255
Processing file: ../hein-bound/speeches_104.txt
Found 85117 speeches
Split into 4091 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Error processing chunk 156: '1040010745'




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   2%|▏         | 99/4091 [07:21<4:56:23,  4.45s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_104_gpt_topic_labels.json
topic frequency graph for 104_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 199.92it/s]
Processing files:  79%|███████▉  | 26/33 [3:33:41<50:40, 434.39s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    476
Justice and Civil Rights          :    381
Economy and Jobs                  :    334
Governance and Democracy          :    325
Health and Social Services        :    305
Defense and Security              :    253
Education and Innovation          :    159
Environment and Energy            :     98
Infrastructure and Transportation :     64
Immigration and Border Policy     :     41
Housing and Urban Development     :      1
Community Engagement              :      1
----------------------------------------
Total topics mentioned: 2438
Processing file: ../hein-bound/speeches_105.txt
Found 64301 speeches
Split into 3233 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Error processing chunk 2077: Unterminated string starting at: line 3188 column 20 (char 65017)




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3233 [09:20<4:55:43,  5.66s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_105_gpt_topic_labels.json





topic frequency graph for 105_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 199.28it/s]
Processing files:  82%|████████▏ | 27/33 [3:43:03<47:16, 472.78s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    435
Health and Social Services        :    389
Budget and Fiscal Responsibility  :    349
Governance and Democracy          :    297
Economy and Jobs                  :    277
Education and Innovation          :    225
Defense and Security              :    177
Environment and Energy            :    122
Infrastructure and Transportation :     30
Immigration and Border Policy     :     10
Culture and Arts                  :      4
Trade Policy                      :      3
Foreign Policy                    :      3
Social Security                   :      3
Housing and Community Development :      2
Housing                           :      1
History and Heritage              :      1
Agriculture                       :      1
Community and Social Services     :      1
----------------------------------------
Total topics mentioned: 2330
Processing file: ../hein-bound/speeches_106.txt
Foun



Error processing chunk 2631: '1060168732'




Error processing chunk 2327: '1060147357'




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3275 [07:27<3:59:23,  4.52s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_106_gpt_topic_labels.json





topic frequency graph for 106_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 168.04it/s]
Processing files:  85%|████████▍ | 28/33 [3:50:33<38:49, 465.88s/it]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility  :    470
Health and Social Services        :    418
Justice and Civil Rights          :    408
Economy and Jobs                  :    341
Governance and Democracy          :    229
Defense and Security              :    220
Education and Innovation          :    189
Environment and Energy            :     74
Immigration and Border Policy     :     34
Infrastructure and Transportation :     27
Community and Social Services     :      2
Energy and Environment            :      1
Census                            :      1
Agriculture                       :      1
International Relations           :      1
----------------------------------------
Total topics mentioned: 2416
Processing file: ../hein-bound/speeches_107.txt
Found 54327 speeches
Split into 2852 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90




Error processing chunk 503: '1070033110'


Processing chunks:   3%|▎         | 99/2852 [07:18<3:23:18,  4.43s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_107_gpt_topic_labels.json
topic frequency graph for 107_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 161.97it/s]
Processing files:  88%|████████▊ | 29/33 [3:57:53<30:32, 458.19s/it]


Topic Frequencies:
----------------------------------------
Defense and Security              :    384
Justice and Civil Rights          :    312
Governance and Democracy          :    308
Health and Social Services        :    307
Budget and Fiscal Responsibility  :    279
Economy and Jobs                  :    272
Education and Innovation          :    246
Environment and Energy            :    161
Immigration and Border Policy     :     63
Infrastructure and Transportation :     62
Housing and Social Services       :      2
Housing and Urban Development     :      2
Agriculture                       :      2
Community Development             :      1
Energy and Environment            :      1
Culture and Community             :      1
----------------------------------------
Total topics mentioned: 2403
Processing file: ../hein-bound/speeches_108.txt
Found 60530 speeches
Split into 3132 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3132 [06:52<3:30:27,  4.16s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_108_gpt_topic_labels.json
topic frequency graph for 108_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 161.68it/s]
Processing files:  91%|█████████ | 30/33 [4:04:47<22:14, 444.89s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    385
Health and Social Services        :    369
Defense and Security              :    327
Economy and Jobs                  :    327
Budget and Fiscal Responsibility  :    323
Governance and Democracy          :    210
Education and Innovation          :    176
Infrastructure and Transportation :     69
Environment and Energy            :     57
Immigration and Border Policy     :     30
Culture and Arts                  :      2
Housing and Urban Development     :      1
Culture and Community             :      1
Science and Technology            :      1
----------------------------------------
Total topics mentioned: 2278
Processing file: ../hein-bound/speeches_109.txt
Found 58229 speeches
Split into 3135 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90




Error processing chunk 2874: '1090172920'


Processing chunks:   3%|▎         | 99/3135 [06:32<3:20:23,  3.96s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_109_gpt_topic_labels.json
topic frequency graph for 109_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 168.38it/s]
Processing files:  94%|█████████▍| 31/33 [4:11:21<14:19, 429.54s/it]


Topic Frequencies:
----------------------------------------
Defense and Security              :    380
Justice and Civil Rights          :    359
Economy and Jobs                  :    259
Budget and Fiscal Responsibility  :    249
Health and Social Services        :    226
Governance and Democracy          :    204
Education and Innovation          :    197
Environment and Energy            :    138
Infrastructure and Transportation :     76
Immigration and Border Policy     :     68
Transportation and Infrastructure :      3
Trade Policy                      :      1
Energy and Environment            :      1
----------------------------------------
Total topics mentioned: 2161
Processing file: ../hein-bound/speeches_110.txt
Found 65381 speeches
Split into 3482 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3482 [07:33<4:18:24,  4.58s/it]

Successfully processed chunk 100

Results saved to outputs/speeches_110_gpt_topic_labels.json





topic frequency graph for 110_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 132.71it/s]
Processing files:  97%|█████████▋| 32/33 [4:18:57<07:17, 437.46s/it]


Topic Frequencies:
----------------------------------------
Defense and Security              :    357
Health and Social Services        :    350
Justice and Civil Rights          :    318
Governance and Democracy          :    299
Economy and Jobs                  :    278
Budget and Fiscal Responsibility  :    251
Education and Innovation          :    200
Environment and Energy            :    153
Infrastructure and Transportation :    109
Immigration and Border Policy     :     32
Trade Policy                      :      4
Culture and Heritage              :      4
Culture and Community             :      3
Trade                             :      3
Housing and Urban Development     :      3
Culture and Society               :      1
Culture and Arts                  :      1
Energy and Environment            :      1
Foreign Policy                    :      1
History and Culture               :      1
Community Engagement              :      1
------------------------------------



Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Error processing chunk 2349: '1110134185'




Successfully processed chunk 60




Successfully processed chunk 70




Error processing chunk 2127: '1110122169'




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   3%|▎         | 99/3170 [06:56<3:35:31,  4.21s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_111_gpt_topic_labels.json
topic frequency graph for 111_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 145.28it/s]
Processing files: 100%|██████████| 33/33 [4:25:55<00:00, 483.50s/it]



Topic Frequencies:
----------------------------------------
Health and Social Services        :    413
Economy and Jobs                  :    388
Budget and Fiscal Responsibility  :    354
Justice and Civil Rights          :    344
Defense and Security              :    265
Education and Innovation          :    196
Governance and Democracy          :    182
Environment and Energy            :    122
Infrastructure and Transportation :     44
Immigration and Border Policy     :     24
Culture and Community             :      2
Culture and Arts                  :      2
Trade and International Relations :      2
Housing and Social Services       :      1
Energy and Environment            :      1
Community and Civic Engagement    :      1
Housing and Urban Development     :      1
Gun Control                       :      1
----------------------------------------
Total topics mentioned: 2343


Processing files:   0%|          | 0/3 [00:00<?, ?it/s]

Processing file: ../hein-daily/speeches_114.txt
Found 38938 speeches
Split into 2103 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Error processing chunk 1116: '1140062267'




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Error processing chunk 639: '1140037130'




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   5%|▍         | 99/2103 [06:49<2:18:09,  4.14s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_114_gpt_topic_labels.json
topic frequency graph for 114_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 165.73it/s]
Processing files:  33%|███▎      | 1/3 [06:50<13:40, 410.47s/it]


Topic Frequencies:
----------------------------------------
Justice and Civil Rights          :    473
Defense and Security              :    380
Health and Social Services        :    361
Economy and Jobs                  :    297
Education and Innovation          :    225
Budget and Fiscal Responsibility  :    173
Governance and Democracy          :    137
Environment and Energy            :    100
Immigration and Border Policy     :     82
Infrastructure and Transportation :     66
Trade and Tariffs                 :      2
Community and Social Services     :      1
Agriculture                       :      1
----------------------------------------
Total topics mentioned: 2298
Processing file: ../hein-daily/speeches_113.txt
Found 42644 speeches
Split into 2330 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Error processing chunk 1276: '1130068633'




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   4%|▍         | 99/2330 [06:51<2:34:30,  4.16s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_113_gpt_topic_labels.json
topic frequency graph for 113_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 207.12it/s]
Processing files:  67%|██████▋   | 2/3 [13:43<06:51, 411.69s/it]


Topic Frequencies:
----------------------------------------
Health and Social Services        :    419
Justice and Civil Rights          :    375
Economy and Jobs                  :    347
Defense and Security              :    270
Budget and Fiscal Responsibility  :    257
Governance and Democracy          :    255
Education and Innovation          :    184
Environment and Energy            :    101
Immigration and Border Policy     :     85
Infrastructure and Transportation :     60
Agriculture                       :      6
Culture and Community             :      3
Agriculture and Rural Affairs     :      1
Energy and Natural Resources      :      1
----------------------------------------
Total topics mentioned: 2364
Processing file: ../hein-daily/speeches_112.txt
Found 47745 speeches
Split into 2612 chunks




Successfully processed chunk 10




Successfully processed chunk 20




Successfully processed chunk 30




Successfully processed chunk 40




Successfully processed chunk 50




Successfully processed chunk 60




Successfully processed chunk 70




Successfully processed chunk 80




Successfully processed chunk 90


Processing chunks:   4%|▍         | 99/2612 [07:03<2:58:58,  4.27s/it]


Successfully processed chunk 100

Results saved to outputs/speeches_112_gpt_topic_labels.json
topic frequency graph for 112_gpt_topic_labels.json


Processing files: 100%|██████████| 1/1 [00:00<00:00, 177.59it/s]
Processing files: 100%|██████████| 3/3 [20:47<00:00, 415.79s/it]



Topic Frequencies:
----------------------------------------
Economy and Jobs                  :    476
Budget and Fiscal Responsibility  :    414
Health and Social Services        :    288
Justice and Civil Rights          :    285
Defense and Security              :    260
Governance and Democracy          :    234
Education and Innovation          :    210
Environment and Energy            :    110
Infrastructure and Transportation :    105
Immigration and Border Policy     :     54
Agriculture                       :      3
Trade and Foreign Relations       :      3
Community Service                 :      3
Water Issues                      :      1
Tourism                           :      1
Culture and Society               :      1
----------------------------------------
Total topics mentioned: 2448


Processing files: 100%|██████████| 38/38 [00:00<00:00, 150.34it/s]


Topic Frequencies:
----------------------------------------
Budget and Fiscal Responsibility     :  15578
Justice and Civil Rights             :  15305
Economy and Jobs                     :  15189
Governance and Democracy             :  15149
Defense and Security                 :  12459
Health and Social Services           :   9493
Education and Innovation             :   5703
Environment and Energy               :   4288
Infrastructure and Transportation    :   3924
Immigration and Border Policy        :   1656
Agriculture                          :    249
Housing and Urban Development        :    198
Housing and Social Services          :     63
Energy and Environment               :     36
Housing and Community Development    :     26
Labor Relations                      :     25
Foreign Policy                       :     22
Culture and Community                :     20
Culture and Arts                     :     20
Trade                                :     17
Trade Agreements   


