In [None]:
import pandas as pd
import os
import json

# Folder prefix that the compiled per-congress JSON file names are appended to
JSON_DIR = "compiled_json/"
# Congress numbers 119 down to 106, newest first (author's note: year 2000-2025)
congresses = list(range(119, 105, -1))
# Bill types processed for every congress
bill_types = ["hr", "s", "hjres", "sjres"]

In [None]:
def create_vote_excel(congress, bill_type, file_path):
    """Flatten the recorded votes in a compiled bill JSON file into a CSV table.

    The JSON file is read as a mapping of bill id -> bill data. For every
    action of a bill that carries ``recordedVotes`` with a ``voteTable``, one
    row is produced per representative listed in that table. The result is
    written to ``vote_tables/{congress}_{bill_type}.csv`` (the directory is
    created when missing).

    Args:
        congress: Congress number; only used to name the output file.
        bill_type: Bill type such as "hr"; only used to name the output file.
        file_path: Path of the compiled JSON file to read.

    Returns:
        A DataFrame with columns Bill, DateTime, Name, Party, Vote, State and
        Text, or None when ``file_path`` does not exist (nothing is written in
        that case).
    """
    if not os.path.exists(file_path):
        return None

    with open(file_path, "r") as b_file:
        data = json.load(b_file)

    columns = ['Bill', 'DateTime', 'Name', 'Party', 'Vote', 'State', 'Text']
    records = []
    for bill, bill_data in data.items():
        actions = bill_data.get("actions") if isinstance(bill_data, dict) else None
        # Skip bills without action data or whose action payload is an error
        if not isinstance(actions, dict) or "error" in actions:
            continue

        for action in actions.get("actions") or []:
            if not isinstance(action, dict):
                continue
            text = action.get('text', 'N/A')

            for recorded_vote in action.get("recordedVotes") or []:
                if not isinstance(recorded_vote, dict):
                    continue
                vote_table = recorded_vote.get("voteTable")
                if not isinstance(vote_table, dict):
                    continue
                # A missing date no longer aborts the whole file
                date_time = recorded_vote.get("date", 'N/A')

                for vote_type, representatives in vote_table.items():
                    if not isinstance(representatives, dict):
                        continue
                    # NOTE(review): every vote_table key other than 'Yea' is
                    # recorded as 'N' -- confirm the table only ever holds
                    # Yea/Nay groups (e.g. no "Not Voting" / "Present").
                    vote = 'Y' if vote_type == 'Yea' else 'N'
                    for rep_data in representatives.values():
                        if not isinstance(rep_data, dict):
                            continue
                        records.append({
                            'Bill': bill,
                            'DateTime': date_time,
                            'Name': rep_data.get('name'),
                            'Party': rep_data.get('party'),
                            'Vote': vote,
                            'State': rep_data.get('state'),
                            'Text': text
                        })

    df = pd.DataFrame(records, columns=columns)
    output_file = f"vote_tables/{congress}_{bill_type}.csv"
    # to_csv does not create parent directories, so make sure it exists
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    df.to_csv(output_file, index=False)
    print(f"DataFrame saved to {output_file}")
    return df

In [None]:
import requests
import pandas as pd
import os

def extract_bill_text(df, congress, bill_type, file_path, timeout=30):
    """Download the formatted text of every voted-on bill and save it as CSV.

    Only bills whose id appears in ``df['Bill']`` are considered. For each of
    their text versions that offers a "Formatted Text" format, the URL is
    fetched and one row (Bill, DateTime, Title, Policy Area, Type, Raw Text)
    is written to ``bill_texts/{congress}_{bill_type}.csv`` (the directory is
    created when missing). Versions without a "Formatted Text" URL, and
    downloads that fail, are skipped.

    Args:
        df: DataFrame with a 'Bill' column listing the bill ids to keep.
        congress: Congress number; only used to name the output file.
        bill_type: Bill type such as "hr"; only used to name the output file.
        file_path: Path of the compiled JSON file (mapping bill id -> data).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Nothing is written when ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        return

    with open(file_path, "r") as b_file:
        data = json.load(b_file)

    # Set membership instead of scanning the numpy array once per bill
    voted_bills = set(df['Bill'].unique())
    records = []

    for bill, bill_data in data.items():
        if bill not in voted_bills:
            continue
        if not isinstance(bill_data, dict) or "bill" not in bill_data or "text" not in bill_data:
            continue

        try:
            bill_info = bill_data["bill"]["bill"]
            title = bill_info["title"]
        except (KeyError, TypeError):
            # Bill metadata is missing or malformed (e.g. an error payload)
            continue

        policy = bill_info.get("policyArea") if isinstance(bill_info, dict) else None
        policy_area = policy.get("name", "N/A") if isinstance(policy, dict) else "N/A"

        text_info = bill_data["text"]
        text_versions = text_info.get("textVersions") if isinstance(text_info, dict) else None
        if not isinstance(text_versions, list) or not text_versions:
            continue

        for version in text_versions:
            if not isinstance(version, dict):
                continue

            text_type = version.get("type", "N/A")
            date_time = version.get("date", "N/A")
            formats = version.get("formats") or []

            # Reset per version: previously a version without "Formatted Text"
            # either raised UnboundLocalError or silently reused the URL of
            # the preceding version, attaching the wrong text to this row.
            url = None
            for fmt in formats:
                if isinstance(fmt, dict) and fmt.get("type") == "Formatted Text":
                    url = fmt.get("url")
            if not url:
                continue

            try:
                response = requests.get(url, timeout=timeout)
                # Do not store an HTTP error page as if it were the bill text
                response.raise_for_status()
            except requests.RequestException as exc:
                print(f"Skipping {bill} ({text_type}): could not fetch {url}: {exc}")
                continue

            records.append({
                'Bill': bill,
                'DateTime': date_time,
                'Title': title,
                'Policy Area': policy_area,
                'Type': text_type,
                'Raw Text': response.text
            })

    texts_df = pd.DataFrame(
        records,
        columns=['Bill', 'DateTime', 'Title', 'Policy Area', 'Type', 'Raw Text'],
    )
    output_file = f"bill_texts/{congress}_{bill_type}.csv"
    # to_csv does not create parent directories, so make sure it exists
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    texts_df.to_csv(output_file, index=False)

    print(f"New DataFrame saved to {output_file}")

In [None]:
# Build the vote table and then the bill-text table for every
# (congress, bill type) combination, in the order given by `congresses`.
for c in congresses:
    for b_t in bill_types:
        file_path = f"{JSON_DIR}{c}_{b_t}_compiled.json"
        votes_df = create_vote_excel(c, b_t, file_path)
        extract_bill_text(votes_df, c, b_t, file_path)

# cadens

In [None]:
import pandas as pd
import os

def process_voting_data(file_path):
    """Flatten the recorded votes of one compiled JSON file into a DataFrame.

    The congress number and bill type are taken from the first two
    underscore-separated parts of the file name (e.g. '118_hr_compiled.json').
    One row is produced per representative found in any ``voteTable`` under
    bill -> actions -> actions -> recordedVotes, and the table is saved to
    'Outputs hack/Voting Data {congress} {type}.csv' (the directory is created
    when missing).

    Args:
        file_path: Path of the compiled JSON file (mapping bill id -> data).

    Returns:
        DataFrame with columns Bill, Name, Party, Vote, State and Text.
    """
    # Local import: the cell defining this function only imports pandas and os
    import json

    file_name = os.path.basename(file_path)
    congress, type_ = file_name.split('_')[:2]
    records = []

    # Parse with json.load rather than pd.read_json: the payload is a nested
    # mapping, not a table, and read_json may convert axis labels and values
    # (e.g. numeric-looking bill ids), which breaks later lookups by bill id.
    with open(file_path, 'r') as f:
        data = json.load(f)
    if not isinstance(data, dict):
        data = {}

    for bill_name, bill_data in data.items():
        if not isinstance(bill_data, dict):
            continue

        # "actions" must be a non-empty dict holding a nested "actions" list
        actions = bill_data.get('actions', {})
        if not isinstance(actions, dict) or not actions:
            continue
        nested_actions = actions.get('actions', [])
        if not isinstance(nested_actions, list) or not nested_actions:
            continue

        for action_data in nested_actions:
            if not isinstance(action_data, dict):
                continue

            recorded_votes = action_data.get('recordedVotes', [])
            if not isinstance(recorded_votes, list) or not recorded_votes:
                continue

            # Description of the action the vote belongs to
            text = action_data.get('text', 'N/A')

            for vote_data in recorded_votes:
                if not isinstance(vote_data, dict):
                    continue

                vote_table = vote_data.get('voteTable', {})
                if not isinstance(vote_table, dict) or not vote_table:
                    continue

                for vote_type, representatives in vote_table.items():
                    if not isinstance(representatives, dict):
                        continue
                    # NOTE(review): every vote_table key other than 'Yea' is
                    # recorded as 'N' -- confirm no other vote groups occur.
                    vote = 'Y' if vote_type == 'Yea' else 'N'
                    for rep_data in representatives.values():
                        if not isinstance(rep_data, dict):
                            continue
                        records.append({
                            'Bill': bill_name,
                            'Name': rep_data.get('name'),
                            'Party': rep_data.get('party'),
                            'Vote': vote,
                            'State': rep_data.get('state'),
                            'Text': text
                        })

    df = pd.DataFrame(records, columns=['Bill', 'Name', 'Party', 'Vote', 'State', 'Text'])

    output_dir = 'Outputs hack'
    # to_csv does not create parent directories, so make sure it exists
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"Voting Data {congress} {type_}.csv")
    df.to_csv(output_file, index=False)

    print(f"DataFrame saved to {output_file}")

    return df

# Example usage:
#file_path = "Hack some bitches/118_hr_compiled.json"
#df = process_voting_data(file_path)

# You can now use df for further processing in another function
#df 


In [None]:
import requests
import pandas as pd
import os

def _dict_or_empty(value):
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def process_bill_text_data(df, data, file_path, timeout=30):
    """Fetch the formatted text of each voted-on bill and save it as CSV.

    For every unique id in ``df['Bill']`` the matching entry of ``data`` is
    read, and one row is produced per text version. The "Formatted Text" URL
    of a version is downloaded; when a version has no such URL its Raw Text is
    'N/A', and a failed download stores a short failure message instead. The
    table is saved to 'Outputs hack/Bill Type and Text {congress} {type}.csv',
    where congress and type come from the first two underscore-separated parts
    of the file name (the directory is created when missing).

    Args:
        df: DataFrame with a 'Bill' column listing the bill ids to process.
        data: Parsed compiled JSON, a mapping of bill id -> bill data.
        file_path: Path of the compiled JSON file; only its name is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        DataFrame with columns Bill, Title, Policy Area, Type and Raw Text.
    """
    file_name = os.path.basename(file_path)
    # Kept in its own variable: the per-version text type used to overwrite
    # it, so the output file was named after the last text version processed.
    congress, bill_type = file_name.split('_')[:2]

    new_records = []

    for bill_key in df['Bill'].unique():
        bill_data = data.get(bill_key, {})
        if not bill_data:
            print(f"No data found for bill: {bill_key}")
            continue
        bill_data = _dict_or_empty(bill_data)

        bill_info = _dict_or_empty(_dict_or_empty(bill_data.get('bill')).get('bill'))
        title = bill_info.get('title', 'N/A')
        policy_area = _dict_or_empty(bill_info.get('policyArea')).get('name', 'N/A')

        text_versions = _dict_or_empty(bill_data.get('text')).get('textVersions', [])
        if not isinstance(text_versions, list) or not text_versions:
            print(f"No text versions found for bill: {bill_key}")
            continue

        for version in text_versions:
            if not isinstance(version, dict):
                continue

            text_type = version.get('type', 'N/A')

            formats = version.get('formats', [])
            if not formats or not isinstance(formats, list):
                print(f"No formats found for type: {text_type}")
                continue

            # Reset per version: previously a version without "Formatted Text"
            # either raised UnboundLocalError or reused the preceding
            # version's URL. The last matching format wins, as before.
            url = 'N/A'
            for fmt in formats:
                if isinstance(fmt, dict) and fmt.get("type") == "Formatted Text":
                    url = fmt.get("url") or 'N/A'

            raw_text = 'N/A'
            if url != 'N/A':
                try:
                    response = requests.get(url, timeout=timeout)
                    if response.status_code == 200:
                        raw_text = response.text
                    else:
                        raw_text = f"Failed to fetch. Status Code: {response.status_code}"
                except Exception as e:
                    # Best effort: record the failure and keep processing
                    raw_text = f"Error: {str(e)}"
            else:
                print(f"No valid URL for type: {text_type}")

            new_records.append({
                'Bill': bill_key,
                'Title': title,
                'Policy Area': policy_area,
                'Type': text_type,
                'Raw Text': raw_text
            })

    new_df = pd.DataFrame(new_records, columns=['Bill', 'Title', 'Policy Area', 'Type', 'Raw Text'])

    output_dir = 'Outputs hack'
    # to_csv does not create parent directories, so make sure it exists
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"Bill Type and Text {congress} {bill_type}.csv")
    new_df.to_csv(output_file, index=False)

    print(f"New DataFrame saved to {output_file}")

    return new_df




In [None]:
import json
import os

# Congress numbers (115 through 119) and bill types to process
congress_range = range(115, 120)
types = ['hr', 'hjres', 's', 'sjres']

# Run both processing steps for every congress / bill-type file that exists
for congress in congress_range:
    for type_ in types:
        file_path = f"Hack some bitches/{congress}_{type_}_compiled.json"

        if os.path.exists(file_path):
            # Step 1: build (and save) the voting table for this file
            df = process_voting_data(file_path)

            # Step 2: reload the raw JSON and fetch the text of the voted bills
            with open(file_path, 'r') as f:
                data = json.load(f)
            process_bill_text_data(df, data, file_path)

            print(f"Processed {congress} {type_} completed.")
        else:
            print(f"File {file_path} does not exist, skipping...")

print("All files processed.")

