In [15]:
import pandas as pd 
import os 
import sys
# Put the local helper directory on the import path so the module below resolves.
# The path is relative, so this only works when the notebook's working
# directory is the one that contains `processing_data/`.
sys.path.append('./processing_data/')

# Star import: the helper names used unqualified later in this notebook
# (load_metadata, extract_subject_names, count_subjects) presumably come from
# this module — TODO confirm, and consider importing them explicitly.
from metadata_functions import *

In [16]:
# Notebook configuration: the sessions to analyse and the metadata tables to
# load for each of them.
# EDIT ME
sessions = [116, 117, 118, 119]
file_types = [
    'actions',
    'committees',
    'subjects',
    'titles',
    'sponsors',
    'cosponsors',
    'relatedBills',
    'summaries',
]

# Later cells index the result as metadata_dict[session][table_name].
# NOTE(review): a later cell reads metadata_dict[session]['metadata'], which is
# not listed in file_types — presumably load_metadata always loads it; confirm.
metadata_dict = load_metadata(sessions, file_types, "./data/processed_data/")

### Subjects

In [17]:
# Add a "subject_names" column to every session's subjects table by running
# extract_subject_names over its "legislativeSubjects" column.
for session, session_tables in metadata_dict.items():
    subjects_df = session_tables['subjects']
    subjects_df["subject_names"] = subjects_df["legislativeSubjects"].apply(extract_subject_names)

In [18]:
## count subjects for each session

# One count table per session, keyed by session, as returned by count_subjects.
subject_count_dfs = {
    session: count_subjects(metadata_dict[session]['subjects'])
    for session in sessions
}

In [19]:
## merge all the subject counts for comparison

# Tag each per-session count table with its session and stack them into one long frame.
subject_counts_long = pd.concat(
    [df.assign(session=session) for session, df in subject_count_dfs.items()]
)

# Pivot to one row per subject and one column per session.
# - aggfunc="sum": the values are counts, so duplicate (subject, session) rows
#   should add up rather than be averaged (pivot_table's default is the mean).
# - reindex(columns=sessions): the columns follow the `sessions` config instead
#   of a hard-coded list, and a session with no rows still gets a zero column.
subject_count_dfs_combined = (
    subject_counts_long
    .pivot_table(index="subject",
                 columns="session",
                 values="bill_count",
                 aggfunc="sum",
                 fill_value=0)
    .reindex(columns=sessions, fill_value=0)
    .astype(int)  # integers for readability
    .reset_index()
    .rename_axis(None, axis=1)
)

# Total across the session columns only (selected by label, not by position).
subject_count_dfs_combined['total'] = subject_count_dfs_combined[sessions].sum(axis=1)
subject_count_dfs_combined = (
    subject_count_dfs_combined
    .sort_values('total', ascending=False)
    .reset_index(drop=True)
)

subject_count_dfs_combined.head(10)

Unnamed: 0,subject,116,117,118,119,total
0,Housing and community development funding,162,111,91,22,386
1,Low- and moderate-income housing,99,62,71,16,248
2,Congressional oversight,68,64,57,17,206
3,Housing finance and home ownership,77,47,34,7,165
4,Public housing,67,41,39,11,158
5,Government information and archives,58,44,34,6,142
6,Homelessness and emergency shelter,62,36,22,5,125
7,Housing supply and affordability,54,36,26,7,123
8,Landlord and tenant,54,31,21,7,113
9,Department of Housing and Urban Development,48,30,17,6,101


### Chamber Origination

In [20]:
# Stack every session's 'metadata' table into a single frame.
concatinated_metadata = pd.concat(
    [session_tables['metadata'] for session_tables in metadata_dict.values()]
)

In [21]:
# Cross-tabulate 'data.congress' (rows) against 'data.originChamberCode'
# (columns), then append a per-row total over the count columns.
origination_crosstab = (
    pd.crosstab(
        concatinated_metadata['data.congress'],
        concatinated_metadata['data.originChamberCode'],
    )
    .reset_index()
    .rename_axis(None, axis=1)
    .assign(total=lambda table: table.iloc[:, 1:].sum(axis=1))  # skip the leading key column
)

origination_crosstab

Unnamed: 0,data.congress,H,S,total
0,116,167,82,249
1,117,156,73,229
2,118,167,79,246
3,119,96,43,139


### Sponsors

In [22]:
# Stack every session's 'sponsors' table into a single frame.
concatinated_sponsors = pd.concat([metadata_dict[session]['sponsors'] for session in metadata_dict.keys()])

# The session is the text before the first "-" in bill_id. The vectorized
# .str accessor propagates a missing bill_id as NaN instead of raising
# TypeError the way indexing each split result in a Python loop would.
concatinated_sponsors['congressional_session'] = (
    concatinated_sponsors['bill_id'].str.split("-", n=1).str[0]
)


In [23]:
# Cross-tabulate 'congressional_session' (rows) against sponsor 'party'
# (columns), then append a per-row total over the count columns.
sponsors_crosstab = (
    pd.crosstab(
        concatinated_sponsors['congressional_session'],
        concatinated_sponsors['party'],
    )
    .reset_index()
    .rename_axis(None, axis=1)
    .assign(total=lambda table: table.iloc[:, 1:].sum(axis=1))  # skip the leading key column
)

sponsors_crosstab

Unnamed: 0,congressional_session,D,I,R,total
0,116,215,1,33,249
1,117,192,1,36,229
2,118,188,2,56,246
3,119,94,1,44,139


### Committees

In [24]:
# Per-session table of how often each committee name occurs.
committee_count_dfs = {}

for session in sessions:

    # Name the output columns explicitly ('name', 'count') rather than relying
    # on value_counts()'s default labels, which differ between pandas 1.x and
    # 2.x; the pivot in the following cell expects exactly these two names.
    committee_count_dfs[session] = (
        metadata_dict[session]['committees']['name']
        .value_counts()
        .rename_axis('name')
        .reset_index(name='count')
        .assign(session=session)
    )

In [25]:
# Stack the per-session committee counts and pivot to one row per committee
# and one column per session.
# - aggfunc="sum": the values are counts, so duplicates should add up rather
#   than be averaged (pivot_table's default is the mean).
# - reindex(columns=sessions): the columns follow the `sessions` config instead
#   of a hard-coded list, and a session with no rows still gets a zero column.
committe_count_combined = (
    pd.concat(committee_count_dfs)
    .pivot_table(index='name',
                 columns='session',
                 values='count',
                 aggfunc='sum',
                 fill_value=0)
    .reindex(columns=sessions, fill_value=0)
    .astype(int)  # integers for readability
    .reset_index()
    .rename_axis(None, axis=1)
)

# Total across the session columns only (selected by label, not by position).
committe_count_combined['total'] = committe_count_combined[sessions].sum(axis=1)
committe_count_combined = (
    committe_count_combined
    .sort_values('total', ascending=False)
    .reset_index(drop=True)
)

committe_count_combined

Unnamed: 0,name,116,117,118,119,total
0,Financial Services Committee,155,145,149,87,536
1,"Banking, Housing, and Urban Affairs Committee",84,67,68,43,262
2,Judiciary Committee,20,17,18,14,69
3,Ways and Means Committee,7,12,18,9,46
4,Transportation and Infrastructure Committee,7,6,7,6,26
5,Energy and Commerce Committee,3,10,8,4,25
6,Education and Workforce Committee,5,8,6,3,22
7,Appropriations Committee,7,6,5,1,19
8,Veterans' Affairs Committee,6,3,3,3,15
9,Budget Committee,7,3,1,0,11
