In [52]:
# Packages
import yaml
import pandas as pd


# Load legislators data
with open('../data/legislators-historical.yaml', 'r') as file:
    legislators_historical = yaml.safe_load(file)


In [53]:
# Flatten the data and extract necessary information
legislators_data_historical = []
for entry in legislators_historical:
    flat_entry = {
        'bioguide': entry['id']['bioguide'],
        'govtrack': entry['id']['govtrack'],
        'icpsr': entry['id'].get('icpsr',''),
        'wikipedia': entry.get('wikipedia', ''),
        'first_name': entry['name'].get('first',''),
        'last_name': entry['name'].get('last',''),
        'birthday': entry['bio'].get('birthday',''),
        'gender': entry['bio']['gender'],
        'term_type': entry['terms'][0]['type'],  # Assuming first term, adjust if needed
        'term_start': entry['terms'][0]['start'],
        'term_end': entry['terms'][0]['end'],
        'state': entry['terms'][0]['state'],
        'class': entry['terms'][0].get('class', ''),
        'party': entry['terms'][0].get('party', '')
    }
    legislators_data_historical.append(flat_entry)

# Convert to DataFrame
legislators_historical_df = pd.DataFrame(legislators_data_historical)


In [49]:
# Load current legislators data
with open('../data/legislators-current.yaml', 'r') as file:
    legislators_current = yaml.safe_load(file)

In [50]:
# Flatten the data and extract necessary information
legislators_data_current = []
for entry in legislators_current:
    flat_entry = {
        'bioguide': entry['id']['bioguide'],
        'govtrack': entry['id']['govtrack'],
        'icpsr': entry['id'].get('icpsr',''),
        'wikipedia': entry.get('wikipedia', ''),
        'first_name': entry['name'].get('first',''),
        'last_name': entry['name'].get('last',''),
        'birthday': entry['bio'].get('birthday',''),
        'gender': entry['bio']['gender'],
        'term_type': entry['terms'][0]['type'],  # Assuming first term, adjust if needed
        'term_start': entry['terms'][0]['start'],
        'term_end': entry['terms'][0]['end'],
        'state': entry['terms'][0]['state'],
        'class': entry['terms'][0].get('class', ''),
        'party': entry['terms'][0].get('party', '')
    }
    legislators_data_current.append(flat_entry)

# Convert to DataFrame
legislators_current_df = pd.DataFrame(legislators_data_current)

In [51]:
legislators_current_df

Unnamed: 0,bioguide,govtrack,icpsr,wikipedia,first_name,last_name,birthday,gender,term_type,term_start,term_end,state,class,party
0,B000944,400050,29389,,Sherrod,Brown,1952-11-09,M,rep,1993-01-05,1995-01-03,OH,,Democrat
1,C000127,300018,39310,,Maria,Cantwell,1958-10-13,F,rep,1993-01-05,1995-01-03,WA,,Democrat
2,C000141,400064,15408,,Benjamin,Cardin,1943-10-05,M,rep,1987-01-06,1989-01-03,MD,,Democrat
3,C000174,300019,15015,,Thomas,Carper,1947-01-23,M,rep,1983-01-03,1985-01-03,DE,,Democrat
4,C001070,412246,40703,,Robert,Casey,1960-04-13,M,sen,2007-01-04,2013-01-03,PA,1,Democrat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
533,F000480,456958,,,Vince,Fong,1979-10-24,M,rep,2024-06-03,2025-01-03,CA,,Republican
534,R000619,456959,,,Michael,Rulli,1969-03-11,M,rep,2024-06-11,2025-01-03,OH,,Republican
535,L000604,456960,,,Greg,Lopez,1964-06-07,M,rep,2024-07-08,2025-01-03,CO,,Republican
536,H001097,456961,,,George,Helmy,1979-10-27,M,sen,2024-09-09,2025-01-03,NJ,1,Democrat


In [60]:
legislators_historical_df

Unnamed: 0,bioguide,govtrack,icpsr,wikipedia,first_name,last_name,birthday,gender,term_type,term_start,term_end,state,class,party
0,B000226,401222,507,,Richard,Bassett,1745-04-02,M,sen,1789-03-04,1793-03-03,DE,2,Anti-Administration
1,B000546,401521,786,,Theodorick,Bland,1742-03-21,M,rep,1789-03-04,1791-03-03,VA,,
2,B001086,402032,1260,,Aedanus,Burke,1743-06-16,M,rep,1789-03-04,1791-03-03,SC,,
3,C000187,402334,1538,,Daniel,Carroll,1730-07-22,M,rep,1789-03-04,1791-03-03,MD,,
4,C000538,402671,1859,,George,Clymer,1739-03-16,M,rep,1789-03-04,1791-03-03,PA,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12143,B001297,412619,21510,,Ken,Buck,1959-02-16,M,rep,2015-01-06,2017-01-03,CO,,Republican
12144,G000579,412731,21720,,Mike,Gallagher,1984-03-03,M,rep,2017-01-03,2019-01-03,WI,,Republican
12145,J000032,400199,29573,,Sheila,Jackson Lee,1950-01-12,F,rep,1995-01-04,1997-01-03,TX,,Democrat
12146,P000096,400309,29741,,Bill,Pascrell,1937-01-25,M,rep,1997-01-07,1999-01-03,NJ,,Democrat


In [58]:
legislators_df = pd.merge(legislators_current_df,legislators_historical_df,'outer')

In [59]:
legislators_df

Unnamed: 0,bioguide,govtrack,icpsr,wikipedia,first_name,last_name,birthday,gender,term_type,term_start,term_end,state,class,party
0,B000944,400050,29389,,Sherrod,Brown,1952-11-09,M,rep,1993-01-05,1995-01-03,OH,,Democrat
1,C000127,300018,39310,,Maria,Cantwell,1958-10-13,F,rep,1993-01-05,1995-01-03,WA,,Democrat
2,C000141,400064,15408,,Benjamin,Cardin,1943-10-05,M,rep,1987-01-06,1989-01-03,MD,,Democrat
3,C000174,300019,15015,,Thomas,Carper,1947-01-23,M,rep,1983-01-03,1985-01-03,DE,,Democrat
4,C001070,412246,40703,,Robert,Casey,1960-04-13,M,sen,2007-01-04,2013-01-03,PA,1,Democrat
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12681,B001297,412619,21510,,Ken,Buck,1959-02-16,M,rep,2015-01-06,2017-01-03,CO,,Republican
12682,G000579,412731,21720,,Mike,Gallagher,1984-03-03,M,rep,2017-01-03,2019-01-03,WI,,Republican
12683,J000032,400199,29573,,Sheila,Jackson Lee,1950-01-12,F,rep,1995-01-04,1997-01-03,TX,,Democrat
12684,P000096,400309,29741,,Bill,Pascrell,1937-01-25,M,rep,1997-01-07,1999-01-03,NJ,,Democrat
