# Combine modern with historic data

In [2]:
import pandas as pd
import numpy as np

In [46]:
# Modern chamber counts, digitized from the partisan-composition PDFs
# published here:
# https://www.ncsl.org/research/about-state-legislatures/partisan-composition.aspx
modern_csv = './data/outputs/state_legislative_control_2009_2021.csv'
modern = pd.read_csv(modern_csv)
modern

Unnamed: 0,state,year,senate_total,senate_dem,senate_rep,senate_other,house_total,house_dem,house_rep,house_other
0,Alabama,2009,35,19,13,0,105,62,43,0
1,Alaska,2009,20,10,10,0,40,18,22,0
2,Arizona,2009,30,12,18,0,60,25,35,0
3,Arkansas,2009,35,27,8,0,100,71,28,1
4,California,2009,40,26,14,0,80,51,29,0
...,...,...,...,...,...,...,...,...,...,...
645,Virginia,2021,40,21,18,0,100,55,45,0
646,Washington,2021,49,29,20,0,98,57,41,0
647,West Virginia,2021,34,11,23,0,100,23,77,0
648,Wisconsin,2021,33,12,20,0,99,38,60,0


In [47]:
# Historic partisan-balance data (Klarner); should be available through
# 2010/2011:
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/20403

# Source column -> column name used by the modern table. Insertion order
# determines the column order of the resulting frame.
klarner_to_modern = {
    'year': 'year',
    'state': 'state',
    'sen_dem_in_sess': 'senate_dem',
    'sen_rep_in_sess': 'senate_rep',
    'sen_ind_in_sess': 'senate_other',
    'sen_tot_in_sess': 'senate_total',
    'hs_dem_in_sess': 'house_dem',
    'hs_rep_in_sess': 'house_rep',
    'hs_ind_in_sess': 'house_other',
    'hs_tot_in_sess': 'house_total',
}
select_cols = list(klarner_to_modern)
renames = list(klarner_to_modern.values())

# The count columns are left as floats: some state-year rows are entirely
# null, so a plain integer cast is not applied here.
historic = (
    pd.read_csv('./data/klarner/Partisan_Balance_For_Use2011_06_09b.csv')
    .loc[:, select_cols]
    .rename(columns=klarner_to_modern)
    # Keep only the years before the modern data starts, to avoid overlap.
    .query('year < 2009')
)
historic

Unnamed: 0,year,state,senate_dem,senate_rep,senate_other,senate_total,house_dem,house_rep,house_other,house_total
0,1939,Alabama,35.0,0.0,0.0,35.0,105.0,1.0,0.0,106.0
1,1940,Alabama,35.0,0.0,0.0,35.0,105.0,1.0,0.0,106.0
2,1941,Alabama,35.0,0.0,0.0,35.0,105.0,1.0,0.0,106.0
3,1942,Alabama,35.0,0.0,0.0,35.0,105.0,1.0,0.0,106.0
4,1943,Alabama,35.0,0.0,0.0,35.0,105.0,1.0,0.0,106.0
...,...,...,...,...,...,...,...,...,...,...
4085,1935,Wisconsin,,,,,,,,
4086,1936,Wisconsin,,,,,,,,
4092,1934,Wyoming,,,,,,,,
4093,1935,Wyoming,,,,,,,,


In [48]:
# Pull out every state-year row that has at least one null field. These
# rows are kept (not dropped or filled) for now.
# Many of the gaps come from Nebraska, which is unicameral and technically
# nonpartisan.
has_null = historic.isna().any(axis='columns')
missing = historic.loc[has_null]
n_missing_years = len(missing['year'].unique())
print('Missing state annual records: ', len(missing))
print('Covering: ', n_missing_years, 'years')
missing.head(10)

Missing state annual records:  259
Covering:  75 years


Unnamed: 0,year,state,senate_dem,senate_rep,senate_other,senate_total,house_dem,house_rep,house_other,house_total
2681,1937,Minnesota,,,,,,,,
2682,1938,Minnesota,,,,,,,,
3592,1934,Alabama,,,,,,,,
3593,1935,Alabama,,,,,,,,
3594,1936,Alabama,,,,,,,,
3595,1937,Alabama,,,,,,,,
3596,1938,Alabama,,,,,,,,
3602,1934,Alaska,,,,,,,,
3603,1935,Alaska,,,,,,,,
3604,1936,Alaska,,,,,,,,


# Combine

In [49]:
# Stack the modern records on top of the pre-2009 historic records, then
# order the rows by state and, within each state, by year.
# concat keeps each frame's own row labels, so the index of the result is
# not unique; index=False leaves those labels out of the saved file.
combined_csv = './data/outputs/state_legislative_control_1934_2021.csv'
all_records = (
    pd.concat([modern, historic])
    .sort_values(by=['state', 'year'], ascending=True)
)
all_records.to_csv(combined_csv, index=False)
all_records

Unnamed: 0,state,year,senate_total,senate_dem,senate_rep,senate_other,house_total,house_dem,house_rep,house_other
3592,Alabama,1934,,,,,,,,
3593,Alabama,1935,,,,,,,,
3594,Alabama,1936,,,,,,,,
3595,Alabama,1937,,,,,,,,
3596,Alabama,1938,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
449,Wyoming,2017,30.0,3.0,27.0,0.0,60.0,9.0,51.0,0.0
499,Wyoming,2018,30.0,3.0,27.0,0.0,60.0,9.0,51.0,0.0
549,Wyoming,2019,30.0,3.0,27.0,0.0,60.0,9.0,50.0,1.0
599,Wyoming,2020,30.0,3.0,27.0,0.0,60.0,9.0,50.0,1.0
