In [1]:
import os
import pandas as pd

In [57]:
# Raise pandas' display limits so large frames can be inspected without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 50

In [3]:
# Directory holding the raw input files, relative to the notebook's working directory.
RAW_FILEPATH = os.path.join(os.pardir, 'raw')

### Read in DIME and Voteview

**Read in Voteview** (https://voteview.com/articles/data_help_members)

In [70]:
# Columns kept from the Voteview member file: identifiers plus ideology scores.
VV_IDENTIFIERS = ['icpsr', 'bioname', 'congress', 'chamber']
VV_IDEO = ['nominate_dim1', 'nominate_log_likelihood', 'nominate_geo_mean_probability', 'nokken_poole_dim1']
VV_USECOLS = VV_IDENTIFIERS + VV_IDEO

vv = pd.read_csv(os.path.join(RAW_FILEPATH, 'HSall_members.csv'), usecols=VV_USECOLS)

# Keep the ICPSR id as a string; it is the key used for the merge with DIME.
vv['icpsr'] = vv['icpsr'].astype('str')

# Order by Congress so that "forward" and "last" below mean "most recent".
vv = vv.sort_values(by=['congress', 'icpsr'], ascending=True)

# Fill forward, retain latest scores where available.
# GroupBy.ffill returns a result on the frame's own row index, so the
# assignment aligns row-for-row (unlike groupby(...).apply, whose result
# index may carry the group key).
# NOTE(review): scores are filled within 'bioname' but the latest row is
# picked per 'icpsr' below -- confirm this difference in keys is intended.
for col in VV_IDEO:
    vv[col] = vv.groupby('bioname')[col].ffill()

# Retain latest obs per ICPSR id
vv = vv.groupby('icpsr').tail(1)

12406

**Read in DIME**

In [82]:
# Columns kept from the DIME recipients file: identifiers plus ideology scores.
DIME_IDENTIFIERS = ['cycle', 'name', 'ICPSR2', 'party', 'state', 'seat', 'district']
DIME_IDEOLOGIES = ['recipient.cfscore.dyn', 'dwdime']
DIME_USECOLS = DIME_IDENTIFIERS + DIME_IDEOLOGIES

# Read ICPSR2 explicitly as text: it is cleaned with string operations and
# merged against a string key below, so it must not be type-inferred.
dime = pd.read_csv(
    os.path.join(RAW_FILEPATH, 'dime_recipients_1979_2014.csv'),
    usecols=DIME_USECOLS,
    dtype={'ICPSR2': str},
)

# Drop committees (cf codebook p. 17)
dime = dime[~dime['seat'].isin(['federal:committee', 'state:committee', 'federal:527'])]

# Lower-case the column names (ICPSR2 -> icpsr2).
dime.columns = [col.lower() for col in dime.columns]

# Order by election cycle so that "forward" and "last" below mean "most recent".
dime = dime.sort_values(by=['cycle', 'icpsr2'], ascending=True)

# Fill forward, retain latest scores where available.
# GroupBy.ffill returns a result on the frame's own row index, so the
# assignment aligns row-for-row.
# NOTE(review): scores are filled within 'name' but the latest row is picked
# per 'icpsr2' below -- confirm this difference in keys is intended.
for col in DIME_IDEOLOGIES:
    dime[col] = dime.groupby('name')[col].ffill()

# Remove 'nominee' from the ICPSR id (only seems to appear for presidential
# candidates). The vectorised .str accessor leaves missing ids as NaN instead
# of raising; regex=False makes this a literal substring replacement.
dime['icpsr2'] = dime['icpsr2'].str.replace('nominee', '', regex=False)

# Retain last obs per ICPSR id
dime = dime.groupby('icpsr2').tail(1)

dime = dime.rename(columns={'icpsr2': 'icpsr'})

  interactivity=interactivity, compiler=compiler, result=result)


70434

### Merge

In [83]:
# Full outer join of DIME (left) and Voteview (right) on the ICPSR id.
# indicator=True adds the `_merge` column recording which side each row came from.
df = pd.merge(dime, vv, on='icpsr', how='outer', indicator=True)

len(df)

80982

In [84]:
# Show the column names of the merged frame.
df.columns

Index(['cycle', 'icpsr', 'name', 'party', 'state', 'seat', 'district',
       'recipient.cfscore.dyn', 'dwdime', 'congress', 'chamber', 'bioname',
       'nominate_dim1', 'nominate_log_likelihood',
       'nominate_geo_mean_probability', 'nokken_poole_dim1', '_merge'],
      dtype='object')