In [1]:
import os
import pandas as pd

In [2]:
# Notebook-wide pandas display limits, kept in one table so they are easy to tune.
DISPLAY_OPTIONS = (
    ('display.max_rows', 500),
    ('display.max_columns', 50),
)
for option_name, option_value in DISPLAY_OPTIONS:
    pd.set_option(option_name, option_value)

In [3]:
# Raw input files live in the sibling `raw` directory, one level above this notebook.
RAW_FILEPATH = os.path.join(os.pardir, 'raw')

### Read in DIME and Voteview

**Read in voteview** (https://voteview.com/articles/data_help_members)

In [4]:
# Columns loaded from the Voteview member file: identifiers plus ideology scores.
VV_IDENTIFIERS = ['icpsr', 'bioname', 'congress', 'chamber']
VV_IDEO = ['nominate_dim1', 'nominate_log_likelihood', 'nominate_geo_mean_probability', 'nokken_poole_dim1']
VV_USECOLS = VV_IDENTIFIERS + VV_IDEO

vv = pd.read_csv(os.path.join(RAW_FILEPATH, 'HSall_members.csv'), usecols=VV_USECOLS)

# Keep the ICPSR id as a string; it is the key used to merge with DIME later on.
vv['icpsr'] = vv['icpsr'].astype('str')

# Order rows by congress so that "fill forward" and "last row" below both
# refer to the most recent congress. Reassign instead of sorting in place so
# the cell stays idempotent on re-run.
vv = vv.sort_values(by=['congress', 'icpsr'], ascending=True)

# Fill forward, retain latest scores where available.
# GroupBy.ffill keeps the original row index, so the result aligns with `vv`
# on assignment. The former groupby(...).apply(lambda x: x.ffill()) returns a
# (bioname, row) MultiIndex on pandas >= 2.0 and can no longer be assigned back.
# NOTE(review): filling is grouped by `bioname` while de-duplication below is
# by `icpsr` -- confirm that is intended.
for col in VV_IDEO:
    vv[col] = vv.groupby('bioname')[col].ffill()

# Retain latest obs per ICPSR id
vv = vv.groupby('icpsr').tail(1)

**Read in DIME**

In [5]:
# Columns loaded from the DIME recipients file: identifiers plus ideology scores.
DIME_IDENTIFIERS = ['cycle', 'name', 'ICPSR2', 'party', 'state', 'seat', 'district']
DIME_IDEOLOGIES = ['recipient.cfscore.dyn', 'dwdime']
DIME_USECOLS = DIME_IDENTIFIERS + DIME_IDEOLOGIES

# Read the merge key explicitly as text: it is cleaned with string methods
# below, so its dtype must not depend on pandas' per-chunk type inference.
dime = pd.read_csv(
    os.path.join(RAW_FILEPATH, 'dime_recipients_1979_2014.csv'),
    usecols=DIME_USECOLS,
    dtype={'ICPSR2': str},
)

# Drop committees (cf codebook p. 17).
# .copy() gives an independent frame so the column assignments below do not
# trigger chained-assignment warnings.
dime = dime[~dime['seat'].isin(['federal:committee', 'state:committee', 'federal:527'])].copy()

# Lower-case all column names (ICPSR2 -> icpsr2).
dime = dime.rename(columns=str.lower)

# Order rows by cycle so that "fill forward" and "last row" below both refer
# to the most recent election cycle.
dime = dime.sort_values(by=['cycle', 'icpsr2'], ascending=True)

# Fill forward, retain latest scores where available.
# GroupBy.ffill keeps the original row index, so the result aligns with `dime`
# on assignment. The former groupby(...).apply(lambda x: x.ffill()) returns a
# (name, row) MultiIndex on pandas >= 2.0 and can no longer be assigned back.
for col in DIME_IDEOLOGIES:
    dime[col] = dime.groupby('name')[col].ffill()

# Remove the 'nominee' marker from the ICPSR id (reportedly only seen for
# presidential candidates). The vectorised .str accessor passes missing ids
# through as NaN instead of raising; regex=False forces a literal substring
# replacement.
dime['icpsr2'] = dime['icpsr2'].str.replace('nominee', '', regex=False)

# Retain the last (most recent) row per id and expose the id under the same
# column name as the Voteview frame.
dime = dime.groupby('icpsr2').tail(1).rename(columns={'icpsr2': 'icpsr'})

  interactivity=interactivity, compiler=compiler, result=result)


### Merge

In [9]:
# Outer join on the ICPSR id, then prefix the two name columns with their
# source so they can be told apart in the merged frame.
NAME_COLUMNS = {'name': 'dime_name', 'bioname': 'vv_name'}
df = pd.merge(dime, vv, how='outer', on='icpsr').rename(columns=NAME_COLUMNS)

# Row count of the merged frame (displayed as the cell output).
len(df)

80982

In [10]:
# Eyeball the first rows of the merged frame to sanity-check the join.
df.head(n=10)

Unnamed: 0,cycle,icpsr,dime_name,party,state,seat,district,recipient.cfscore.dyn,dwdime,congress,chamber,vv_name,nominate_dim1,nominate_log_likelihood,nominate_geo_mean_probability,nokken_poole_dim1
0,1980.0,10511,"corman, james c.",100,CA,federal:house,CA21,-0.579,-0.473,96.0,House,"CORMAN, James Charles",-0.53,-153.0461,0.846,-0.553
1,1980.0,10525,"harsha, william h.",200,OH,federal:house,OH06,0.977,,96.0,House,"HARSHA, William Howard",0.188,-336.03193,0.686,0.218
2,1980.0,10528,"ichord, richard h.",100,MO,federal:house,MO08,0.383,,96.0,House,"ICHORD, Richard Howard, II",0.001,-457.45577,0.598,0.27
3,1980.0,10540,"nedzi, lucien",100,MI,federal:house,MI14,-0.6,,96.0,House,"NEDZI, Lucien Norbert",-0.427,-232.46035,0.773,-0.508
4,1980.0,10582,"duncan, robert blackford",100,OR,federal:house,OR03,0.117,-0.282,96.0,House,"DUNCAN, Robert Blackford",-0.341,-310.40422,0.706,-0.348
5,1980.0,10611,"murphy, john m.",100,NY,federal:house,NY17,-0.29,-0.379,96.0,House,"MURPHY, John Michael",-0.406,-235.86106,0.743,-0.389
6,1980.0,10613,"patten, edward j.",100,NJ,federal:house,NJ15,-0.506,,96.0,House,"PATTEN, Edward James",-0.424,-273.7622,0.759,-0.548
7,1980.0,10633,"van deerlin, lionel mc",100,CA,federal:house,CA42,-0.414,-0.368,96.0,House,"VAN DEERLIN, Lionel",-0.401,-213.20417,0.797,-0.47
8,1980.0,10638,"wydler, john w",200,NY,federal:house,NY05,0.573,,96.0,House,"WYDLER, John Waldemar",0.277,-412.27567,0.613,0.278
9,1980.0,10706,"buchanan, john h.",200,AL,federal:house,AL06,0.277,0.167,96.0,House,"BUCHANAN, John Hall, Jr.",0.146,-507.36818,0.597,0.029
