# Scratch work for Project 2

## Retrieve voteview.com data and write to csv
- Current Congress (117th)
- Senate only
- Calendar year 2021 (Rollcall IDs: RS1170001 to RS1170528)

## 3 tables
1. senators.csv: Senate Members (103) - Includes Harris, Biden, Trump
2. votes.csv: Bills Voted on (528)
3. senator_votes.csv: Actual votes on the bills by senator id 'icpsr'
- 103 rows
- 528 columns

**Note:** This will take about 10 minutes to run as-is (12:19A 3/16)

In [474]:
# Import libraries as needed
import urllib.request, json 
import pandas as pd

In [475]:
def getRollcallDataById(rollcall_id, timeout=60):
    """Download one roll call from the voteview.com API and parse it as JSON.

    Parameters
    ----------
    rollcall_id : str
        Voteview roll-call identifier, e.g. ``"RS1170004"``.
    timeout : float, optional
        Seconds to wait on the network before ``urlopen`` gives up.  Without
        a timeout a single stalled request blocks the whole download run.

    Returns
    -------
    The decoded JSON document, as produced by ``json.loads``.

    Raises
    ------
    urllib.error.URLError
        If the request fails or times out (raised by ``urlopen``).
    ValueError
        If the response body is not valid JSON (raised by ``json.loads``).
    """
    # Function-scope import: keeps this cell self-contained instead of
    # relying on ``urllib.request`` having loaded ``urllib.parse`` for us.
    from urllib.parse import urlencode

    # Encode the id so characters such as '&' or spaces cannot alter the query.
    query = urlencode({"rollcall_id": rollcall_id})
    rc_url = "https://voteview.com/api/download?" + query
    with urllib.request.urlopen(rc_url, timeout=timeout) as response:
        data = json.loads(response.read().decode())
    return data

def createDataframeOfSenators(vote_json):
    """Build a member table from the ``votes`` list of a roll-call payload.

    Reads ``vote_json['rollcalls'][0]['votes']``, drops the columns that
    describe the individual vote rather than the member, and converts the
    ``icpsr`` id to a string without a trailing ``".0"``.

    Parameters
    ----------
    vote_json : dict
        Parsed roll-call JSON containing ``rollcalls[0].votes``.

    Returns
    -------
    pandas.DataFrame
        One row per entry in ``votes`` with the remaining member columns.

    Raises
    ------
    KeyError
        If any of the dropped columns is absent from the payload
        (raised by ``DataFrame.drop``).
    """
    votes = vote_json['rollcalls'][0]['votes']
    vote_only_columns = ['vote', 'paired_flag', 'cast_str', 'district', 'party',
                         'prob', 'x', 'y', 'flags', 'source']
    df_senators = pd.DataFrame(votes).drop(columns=vote_only_columns)
    # Ids may arrive as floats ("29940.0") or ints ("29940").  Strip only a
    # literal trailing ".0" so that integer ids are not truncated.
    df_senators['icpsr'] = (
        df_senators['icpsr'].apply(str).str.replace(r'\.0$', '', regex=True)
    )
    return df_senators

In [477]:
# Roll call RS1170004 is fetched only to obtain the list of senators:
# createDataframeOfSenators keeps the member columns and drops the vote itself.
vote_data_for_sens = getRollcallDataById(rollcall_id="RS1170004")
df_sens = createDataframeOfSenators(vote_json=vote_data_for_sens)

In [478]:
# Harris (41701.0) and Trump (99912.0) are also needed; their rows are taken
# from roll call RS1170001.
vote_data_for_sens_tmp = getRollcallDataById(rollcall_id="RS1170001")
df_sens_tmp = createDataframeOfSenators(vote_json=vote_data_for_sens_tmp)
df_sens_har = df_sens_tmp[df_sens_tmp['icpsr'].eq('41701')]
df_sens_tru = df_sens_tmp[df_sens_tmp['icpsr'].eq('99912')]

In [479]:
# Add the Harris and Trump rows to the roster.  pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0;
# row labels are left as-is, exactly as append did.
df_sens = pd.concat([df_sens, df_sens_har, df_sens_tru])

In [480]:
# Renumber the rows 0..n-1 (the old labels are discarded) and preview the roster.
df_sens = df_sens.reset_index(drop=True)
df_sens.head()

Unnamed: 0,last_name,img,first_name,party_short_name,icpsr,state,lis_member_id,bioguide_id,state_abbrev,seo_name,member_full,name,party_code
0,Baldwin,029940.jpg,Tammy,Democrat,29940,WI,S354,B001230,WI,tammy-baldwin,Baldwin (D-WI),"BALDWIN, Tammy",100
1,Barrasso,040707.jpg,John,Republican,40707,WY,S317,B001261,WY,john-a-barrasso,Barrasso (R-WY),"BARRASSO, John A.",200
2,Bennet,040910.jpg,Michael,Democrat,40910,CO,S330,B001267,CO,michael-f-bennet,Bennet (D-CO),"BENNET, Michael F.",100
3,Blackburn,020351.jpg,Marsha,Republican,20351,TN,S396,B001243,TN,marsha-blackburn,Blackburn (R-TN),"BLACKBURN, Marsha",200
4,Blumenthal,041101.jpg,Richard,Democrat,41101,CT,S341,B001277,CT,richard-blumenthal,Blumenthal (D-CT),"BLUMENTHAL, Richard",100


In [481]:
# Table 1: write the senator roster, without the row index column.
df_sens.to_csv(path_or_buf='senators.csv', index=False)

In [482]:
# Start the senator-by-rollcall table with a single column of senator IDs.
# .copy() makes it an independent frame: one column per roll call is added to
# it later, and adding columns to a slice of df_sens raises
# SettingWithCopyWarning.
df_senator_votes = df_sens[['icpsr']].copy()
df_senator_votes.head()

Unnamed: 0,icpsr
0,29940
1,40707
2,40910
3,20351
4,41101


In [483]:
def getVoteTitle(vote_json):
    """Return the ``vote_title`` field of the first entry in ``vote_json['rollcalls']``."""
    first_rollcall = vote_json['rollcalls'][0]
    return first_rollcall['vote_title']
    
def getVoteDate(vote_json):
    """Return the ``date`` field of the first entry in ``vote_json['rollcalls']``."""
    first_rollcall = vote_json['rollcalls'][0]
    return first_rollcall['date']
    
def getVoteId(vote_json):
    """Return the ``id`` field of the first entry in ``vote_json['rollcalls']``."""
    first_rollcall = vote_json['rollcalls'][0]
    return first_rollcall['id']

def addVoteIdDateTitleToDataframe(vote_json, votes_df=None):
    """Return a votes table with one extra row describing ``vote_json``.

    The new row holds the ``id``, ``date`` and ``vote_title`` of the first
    entry in ``vote_json['rollcalls']`` under the columns ``vote_id``,
    ``date`` and ``title``.

    Parameters
    ----------
    vote_json : dict
        Parsed roll-call JSON.
    votes_df : pandas.DataFrame, optional
        Table to extend.  When omitted, the notebook-level ``df_votes`` is
        used, which is what the one-argument call has always done.

    Returns
    -------
    pandas.DataFrame
        A new frame: the input table followed by the new row.  The input is
        not modified and row labels are not renumbered.
    """
    if votes_df is None:
        # Backward-compatible default: fall back to the notebook global.
        votes_df = df_votes
    rollcall = vote_json['rollcalls'][0]
    df_temp = pd.DataFrame(
        [[rollcall['id'], rollcall['date'], rollcall['vote_title']]],
        columns=['vote_id', 'date', 'title'],
    )
    return pd.concat([votes_df, df_temp])


def addRollcallVoteToSenatorVotesDf(vote_json, senator_votes=None):
    """Add one roll call as a new column of per-senator votes.

    The column is named after the roll-call ``id`` and is filled by looking
    up each row's ``icpsr`` in the roll call's ``votes`` list.  Senators with
    no entry in that list get a missing value.

    Parameters
    ----------
    vote_json : dict
        Parsed roll-call JSON; ``rollcalls[0]`` must provide ``id`` and
        ``votes`` (a list of records with ``icpsr`` and ``vote``).
    senator_votes : pandas.DataFrame, optional
        Table to extend; it needs an ``icpsr`` column of string ids.  When
        omitted, the notebook-level ``df_senator_votes`` is used, which is
        what the one-argument call has always done.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was extended; it is modified in place.
    """
    if senator_votes is None:
        # Backward-compatible default: fall back to the notebook global.
        senator_votes = df_senator_votes

    rollcall = vote_json['rollcalls'][0]
    # The roll-call id becomes the column header.
    rollcall_id = rollcall['id']
    # Keep only the senator id and the vote.  Naming the columns up front also
    # yields a well-formed empty frame when the roll call lists no votes.
    df_rollcall_vote = pd.DataFrame(rollcall['votes'], columns=['icpsr', 'vote'])
    # Normalise every id to a string and strip a trailing ".0" per value,
    # rather than inferring the format from the first row only.
    icpsr_keys = (
        df_rollcall_vote['icpsr'].astype(str).str.replace(r'\.0$', '', regex=True)
    )
    # icpsr -> vote lookup; for a repeated id the last entry wins.
    dict_vote = dict(zip(icpsr_keys, df_rollcall_vote['vote']))
    senator_votes[rollcall_id] = senator_votes['icpsr'].map(dict_vote)
    return senator_votes

In [484]:
# Download every roll call in the range and build the two remaining tables:
#   df_votes          - one row per roll call (vote_id, date, title)
#   df_senator_votes  - one new column per roll call
# Roll-call numbers map to ids RS1170001 .. RS1170528.  Lower
# LAST_ROLLCALL_NUMBER for a quick trial run.
FIRST_ROLLCALL_NUMBER = 1
LAST_ROLLCALL_NUMBER = 528

# Collect plain rows and build the frame once; concatenating a one-row frame
# per iteration re-copies the whole table on every request.
vote_rows = []
try:
    for x in range(FIRST_ROLLCALL_NUMBER, LAST_ROLLCALL_NUMBER + 1):
        rollcall_id = f"RS117{x:04d}"
        vote_data_res = getRollcallDataById(rollcall_id)
        vote_rows.append([getVoteId(vote_data_res),
                          getVoteDate(vote_data_res),
                          getVoteTitle(vote_data_res)])
        addRollcallVoteToSenatorVotesDf(vote_data_res)
finally:
    # Runs even if a download fails part-way, so df_votes still reflects the
    # roll calls fetched so far.
    df_votes = pd.DataFrame(vote_rows, columns=['vote_id', 'date', 'title'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [485]:
# Preview the first five senators with their per-rollcall vote columns.
df_senator_votes.head(n=5)

Unnamed: 0,icpsr,RS1170001,RS1170002,RS1170003,RS1170004,RS1170005,RS1170006,RS1170007,RS1170008,RS1170009,...,RS1170519,RS1170520,RS1170521,RS1170522,RS1170523,RS1170524,RS1170525,RS1170526,RS1170527,RS1170528
0,29940,Nay,Nay,Yea,Nay,Yea,Yea,Yea,Yea,Yea,...,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea
1,40707,Nay,Nay,Yea,Nay,Yea,Nay,Nay,Nay,Yea,...,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs
2,40910,Nay,Nay,Yea,Yea,Yea,Yea,Yea,Yea,Yea,...,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea
3,20351,Nay,Nay,Nay,Nay,Yea,Nay,Nay,Nay,Nay,...,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs,Abs
4,41101,Nay,Nay,Yea,Nay,Yea,Yea,Yea,Yea,Yea,...,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea,Yea


In [486]:
# Table 3: write the senator-by-rollcall votes, without the row index column.
df_senator_votes.to_csv(path_or_buf='senator_votes.csv', index=False)

In [487]:
# Renumber the rows 0..n-1 (the old labels are discarded) and preview the table.
df_votes = df_votes.reset_index(drop=True)
df_votes.head()

Unnamed: 0,vote_id,date,title
0,RS1170001,2021-01-06,"Shall the Objection Submitted by the Gentleman from Arizona, Mr. Gosar, and the Senator from Texas, Mr. Cruz, and Others Be Sustained?"
1,RS1170002,2021-01-07,"Shall the Objection Submitted by the Gentleman from Pennsylvania, Mr. Perry, and the Senator from Missouri, Mr. Hawley, Be Sustained?"
2,RS1170003,2021-01-20,"Confirmation: Avril Danica Haines, of New York, to be Director of National Intelligence"
3,RS1170004,2021-01-21,H.R. 335
4,RS1170005,2021-01-22,"Confirmation: Lloyd James Austin, of Georgia, to be Secretary of Defense"


In [488]:
# Table 2: write the roll-call list (vote_id, date, title), without the row index column.
df_votes.to_csv(path_or_buf='votes.csv', index=False)