In [25]:
from curses.ascii import isalpha, isdigit
from plistlib import InvalidFileException
import sys
import os
import numpy as np
from scipy import spatial
from Bill import Bill
import tensorflow as tf
from keras.preprocessing.text import Tokenizer

In [26]:
def make_bills(folder:str):
    bill_locations = os.listdir(folder)
    bill_locations = list(map(lambda loc:folder + '/' + loc, bill_locations))
    bills = []
    for bill_file in bill_locations:
        if bill_file[-3:] == 'txt':
            try:
                bills.append(Bill(bill_file))
            except InvalidFileException as e:
                continue
    return bills

In [27]:
#import ideologyTags
SenatorsFile = '/Users/lucasgover/Desktop/Political-Sentiment-Analysis-PG-Capstone/Ideological_Tags/govtrack-stats-2020-house-ideology.csv'
HouseFile = '/Users/lucasgover/Desktop/Political-Sentiment-Analysis-PG-Capstone/Ideological_Tags/govtrack-stats-2020-house-ideology.csv'

StateAndNameToIdeology = dict()

with open (SenatorsFile, "r") as SenateIdeologyCSV:
    data = SenateIdeologyCSV.read()
    senators = data.split('\n')[1:]
    for senator in senators:
        splitSenator = senator.split(',')
        StateAndNameToIdeology[splitSenator[-3] + " " + splitSenator[-1][2:-1]] = float(splitSenator[3])
       
with open (HouseFile, "r") as HouseIdeologyCSV:
    data = HouseIdeologyCSV.read()
    reps = data.split('\n')[1:]
    for rep in reps:
        splitRep = rep.split(',')
        StateAndNameToIdeology[splitRep[-3] + " " + splitRep[-1][2:-1]] = float(splitRep[3])

In [28]:
def bill_to_ideology(bill:Bill, ideologiesDict:dict):
    totalIdeology = 0
    numSponsors = 0
    currentCongressPerson = bill.sponsor
    if (currentCongressPerson.state + " " + currentCongressPerson.full_name.split(',')[0]) in ideologiesDict:
        totalIdeology = ideologiesDict[(currentCongressPerson.state + " " + currentCongressPerson.full_name.split(',')[0])]
        numSponsors = 1
    for sponsor in bill.cosponsors:
        currentCongressPerson = sponsor
        if (currentCongressPerson.state + " " + currentCongressPerson.full_name.split(',')[0]) in ideologiesDict:
            totalIdeology += ideologiesDict[(currentCongressPerson.state + " " + currentCongressPerson.full_name.split(',')[0])]
            numSponsors += 1 
    return 2*(totalIdeology / numSponsors)-1

In [29]:
# Import Bills Here
BillsLocation = '/Users/lucasgover/Desktop/AI431_Projects/AIFinal/Bills_2021-2022'
bills = make_bills(BillsLocation)

In [30]:
texts = []
labels = []
for bill in bills:
    try:
        text = [(bill.title + " " + bill.text).replace(',','')]
        label = [bill_to_ideology(bill,StateAndNameToIdeology)]
        texts += text
        labels += label
    except:
        continue
    

In [31]:
print(len(texts))
print(len(labels))

1120
1120


In [32]:
import pandas as pd
senators = pd.read_csv('/Users/lucasgover/Downloads/dataverse_files/congress116-senate-accounts.csv')
senators['Token'] = senators['Token'].str.lower()
senator_accounts = list(senators['Token'])

reps = pd.read_csv('/Users/lucasgover/Downloads/dataverse_files/congress116-house-accounts.csv')
reps['Token'] = reps['Token'].str.lower()
reps_accounts = list(reps['Token'])

In [33]:
RepStateAndName = list()
SenStateAndName = list()
with open (SenatorsFile, "r") as SenateIdeologyCSV:
    data = SenateIdeologyCSV.read()
    senators = data.split('\n')[1:]
    for senator in senators:
        splitSenator = senator.split(',')
        SenStateAndName += [splitSenator[-3] + " " + splitSenator[-1][2:-1]]
       
with open (HouseFile, "r") as HouseIdeologyCSV:
    data = HouseIdeologyCSV.read()
    reps = data.split('\n')[1:]
    for rep in reps:
        splitRep = rep.split(',')
        RepStateAndName += [splitRep[-3] + " " + splitRep[-1][2:-1]]

In [34]:
def match_person_to_twitter_account(name,state,accounts):
    name = name.lower()
    state = state.lower()
    potential_matches = list(filter(lambda x : (name in x) and (state in x),accounts))
    if len(potential_matches) == 1:
        return potential_matches
    if len(potential_matches) == 2:
        return potential_matches
    elif len(potential_matches) > 1:
        return []
    potential_matches = list(filter(lambda x : (name in x),accounts))
    if len(potential_matches) == 1:
        return potential_matches
    if len(potential_matches) == 2:
        return potential_matches
    elif len(potential_matches) > 1:
        return []
    return []

In [35]:
account_dict = dict()
account_dict['CA Lee'] = 'repbarbaralee'
account_dict['DC Norton'] = 'eleanor4dc'
account_dict['IL Garc\\xc3\\xada'] = 'repchuygarcia'
account_dict['NY Ocasio-Cortez'] = 'aoc'
account_dict['MN Omar'] = 'ilhan'
account_dict['NJ Watson'] = 'repbonnie'
account_dict['CA Chu'] = 'repjudychu'
account_dict['FL Wilson'] = 'repwilson'
account_dict['PA Dean'] = 'repdean'
account_dict['IL Kelly'] = 'reprobinkelly'
account_dict['NY Vel\\xc3\\xa1zquez'] = 'nydiavelazquez'
account_dict['NC Adams'] = 'repadams'
account_dict['NY Maloney'] = 'repmaloney'
account_dict['TX Johnson'] = 'nathanfortexas'
account_dict['WA Smith'] = 'repadamsmith'
account_dict['IL Davis'] = 'repdannydavis'
account_dict['GA Johnson'] = 'rephankjohnson'
account_dict['MA Clark'] = 'repkclark'
account_dict['CA Davis'] = 'repjayapal'
account_dict['CA Levin'] = 'repmikelevin'
account_dict['KS Davids'] = 'repdavids'
account_dict['OH Ryan'] = 'timryan'
account_dict['MA Clark'] = 'repkclark'

twitter_accounts = []
twitter_to_ideology = dict()
for rep in RepStateAndName:
    name = rep.split(' ')[1]
    state = rep.split(' ')[0]
    if (state + " " + name) in account_dict:
        continue
    
    for i in match_person_to_twitter_account(name,state,reps_accounts):
        twitter_accounts += i
        twitter_to_ideology[i] = StateAndNameToIdeology[rep]
        
for sen in SenStateAndName:
    name = sen.split(' ')[1]
    state = sen.split(' ')[0]
    
    for i in match_person_to_twitter_account(name,state,senator_accounts):
        twitter_accounts += i
        twitter_to_ideology[i] = StateAndNameToIdeology[sen]
    

In [36]:
import tweepy
auth = tweepy.OAuthHandler('Bf3bgb0CVdvz4988NRiFFlwMv', 'YMpiTstrfCkNYwxGTInOyhFbHhxoksPA2kIXXfdXqmW8NfIdKj')
auth.set_access_token('4198637487-cpbfDyqrwLJ6AjNgLipKI1rR2GfWMAtk83qzTng', 'ZFQPPHxajg7W5QI1QxOjeIrs1OK3XN8IG7kPwydXJlIhS')

In [37]:
api = tweepy.API(auth)

In [38]:
ideologyTweets = []
counter = 0
for person in list(twitter_to_ideology.keys()):
    try:
        tweets = api.search_tweets('(from:' + person + ')',count=80,tweet_mode='extended')
        tweets = list(map(lambda x : x.full_text,tweets))
        ideologyTweets += list(zip([(2*twitter_to_ideology[person])-1] * len(tweets),tweets))
        print(counter / len(twitter_to_ideology.keys()))
        counter += 1
    except:
        counter += 1
        continue

0.0
0.0018115942028985507
0.0036231884057971015
0.005434782608695652
0.007246376811594203
0.009057971014492754
0.010869565217391304
0.012681159420289856
0.014492753623188406
0.016304347826086956
0.018115942028985508
0.019927536231884056
0.021739130434782608
0.02355072463768116
0.025362318840579712
0.02717391304347826
0.028985507246376812
0.030797101449275364
0.03260869565217391
0.034420289855072464
0.036231884057971016
0.03804347826086957
0.03985507246376811
0.041666666666666664
0.043478260869565216
0.04528985507246377
0.04710144927536232
0.04891304347826087
0.050724637681159424
0.05253623188405797
0.05434782608695652
0.05615942028985507
0.057971014492753624
0.059782608695652176
0.06159420289855073
0.06340579710144928
0.06521739130434782
0.06702898550724638
0.06884057971014493
0.07065217391304347
0.07246376811594203
0.07427536231884058
0.07608695652173914
0.07789855072463768
0.07971014492753623
0.08152173913043478
0.08333333333333333
0.08514492753623189
0.08695652173913043
0.0887681159

In [39]:
import csv  

header = ['Ideology','text']

with open('IdeologyBills.csv', 'w', encoding='UTF8') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(header)

    # write the data
    for row in list(zip(labels,texts)):
        writer.writerow(row)
        print(row[0])
    for row in ideologyTweets:
        writer.writerow(row)
        print(row[0])

-0.5868019420209737
-0.07118045504082793
0.3250787745439494
-0.42596376706567285
-0.483655267503758
-0.5700343707581186
0.38259611139494987
-0.4218167729090573
0.46552683921424887
-0.3131126038831017
0.06350333235517658
-0.053493856239420046
-0.419189223637112
0.010067670301125986
-0.4088026927574129
-0.2964491843188365
0.07265400585078563
0.6359586619788742
0.6054874510943913
0.5778414395832994
0.4777779235442796
0.37279293826940285
-0.39812730055749246
0.6359586619788742
0.1751734776196585
-0.5354062617551149
-0.6934819271085879
-0.17939024852892005
-0.7625570533999478
0.8129984999003448
-0.40392239071005387
0.4936349258396966
0.020332624347830563
0.591105921574044
-0.6763836116091555
0.5500628580144944
-0.18192647421155028
0.3748726564397489
0.43127886778576063
-0.8597188626932031
0.47256425154640125
-0.22596050225721853
0.39174797510281323
0.4066037054827347
0.8129984999003448
-0.5899702800834783
0.07640906021084426
-0.04593238607348904
-0.9353392017845517
-0.9353392017845517
0.698