In [21]:
import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import os
import pymongo
from bson.json_util import loads, dumps

In [22]:
postgres_password = os.environ['POSTGRES_PASSWORD']
mongo_username = os.environ['MONGO_INITDB_ROOT_USERNAME']
mongo_password = os.environ['MONGO_INITDB_ROOT_PASSWORD']
mongo_init_db = os.environ['MONGO_INITDB_DATABASE']

In [3]:
engine = create_engine("postgresql+psycopg2://{user}:{pw}@postgres:5432/{db}"
                      .format(user="postgres",pw=postgres_password, db="contrans"))

In [5]:
charwords = pd.read_csv('charwords.csv')

In [7]:
charwords.to_sql('charwords', con=engine, chunksize=1000, index=False, if_exists='replace')

5500

In [12]:
charwords.drop(['Unnamed: 0'], axis=1)

Unnamed: 0,word,tf_idf,sponsor_id
0,bifia program,0.192886,
1,bifia,0.192886,
2,project,0.177575,
3,secured loan,0.142876,
4,assistant secretary,0.129462,
...,...,...,...
5495,engaged conduct punishable,0.092499,Z000017
5496,gang,0.090513,Z000017
5497,electric vehicles,0.086414,Z000017
5498,bytyqi,0.079694,Z000017


In [20]:
myquery = '''
select c.word, c.tf_idf
from charwords c
inner join members m
on c.sponsor_id = m.propublica_id
where m.last_name LIKE '%%CORTEZ'
'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0,word,tf_idf
0,climate corps,0.383722
1,civilian climate,0.378639
2,civilian climate corps,0.378639
3,climate,0.187214
4,corps,0.140386
5,public housing,0.135222
6,civilian,0.121353
7,housing,0.116316
8,climate service,0.105739
9,members civilian climate,0.089262


In [15]:
myquery = '''
select *
from members
limit 5
'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0,title,short_title,first_name,middle_name,last_name,suffix,congress,chamber,icpsr,state,...,office,phone,fax,missed_votes_pct,votes_with_party_pct,votes_against_party_pct,DWNOMINATE,propublica_id,propublica_endpoint,last_updated
0,Representative,Rep.,Alma,,ADAMS,,117.0,House,21545.0,NC,...,2436 Rayburn House Office Building,202-225-1510,,0.32,99.02,0.87,-0.465,A000370,https://api.propublica.org/congress/v1/members...,2022-11-10 09:30:11 -0500
1,Representative,Rep.,Robert,B.,ADERHOLT,,117.0,House,29701.0,AL,...,266 Cannon House Office Building,202-225-4876,,1.51,96.24,3.65,0.38,A000055,https://api.propublica.org/congress/v1/members...,2022-11-10 09:30:10 -0500
2,Representative,Rep.,Pete,,AGUILAR,,117.0,House,21506.0,CA,...,109 Cannon House Office Building,202-225-3201,,0.32,98.8,1.09,-0.296,A000371,https://api.propublica.org/congress/v1/members...,2022-11-10 09:30:11 -0500
3,Representative,Rep.,Rick,,ALLEN,,117.0,House,21516.0,GA,...,570 Cannon House Office Building,202-225-2823,,3.03,90.92,8.97,0.699,A000372,https://api.propublica.org/congress/v1/members...,2022-11-10 09:30:10 -0500
4,Representative,Rep.,Colin,,ALLRED,,117.0,House,21900.0,TX,...,114 Cannon House Office Building,202-225-2231,,1.08,98.14,1.75,-0.432,A000376,https://api.propublica.org/congress/v1/members...,2022-11-10 09:30:11 -0500


In [24]:
myclient = pymongo.MongoClient(f"mongodb://{mongo_username}:{mongo_password}@mongo:27017/{mongo_init_db}?authSource=admin")

In [25]:
contrans_db = myclient['contrans']
bills = contrans_db['bills']

In [26]:
bills.count_documents({})

17071

In [49]:
myquery = bills.find({'introduced_date': '2022-11-16'}, 
           {'_id':0, 'short_title':1 , 'sponsor_name':1, 'introduced_date': 1})

In [38]:
pd.DataFrame.from_records(loads(dumps(myquery)))

In [43]:
myquery = '''
select * from charwords
limit 5
'''
pd.read_sql_query(myquery, con=engine)

Unnamed: 0.1,Unnamed: 0,word,tf_idf,sponsor_id
0,0,bifia program,0.192886,
1,1,bifia,0.192886,
2,2,project,0.177575,
3,3,secured loan,0.142876,
4,4,assistant secretary,0.129462,


In [59]:
myquery = bills.find({'enacted':{'$ne':None}},
                     {'_id':0, 'enacted':1, 'short_title':1, 'sponsor_name':1})

In [52]:
laws = pd.DataFrame.from_records(loads(dumps(myquery)))

In [60]:
loads(dumps(myquery))

[{'short_title': 'Bulk Infant Formula to Retail Shelves Act',
  'sponsor_name': 'Suzan K. DelBene',
  'enacted': '2022-10-10'},
 {'short_title': 'SBIR and STTR Extension Act of 2022',
  'sponsor_name': 'Benjamin L. Cardin',
  'enacted': '2022-09-30'},
 {'short_title': 'A bill to amend section 301 of title 44, United States Code, to establish a term for the appointment of the Director of the Government Publishing Office.',
  'sponsor_name': 'Roy Blunt',
  'enacted': '2022-10-17'},
 {'short_title': "A bill to extend by 19 days the authorization for the special assessment for the Domestic Trafficking Victims' Fund.",
  'sponsor_name': 'Amy Klobuchar',
  'enacted': '2022-09-16'},
 {'short_title': 'To designate the clinic of the Department of Veterans Affairs in Mishawaka, Indiana, as the "Jackie Walorski VA Clinic".',
  'sponsor_name': 'Jim Banks',
  'enacted': '2022-09-30'},
 {'short_title': 'Formula Act',
  'sponsor_name': 'Earl Blumenauer',
  'enacted': '2022-07-21'},
 {'short_title': '