## SETUP and Connections

In [2]:
pip install dj_database_url

Collecting dj_database_url
  Downloading dj_database_url-0.5.0-py2.py3-none-any.whl (5.5 kB)
Installing collected packages: dj-database-url
Successfully installed dj-database-url-0.5.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import csv
import os
import zipfile


import dj_database_url
import psycopg2
import psycopg2.extras
import unidecode
import requests
import re

In [2]:
conn = psycopg2.connect(database="campaign-finance",
                        user="postgres",
                       password="",
                        host="172.16.238.13",
                        port="5432")
c = conn.cursor()

## Apply Dedupe to transactions

In [3]:
print("updates canon account numbers on transactions where matches occured")
c.execute("UPDATE transactions " 
                  " set canon_account_id = entity_map.canon_id" 
                  " from entity_map " 
                  "where transactions.original_account_id = entity_map.original_id")
print("Done.")
conn.commit()

updates canon account numbers on transactions where matches occured
Done.


In [4]:
print("update canon account ids for those without a match")
c.execute("update transactions " 
                  " set canon_account_id = original_account_id " 
                  " where canon_account_id is null " )
print("Done.")
conn.commit()

update canon account ids for those without a match
Done.


In [22]:
conn.commit()

## Some committees don't have good IDs use committee name instead

In [6]:
print("fixing bad committee IDs")
c.execute("UPDATE committees " 
                  " set sboe_committee_id = name" 
                  " where sboe_committee_id = '---' " )
print("Done.")
conn.commit()

fixing bad committee IDs
Done.


In [11]:
print("updates canon committee id on transactions ")
c.execute("UPDATE transactions " 
                  " set canon_committee_sboe_id = original_committee_sboe_id" 
                  )
print("Done.")
conn.commit()

updates canon committee id on transactions 
Done.


 ## User Friendly Views of contributions and expenditures

In [12]:
print("Creating Contributions View")
c.execute("CREATE VIEW contributions AS "
          "(SELECT transactions.transaction_id AS trans_id, "
          " transactions.original_committee_sboe_id AS original_receiver_id, "
          " transactions.original_account_id AS original_source_id, "
          " transactions.transaction_type, "
          " 'C' AS transaction_category, "
          " transactions.date_occured, "
          " transactions.amount, "
          " transactions.report_name, "
          " transactions.account_code, "
          " transactions.form_of_payment, "
          " transactions.purpose, "
          " transactions.candidate_referendum_name AS candidate_refereendum_name, "
          " transactions.declaration, "
          " transactions.canon_account_id AS canon_source_id, "
          " transactions.canon_committee_sboe_id AS canon_receiver_id "
          "FROM transactions "
          " WHERE transactions.transaction_type IN ('CONTRIB TO REIMBURSE', 'DONATION', 'FORGIVEN LOAN', "
          " 'GENERAL', 'INDIVIDUAL', 'INTEREST', 'LOAN', "
          " 'OUTSIDE SOURCE', 'OUTSTANDING LOAN', 'NON-PARTY COMM', 'NONPROFIT', 'PARTY COMM'))")     
conn.commit()
print("Done.")

Creating Contributions View
Done.


In [13]:
print("Creating Expenses View")
c.execute("CREATE VIEW expenses AS "
          "SELECT transactions.transaction_id AS trans_id, "
          " transactions.original_committee_sboe_id AS orginal_source_id, "
          " transactions.original_account_id AS original_receiver_id, "
          " transactions.transaction_type, "
          " 'E' AS transaction_category, "
          " transactions.date_occured, "
          " transactions.amount, "
          " transactions.report_name, "
          " transactions.account_code, "
          " transactions.form_of_payment, "
          " transactions.purpose, "
          " transactions.candidate_referendum_name AS candidate_refereendum_name, "
          " transactions.declaration, "
          " transactions.canon_committee_sboe_id AS canon_source_id, "
          " transactions.canon_account_id AS canon_receiver_id "
          "FROM transactions "
          "WHERE transactions.transaction_type IN ('CONT TO OTHER COMM', 'COORD PARTY EXP', 'DEBT PAYMENT', "
          " 'INDEPENDENT EXP', 'LOAN REPAYMENT', 'NONMONETARY GIFT', 'OPERATING EXP', "
          " 'REFUND')")
conn.commit()
print("Done.")

Creating Expenses View
Done.


In [14]:
print("Creating Contributions with Names View")
c.execute("CREATE VIEW  contributions_with_names AS "
          "SELECT contributions.trans_id AS transaction_id, "
          " accounts.account_id AS source_id, "
          " accounts.name AS source_name, "
          " committees.comm_id AS receiver_id, "
          " committees.name AS receiver_name, "
          " contributions.transaction_type, "
          " contributions.transaction_category, "
          " contributions.date_occured, "
          " contributions.amount "
          "FROM contributions "
          " INNER JOIN committees ON contributions.canon_receiver_id = committees.sboe_committee_id "
          " INNER JOIN accounts ON contributions.canon_source_id = accounts.account_id ")
conn.commit()
print("Done.")

Creating Contributions with Names View
Done.


In [15]:
print("Creating Expenses with Names View")
c.execute("CREATE VIEW  expenses_with_names AS "
          "SELECT expenses.trans_id AS transaction_id, "
          " committees.comm_id AS source_id, "
          " committees.name AS source_name, "
          " accounts.account_id AS receiver_id, "
          " accounts.name AS receiver_name, "
          " expenses.transaction_type, "
          " expenses.transaction_category, "
          " expenses.date_occured, "
          " expenses.amount "
          "FROM expenses "
          " INNER JOIN committees ON expenses.canon_source_id = committees.sboe_committee_id "
          " INNER JOIN accounts ON expenses.canon_receiver_id = accounts.account_id ")
conn.commit()
print("Done.")

Creating Expenses with Names View
Done.


In [20]:
print("Additional Info regarding accounts - is vendor")
c.execute("UPDATE accounts  "
          " SET is_vendor = 1 WHERE account_id in ( "
          " SELECT distinct canon_receiver_id "
          " FROM public.expenses )")
conn.commit()
print("Done.")

Additional Info regarding accounts - is vendor
Done.


In [23]:
print("Additional Info regarding accounts - is donor")
c.execute("UPDATE accounts  "
          " SET is_donor = 1 WHERE account_id in ( "
          " SELECT distinct canon_source_id "
          " FROM public.contributions )")
conn.commit()
print("Done.")

Additional Info regarding accounts - is donor
Done.


## Process and clean additional Committee Data 

In [24]:
print('creating committee list table...')
c.execute("CREATE TABLE public.committee_list "
          "(id SERIAL PRIMARY KEY, "
          " sboeid_url VARCHAR(1024), "
          " sboeid VARCHAR(1024), "
          " status_url VARCHAR(1024), "
          " status VARCHAR(1024), "
          " candidatename_parententityname_url VARCHAR(1024), "
          " candidatename_parententityname VARCHAR(1024), "
          " committeename_url VARCHAR(1024), "
          " committeename VARCHAR(1024)) ")
print("Done.")

creating committee list table...
Done.


In [25]:
directory = os.fsencode("./data/committee_list")
    
for filename in os.listdir(directory):
    full_filename = os.path.join(directory, filename)
    
    with open(full_filename, 'rU') as csv_file: 
        c.copy_expert("COPY committee_list " 
                  "(committeename_url, committeename, " 
                  " sboeid_url, sboeid, " 
                  " status_url, status, " 
                  " candidatename_parententityname_url, candidatename_parententityname) " 
                  "FROM STDIN CSV HEADER", csv_file)

    conn.commit()
print("Done.") 

Done.


  with open(full_filename, 'rU') as csv_file:


In [26]:
print('creating committee doc list table...')
c.execute("CREATE TABLE public.committee_doc_list "
          "(doc_id SERIAL PRIMARY KEY, "
          " committee_name VARCHAR(200), "
          " year VARCHAR(4), "
          " sboe_committee_id VARCHAR(200), "
          " doctype VARCHAR(200), "
          " docname VARCHAR(200), "
          " receivedimage VARCHAR(50), "
          " startdate VARCHAR(50), "
          " enddate VARCHAR(50), "
          " image VARCHAR(200), "
          " image_url VARCHAR(2000), "
          " data_text VARCHAR(200), "
          " data_url VARCHAR(2000))")
print("Done.")

creating committee doc list table...
Done.


In [27]:
directory = os.fsencode("./data/committee_doc_list")
    
for filename in os.listdir(directory):
    full_filename = os.path.join(directory, filename)
    print(full_filename)
    with open(full_filename, 'rU') as csv_file: 
        c.copy_expert("COPY committee_doc_list " 
                  "(committee_name, year, " 
                  " doctype, docname, " 
                  " receivedimage, startdate, enddate, " 
                  " image_url, image, "
                  " data_text, data_url) " 
                  "FROM STDIN CSV HEADER", csv_file)

    conn.commit()

print("Done.")    

b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(1).csv'


  with open(full_filename, 'rU') as csv_file:


b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(2).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(3).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(4).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(5).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity-Level2-Part1(6).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(1).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(2).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(3).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(4).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(5).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(6).csv'
b'./data/committee_doc_list/Campaign Document Search By Entity_1613742718(7).csv'
Done.


In [28]:
print('fixing committee ids ...')
c.execute("update committee_doc_list "
          "set committee_name = split_part(committee_name, '[', 1), "
          " sboe_committee_id = substring(split_part(committee_name, '[', 2), 1, 16) "
          )
print("Done.")

fixing committee ids ...
Done.


In [37]:
print('creating active candidate committees list table...')
c.execute("CREATE TABLE public.active_candidate_committees "
          "(id SERIAL PRIMARY KEY, "
          " sboe_id VARCHAR(200), "
          " current_status VARCHAR(200), "
          " committee_name VARCHAR(200), "
          " committee_type VARCHAR(200), "
          " committee_street_1 VARCHAR(1024), "
          " committee_street_2 VARCHAR(1024), "
          " committee_city VARCHAR(200), "
          " committee_state VARCHAR(50), "
          " committee_full_zip VARCHAR(50), "
          " candidate_first_name VARCHAR(200), "
          " candidate_middle_name VARCHAR(200), "
          " candidate_last_name VARCHAR(200), "
          " treasurer_first_name VARCHAR(200), "
          " treasurer_middle_name VARCHAR(200), "
          " treasurer_last_name VARCHAR(200), "
          " treasurer_email VARCHAR(200), "
          " asst_treasurer_first_name VARCHAR(200), "
          " asst_treasurer_middle_name VARCHAR(200), "
          " asst_treasurer_last_name VARCHAR(200), "
          " asst_treasurer_email VARCHAR(200), "
          " treasurer_street_1 VARCHAR(200), "
          " treasurer_street_2 VARCHAR(200), "
          " treasurer_city VARCHAR(200), "
          " treasurer_state VARCHAR(200), "
          " treasurer_full_zip VARCHAR(200), "
          " party VARCHAR(200), "
          " office VARCHAR(200), "
          " juris VARCHAR(200))")
print("Done.")

creating active candidate committees list table...
Done.


In [40]:
conn.commit()

In [38]:
directory = os.fsencode("./data/active_committee_list")
    
for filename in os.listdir(directory):
    full_filename = os.path.join(directory, filename)
    print(full_filename)
    with open(full_filename, 'rU') as csv_file: 
        c.copy_expert("COPY active_candidate_committees " 
                  "( sboe_id, "
                      "current_status, "
                      "committee_name, "
                      "committee_type, "
                      "committee_street_1, "
                      "committee_street_2, "
                      "committee_city, "
                      "committee_state, "
                      "committee_full_zip, "
                      "candidate_first_name, "
                      "candidate_middle_name, "
                      "candidate_last_name, "
                      "treasurer_first_name, "
                      "treasurer_middle_name, "
                      "treasurer_last_name, "
                      "treasurer_email, "
                      "asst_treasurer_first_name, "
                      "asst_treasurer_middle_name, "
                      "asst_treasurer_last_name, "
                      "asst_treasurer_email, "
                      "treasurer_street_1, "
                      "treasurer_street_2, "
                      "treasurer_city, "
                      "treasurer_state, "
                      "treasurer_full_zip, "
                      "party, "
                      "office, "
                      "juris) " 
                  "FROM STDIN CSV HEADER", csv_file)
    conn.commit()

print("Done.")   

b'./data/active_committee_list/active-candidate-committees-20200327.csv'
Done.


  with open(full_filename, 'rU') as csv_file:


## Update the committee table with info from active candidate committees

print("applying active candiate info to committees - jurisdiction")
c.execute("UPDATE committees " 
                  " set juris = acc.juris " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

print("applying active candiate info to committees - party")
c.execute("UPDATE committees " 
                  " set party = acc.party " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

print("applying active candiate info to committees - office")
c.execute("UPDATE committees " 
                  " set office = acc.office " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

In [41]:
print("applying active candiate info to committees - candidate_id")
c.execute("UPDATE committees " 
                  " set candidate_id = acc.id " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

applying active candiate info to committees - candidate_id
Done.


In [42]:
print("applying active candiate info to committees - treasurer_id")
c.execute("UPDATE committees " 
                  " set treasurer_id = acc.id " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

applying active candiate info to committees - treasurer_id
Done.


In [43]:
print("applying active candiate info to committees - asst_treasurer_id")
c.execute("UPDATE committees " 
                  " set asst_treasurer_id = acc.id " 
                  " from active_candidate_committees acc where committees.sboe_committee_id = acc.sboe_id " )
print("Done.")
conn.commit()

applying active candiate info to committees - asst_treasurer_id
Done.


## Pivot the active candidate committees to committee_persons

In [7]:
conn.commit()

In [44]:
print('creating active committee person list table...')
c.execute("CREATE TABLE public.committee_person "
          "(id SERIAL PRIMARY KEY, "
          " first_name VARCHAR(200), "
          " middle_name VARCHAR(200), "
          " last_name VARCHAR(200), "
          " street_1 VARCHAR(200), "
          " street_2 VARCHAR(1024), "
          " city VARCHAR(1024), "
          " state VARCHAR(200), "
          " full_zip VARCHAR(50), "
          " email VARCHAR(50), "
          " role INT, "
          " committee_id VARCHAR(50))")
conn.commit()
print("Done.")

creating active committee person list table...
Done.


In [45]:
print("applying active candiate info to committee person table - treasurer")
c.execute("INSERT INTO public.committee_person " 
                  " (first_name, middle_name, last_name, street_1, street_2, city, state, full_zip, email, role, committee_id) " 
                  " SELECT treasurer_first_name, treasurer_middle_name, treasurer_last_name, "
                  " treasurer_street_1, treasurer_street_2, treasurer_city, treasurer_state, "
                  " treasurer_full_zip, treasurer_email, '2', sboe_id " 
                  "from active_candidate_committees")
print("Done.")
conn.commit()

applying active candiate info to committee person table - treasurer
Done.


In [46]:
print("applying active candiate info to committee person table - candidate")
c.execute("INSERT INTO public.committee_person " 
                  " (first_name, middle_name, last_name, street_1, street_2, city, state, full_zip, email, role, committee_id) " 
                  " SELECT candidate_first_name, candidate_middle_name, candidate_last_name, '', '', '', '', '', '', 1, sboe_id "
                  "from active_candidate_committees")
print("Done.")
conn.commit()

applying active candiate info to committee person table - candidate
Done.


In [47]:
print("applying active candiate info to committee person table - asst treasurer")
c.execute("INSERT INTO public.committee_person " 
                  " (first_name, middle_name, last_name, street_1, street_2, city, state, full_zip, email, role, committee_id) " 
                  " SELECT asst_treasurer_first_name, asst_treasurer_middle_name, asst_treasurer_last_name, '', '', '', '', '', '', 3, sboe_id "
                  "from active_candidate_committees")
print("Done.")
conn.commit()

applying active candiate info to committee person table - asst treasurer
Done.


In [48]:
print('creating candidates view...')
c.execute("CREATE VIEW candidates AS "
          "SELECT id, first_name, middle_name, last_name, "
          " street_1, street_2, city, state, full_zip, "
          " email, role, committee_id "
          "FROM committee_person where role = 1")
conn.commit()
print("Done.")

creating candidates view...
Done.


In [49]:
print('creating treasurers view...')
c.execute("CREATE VIEW treasurers AS "
          "SELECT id, first_name, middle_name, last_name, "
          " street_1, street_2, city, state, full_zip, "
          " email, role, committee_id "
          "FROM public.committee_person where role = 2")
conn.commit()
print("Done.")

creating treasurers view...
Done.


In [50]:
print('creating asst_treasurers view...')
c.execute("CREATE VIEW asst_treasurers AS "
          "SELECT id, first_name, middle_name, last_name, "
          " street_1, street_2, city, state, full_zip, "
          " email, role, committee_id "
          "FROM public.committee_person where role = 3")
conn.commit()
print("Done.")

creating asst_treasurers view...
Done.
