Permalink
Browse files

Adjusted database to new FEC formats.

  • Loading branch information...
Ethan Phelps-Goodman
Ethan Phelps-Goodman committed Apr 17, 2012
1 parent d9efb37 commit 98161fb3aefaffa18dd4594523754d3c8f27dde4
Showing with 159 additions and 82 deletions.
  1. +2 −2 dcapi/aggregates/fec/handlers.py
  2. +57 −43 dcdata/fec/create_fec.sql
  3. +24 −17 dcdata/fec/importer.py
  4. +76 −20 dcdata/fec/postload.sql
@@ -132,7 +132,7 @@ class CandidateItemizedDownloadHandler(EntityTopListHandler):
stmt = """
select contributor_name, date, amount, contributor_type, transaction_type,
- organization, occupation, city, state, zipcode,
+ employer, occupation, city, state, zipcode,
candidate_name, party, race, status
from fec_candidate_itemized i
inner join matchbox_entityattribute a on i.candidate_id = a.value and a.namespace = 'urn:fec:candidate'
@@ -150,7 +150,7 @@ class CommitteeItemizedDownloadHandler(EntityTopListHandler):
stmt = """
select contributor_name, date, amount, contributor_type, contributor_committee_id, transaction_type,
- organization, occupation, city, state, zipcode,
+ employer, occupation, city, state, zipcode,
committee_name, committee_id, committee_designation, committee_type, committee_party, interest_group, connected_org
from fec_committee_itemized i
inner join matchbox_entityattribute a on i.committee_id = a.value and a.namespace = 'urn:fec:committee'
View
@@ -43,61 +43,75 @@ create table fec_indiv_import (
filer_id varchar(9),
amendment varchar(1),
report_type varchar(3),
- election_type varchar(1),
+ election_type varchar(5),
microfilm_location varchar(11),
transaction_type varchar(3),
- contributor_lender_transfer varchar(34),
- city varchar(18),
+ entity_type varchar(3),
+ contributor_name varchar(200),
+ city varchar(30),
state varchar(2),
- zipcode varchar(5),
- occupation varchar(35),
- transaction_month varchar(2),
- transaction_day varchar(2),
- transaction_century varchar(2),
- transaction_year varchar(2),
- amount varchar(7),
+ zipcode varchar(9),
+ employer varchar(38),
+ occupation varchar(38),
+ date varchar(8),
+ amount numeric(14,2),
other_id varchar(9),
- fec_record varchar(7)
+ transaction_id varchar(32),
+ file_num varchar(22),
+ memo_code varchar(1),
+ memo_text varchar(100),
+ fec_record varchar(19)
);
drop table if exists fec_pac2cand_import;
create table fec_pac2cand_import (
- filer_id VARCHAR(9),
- amendment VARCHAR(1),
- report_type VARCHAR(3),
- election_type VARCHAR(1),
- microfilm_location VARCHAR(11),
- transaction_type VARCHAR(3),
- transaction_month VARCHAR(2),
- transaction_day VARCHAR(2),
- transaction_century VARCHAR(2),
- transaction_year VARCHAR(2),
- amount VARCHAR(7),
- other_id VARCHAR(9),
- candidate_id VARCHAR(9),
- fec_record VARCHAR(7)
+ filer_id varchar(9),
+ amendment varchar(1),
+ report_type varchar(3),
+ election_type varchar(5),
+ microfilm_location varchar(11),
+ transaction_type varchar(3),
+ entity_type varchar(3),
+ contributor_name varchar(200),
+ city varchar(30),
+ state varchar(2),
+ zipcode varchar(9),
+ employer varchar(38),
+ occupation varchar(38),
+ date varchar(8),
+ amount numeric(14,2),
+ other_id varchar(9),
+ candidate_id varchar(9),
+ transaction_id varchar(32),
+ file_num varchar(22),
+ memo_code varchar(1),
+ memo_text varchar(100),
+ fec_record varchar(19)
);
drop table if exists fec_pac2pac_import;
CREATE TABLE fec_pac2pac_import (
- filer_id VARCHAR(9),
- amendment VARCHAR(1),
- report_type VARCHAR(3),
- election_type VARCHAR(1),
- microfilm_location VARCHAR(11),
- transaction_type VARCHAR(3),
- contributor_lender_transfer VARCHAR(34),
- city VARCHAR(18),
- state VARCHAR(2),
- zipcode VARCHAR(5),
- occupation VARCHAR(35),
- transaction_month VARCHAR(2),
- transaction_day VARCHAR(2),
- transaction_century VARCHAR(2),
- transaction_year VARCHAR(2),
- amount VARCHAR(7),
- other_id VARCHAR(9),
- fec_record VARCHAR(7)
+ filer_id varchar(9),
+ amendment varchar(1),
+ report_type varchar(3),
+ election_type varchar(5),
+ microfilm_location varchar(11),
+ transaction_type varchar(3),
+ entity_type varchar(3),
+ contributor_name varchar(200),
+ city varchar(30),
+ state varchar(2),
+ zipcode varchar(9),
+ employer varchar(38),
+ occupation varchar(38),
+ date varchar(8),
+ amount numeric(14,2),
+ other_id varchar(9),
+ transaction_id varchar(32),
+ file_num varchar(22),
+ memo_code varchar(1),
+ memo_text varchar(100),
+ fec_record varchar(19)
);
drop table if exists fec_candidate_summaries_import;
View
@@ -37,32 +37,39 @@ def __init__(self, processing_dir, config=FEC_CONFIG):
def update_csv(self):
- self.log.info("Downloading files to %s..." % self.processing_dir)
- self.download()
+ try:
+ self.log.info("Downloading files to %s..." % self.processing_dir)
+ self.download()
- self.log.info("Extracting files...")
- self.extract()
+ self.log.info("Extracting files...")
+ self.extract()
- self.log.info("Converting to unicode...")
- self.fix_unicode()
+ self.log.info("Converting to unicode...")
+ self.fix_unicode()
- self.log.info("Converting to CSV...")
- self.fec_2_csv()
+ self.log.info("Converting to CSV...")
+ self.fec_2_csv()
+ except Exception as e:
+ self.log.error(e)
+ raise
def update_db(self):
+ try:
+ c = connection.cursor()
- c = connection.cursor()
+ self.execute_file(c, SQL_PRELOAD_FILE)
- self.execute_file(c, SQL_PRELOAD_FILE)
+ self.log.info("Uploading data...")
+ self.upload(c)
- self.log.info("Uploading data...")
- self.upload(c)
+ self.log.info("Processing uploaded data...")
+ self.execute_file(c, SQL_POSTLOAD_FILE)
- self.log.info("Processing uploaded data...")
- self.execute_file(c, SQL_POSTLOAD_FILE)
-
- self.log.info("Done.")
+ self.log.info("Done.")
+ except Exception as e:
+ self.log.error(e)
+ raise
def _download_file(self, conf):
filename = conf.url.split("/")[-1]
@@ -124,5 +131,5 @@ def upload(self, c):
for conf in self.FEC_CONFIG:
infile = open(os.path.join(self._working_dir(conf), conf.dta_file.split(".")[0] + ".csv"), 'r')
c.execute("DELETE FROM %s" % conf.sql_table)
- c.copy_expert("COPY %s FROM STDIN CSV HEADER" % conf.sql_table, infile)
+ c.copy_expert("COPY %s FROM STDIN CSV HEADER DELIMITER '%s'" % (conf.sql_table, ',' if conf.schema_file else '|'), infile)
View
@@ -16,31 +16,87 @@ create index fec_candidates_candidate_id on fec_candidates (candidate_id);
drop table if exists fec_indiv;
create table fec_indiv as
-select fec_record, filer_id, amendment, lower(transaction_type) as transaction_type, contributor_lender_transfer as contributor_name, city, state, zipcode, election_type,
- regexp_replace(occupation, '/[^/]*$', '') as organization, regexp_replace(occupation, '^.*/', '') as occupation,
- (transaction_century || transaction_year || transaction_month || transaction_day)::date as date,
- case when transaction_type = '22Y' then -abs(overpunch(amount)) else overpunch(amount) end as amount
+select
+ filer_id,
+ amendment,
+ report_type,
+ election_type,
+ microfilm_location,
+ lower(transaction_type) as transaction_type,
+ entity_type,
+ contributor_name,
+ city,
+ state,
+ zipcode,
+ employer,
+ occupation,
+ (substring(date for 4 from 5) || substring(date for 2) || substring(date for 2 from 3))::date as date,
+ case when transaction_type = '22Y' then -abs(amount) else amount end as amount,
+ other_id,
+ transaction_id,
+ file_num,
+ memo_code,
+ memo_text,
+ fec_record
from fec_indiv_import;
create index fec_indiv_filer_id on fec_indiv (filer_id);
drop table if exists fec_pac2cand;
create table fec_pac2cand as
-select fec_record, filer_id, lower(transaction_type) as transaction_type,
- (transaction_century || transaction_year || transaction_month || transaction_day)::date as date,
- overpunch(amount) as amount,
- other_id, candidate_id
+select
+ filer_id,
+ amendment,
+ report_type,
+ election_type,
+ microfilm_location,
+ lower(transaction_type) as transaction_type,
+ entity_type,
+ contributor_name,
+ city,
+ state,
+ zipcode,
+ employer,
+ occupation,
+ (substring(date for 4 from 5) || substring(date for 2) || substring(date for 2 from 3))::date as date,
+ case when transaction_type = '22Y' then -abs(amount) else amount end as amount,
+ other_id,
+ candidate_id,
+ transaction_id,
+ file_num,
+ memo_code,
+ memo_text,
+ fec_record
from fec_pac2cand_import;
create index fec_pac2cand_other_id on fec_pac2cand (other_id);
-
+create index fec_pac2cand_cand_id on fec_pac2cand (candidate_id);
drop table if exists fec_pac2pac;
create table fec_pac2pac as
-select fec_record, filer_id, lower(transaction_type) as transaction_type, contributor_lender_transfer as contributor_name, city, state, zipcode, occupation,
- (transaction_century || transaction_year || transaction_month || transaction_day)::date as date,
- overpunch(amount) as amount,
- other_id
+select
+ filer_id,
+ amendment,
+ report_type,
+ election_type,
+ microfilm_location,
+ lower(transaction_type) as transaction_type,
+ entity_type,
+ contributor_name,
+ city,
+ state,
+ zipcode,
+ employer,
+ occupation,
+ (substring(date for 4 from 5) || substring(date for 2) || substring(date for 2 from 3))::date as date,
+ case when transaction_type = '22Y' then -abs(amount) else amount end as amount,
+ other_id,
+ transaction_id,
+ file_num,
+ memo_code,
+ memo_text,
+ fec_record
from fec_pac2pac_import;
-
+create index fec_pac2pac_filer_id on fec_pac2pac (filer_id);
+create index fec_pac2pac_other_id on fec_pac2pac (other_id);
drop table if exists fec_candidate_summaries;
create table fec_candidate_summaries as
@@ -106,19 +162,19 @@ drop table if exists fec_candidate_itemized;
create table fec_candidate_itemized as
select
contributor_name, date, amount, contributor_type, transaction_type,
- organization, occupation, i.city, i.state, i.zipcode,
+ employer, occupation, i.city, i.state, i.zipcode,
candidate_name, party_designation1 as party, race, incumbent_challenger_open as status, committee_id, candidate_id
from fec_candidates c
inner join (
select filer_id as committee_id, 'indiv' as contributor_type, contributor_name,
- city, state, zipcode, organization, occupation,
+ i.city, i.state, i.zipcode, employer, occupation,
date, amount, transaction_type
- from fec_indiv
+ from fec_indiv i
union all
select other_id, 'pac', committee_name,
- city, state, zipcode, connected_org, '',
+ t.city, t.state, t.zipcode, connected_org, '',
date, amount, transaction_type
from fec_pac2cand t
inner join fec_committees c on (c.committee_id = t.filer_id)) i using (committee_id);
@@ -129,12 +185,12 @@ drop table if exists fec_committee_itemized;
create table fec_committee_itemized as
select
contributor_name, date, amount, contributor_type, contributor_committee_id, transaction_type,
- organization, occupation, i.city, i.state, i.zipcode,
+ employer, occupation, i.city, i.state, i.zipcode,
committee_name, committee_id, committee_designation, committee_type, committee_party, interest_group, connected_org, candidate_id
from fec_committees c
inner join (
select filer_id as committee_id, 'indiv' as contributor_type, contributor_name, '' as contributor_committee_id,
- city, state, zipcode, organization, occupation,
+ city, state, zipcode, employer, occupation,
date, amount, transaction_type
from fec_indiv

0 comments on commit 98161fb

Please sign in to comment.