Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

wip

* signature and effective dates are now being stored effectively.
* Any form with a nil value is filtered out (datomic does not store nil
  values).
* alis have been renamed to lobbying issue codes for the purpose of
  clarity.
  • Loading branch information...
commit 2ac3f3ebe48dc5939bc3324890cc7c68787d6282 1 parent a25627a
Citizen zmaril authored
3  project.clj
View
@@ -8,5 +8,6 @@
[org.clojure/data.json "0.2.5"]
[me.raynes/fs "1.4.4"]
[instaparse "1.3.2"]
- [org.jordanlewis/data.union-find "0.1.0"]]
+ [org.jordanlewis/data.union-find "0.1.0"]
+ [clj-time "0.7.0"]]
:main echelon.core)
90 src/echelon/ali.clj
View
@@ -1,92 +1,2 @@
(ns echelon.ali
(:require [datomic.api :refer [tempid]]))
-
-(def alis
- {"ACC" "Accounting"
- "ADV" "Advertising"
- "AER" "Aerospace"
- "AGR" "Agriculture"
- "ALC" "Alcohol & Drug Abuse"
- "ANI" "Animals"
- "APP" "Apparel/Clothing Industry/Textiles"
- "ART" "Arts/Entertainment"
- "AUT" "Automotive Industry"
- "AVI" "Aviation/Aircraft/Airlines"
- "BAN" "Banking"
- "BEV" "Beverage Industry"
- "BNK" "Bankruptcy"
- "BUD" "Budget/Appropriations"
- "CAW" "Clean Air & Water (Quality)"
- "CDT" "Commodities (Big Ticket)"
- "CHM" "Chemicals/Chemical Industry"
- "CIV" "Civil Rights/Civil Liberties"
- "COM" "Communications/Broadcasting/Radio/TV"
- "CON" "Constitution"
- "CPI" "Computer Industry"
- "CPT" "Copyright/Patent/Trademark"
- "CSP" "Consumer Issues/Safety/Protection"
- "DEF" "Defense"
- "DIS" "Disaster Planning/Emergencies"
- "DOC" "District of Columbia"
- "ECN" "Economics/Economic Development"
- "EDU" "Education"
- "ENG" "Energy/Nuclear"
- "ENV" "Environmental/Superfund"
- "FAM" "Family Issues/Abortion/Adoption"
- "FIN" "Financial Institutions/Investments/Securities"
- "FIR" "Firearms/Guns/Ammunition"
- "FOO" "Food Industry (Safety, Labeling, etc.)"
- "FOR" "Foreign Relations"
- "FUE" "Fuel/Gas/Oil"
- "GAM" "Gaming/Gambling/Casino"
- "GOV" "Government Issues"
- "HCR" "Health Issues"
- "HOM" "Homeland Security"
- "HOU" "Housing"
- "IMM" "Immigration"
- "IND" "Indian/Native American Affairs"
- "INS" "Insurance"
- "INT" "Intelligence and Surveillance"
- "LAW" "Law Enforcement/Crime/Criminal Justice"
- "LBR" "Labor Issues/Antitrust/Workplace"
- "MAN" "Manufacturing"
- "MAR" "Marine/Maritime/Boating/Fisheries"
- "MED" "Medical/Disease Research/Clinical Labs"
- "MIA" "Media (Information/Publishing)"
- "MMM" "Medicare/Medicaid"
- "MON" "Minting/Money/Gold Standard"
- "NAT" "Natural Resources"
- "PHA" "Pharmacy"
- "POS" "Postal"
- "REL" "Religion"
- "RES" "Real Estate/Land Use/Conservation"
- "RET" "Retirement"
- "ROD" "Roads/Highway"
- "RRR" "Railroads"
- "SCI" "Science/Technology"
- "SMB" "Small Business"
- "SPO" "Sports/Athletics"
- "TAR" "Miscellaneous Tariff Bills"
- "TAX" "Taxation/Internal Revenue Code"
- "TEC" "Telecommunications"
- "TOB" "Tobacco"
- "TOR" "Torts"
- "TOU" "Travel/Tourism"
- "TRA" "Transportation"
- "TRD" "Trade (Domestic & Foreign)"
- "TRU" "Trucking/Shipping"
- "UNM" "Unemployment"
- "URB" "Urban Development/Municipalities"
- "UTI" "Utilities"
- "VET" "Veterans"
- "WAS" "Waste (hazardous/solid/interstate/nuclear)"
- "WEL" "Welfare"})
-
-(defn string->ali [code]
- (keyword (str "lobbying.alis/" code)))
-
-(def alis-attributes
- (for [[code description] alis]
- {:db/id (tempid :db.part/user)
- :db/ident (keyword (str "lobbying.alis/" code))
- :db/doc (str "Code for activities relating to \"" description "\".")}))
50 src/echelon/load.clj
View
@@ -1,11 +1,12 @@
(ns echelon.load
(:require [datomic.api :as d :refer [db q]]
[clojure.data.json :as json]
- [echelon.ali :refer [string->ali]]
[echelon.text :refer [clean]]
- [echelon.schema :refer [schema]]
+ [echelon.schema :refer [schema string->issue-code]]
+ [echelon.util :refer [contains-nil?]]
[me.raynes.fs :as fs]
- [clojure.pprint :refer [pprint]]))
+ [clojure.pprint :refer [pprint]]
+ [clj-time.format :as f]))
(def datadir "/home/zmaril/data/sopr_html/")
@@ -22,7 +23,14 @@
:record/type :being.record/being
:being/id (str (d/squuid))})
-(defn registration-datoms [m]
+(defn parse-time
+  "Parses the timestamp string s with clj-time's :date-hour-minute-second
+  formatter and returns the result as a java.util.Date. Returns nil when
+  s is nil. The one known malformed value \"03/031/2008\" is special-cased
+  and read as 03/31/2008."
+  [s]
+  (if (= s "03/031/2008")
+    ;; Parse the corrected literal through clj-time as well, instead of the
+    ;; deprecated java.util.Date string constructor (which interprets the
+    ;; text in the JVM's local time zone), so this value is produced the
+    ;; same way as every other date handled here.
+    (.toDate (f/parse (f/formatter "MM/dd/yyyy") "03/31/2008"))
+    (some->> s
+             (f/parse (f/formatters :date-hour-minute-second))
+             (.toDate))))
+
+(defn registration-datoms [[f m]]
(let [lobbyists (:lobbyists m)
contact-being-id (d/tempid :db.part/user)
client-being-id (d/tempid :db.part/user)
@@ -49,9 +57,10 @@
:lobbying.form/senate-id
(get-in m [:registrant :registrant_senate_id])
- ;(get-in m [:datetimes :signature_date])
- ;:lobbying.registration/effective-date
- ;(get-in m [:datetimes :effective_date])
+ :lobbying.form/signature-date
+ (-> m :datetimes :signature_date parse-time)
+ :lobbying.registration/effective-date
+ (-> m :datetimes :effective_date parse-time)
:lobbying.form/client
(let [c (:client m)]
@@ -109,7 +118,7 @@
(:lobbying_issues_detail m)
:lobbying.activity/issue-codes
(->> m :lobbying_issues
- (map (comp string->ali :issue_code)))
+ (map (comp string->issue-code :issue_code)))
:lobbying.activity/lobbyists
(map-indexed
#(do {:record/type :lobbying.record/lobbyist
@@ -128,24 +137,13 @@
registration)))
(defn load-data! [conn]
- (->> (list-registration-forms)
- (map (comp
- deref
- (partial d/transact conn)
- registration-datoms
- #(json/read-str % :key-fn keyword)
- slurp))
- doall
- count
- (str "Found this many files:" )
- println)
-
- (comment
- (->> (list-ld2-forms)
- (filter (complement nil?))
- (apply concat)
- (pmap (comp (partial add-ld2-form! conn) json/read-str slurp))
- doall)))
+ (doseq [datoms
+ (map (comp
+ registration-datoms
+ (juxt identity (comp #(json/read-str % :key-fn keyword) slurp)))
+ (list-registration-forms))
+ :when (not (contains-nil? datoms))]
+ @(d/transact conn datoms)))
(defn load-schema! [conn]
(d/transact conn schema))
96 src/echelon/schema.clj
View
@@ -1,6 +1,96 @@
(ns echelon.schema
- (:require [echelon.ali :refer [alis-attributes]]
- [datomic.api :refer [tempid]]))
+ (:require [datomic.api :refer [tempid]]))
+
+;; Lobbying codes taken from http://1.usa.gov/1lm74is
+(def issue-codes
+ {"ACC" "Accounting"
+ "ADV" "Advertising"
+ "AER" "Aerospace"
+ "AGR" "Agriculture"
+ "ALC" "Alcohol & Drug Abuse"
+ "ANI" "Animals"
+ "APP" "Apparel/Clothing Industry/Textiles"
+ "ART" "Arts/Entertainment"
+ "AUT" "Automotive Industry"
+ "AVI" "Aviation/Aircraft/Airlines"
+ "BAN" "Banking"
+ "BEV" "Beverage Industry"
+ "BNK" "Bankruptcy"
+ "BUD" "Budget/Appropriations"
+ "CAW" "Clean Air & Water (Quality)"
+ "CDT" "Commodities (Big Ticket)"
+ "CHM" "Chemicals/Chemical Industry"
+ "CIV" "Civil Rights/Civil Liberties"
+ "COM" "Communications/Broadcasting/Radio/TV"
+ "CON" "Constitution"
+ "CPI" "Computer Industry"
+ "CPT" "Copyright/Patent/Trademark"
+ "CSP" "Consumer Issues/Safety/Protection"
+ "DEF" "Defense"
+ "DIS" "Disaster Planning/Emergencies"
+ "DOC" "District of Columbia"
+ "ECN" "Economics/Economic Development"
+ "EDU" "Education"
+ "ENG" "Energy/Nuclear"
+ "ENV" "Environmental/Superfund"
+ "FAM" "Family Issues/Abortion/Adoption"
+ "FIN" "Financial Institutions/Investments/Securities"
+ "FIR" "Firearms/Guns/Ammunition"
+ "FOO" "Food Industry (Safety, Labeling, etc.)"
+ "FOR" "Foreign Relations"
+ "FUE" "Fuel/Gas/Oil"
+ "GAM" "Gaming/Gambling/Casino"
+ "GOV" "Government Issues"
+ "HCR" "Health Issues"
+ "HOM" "Homeland Security"
+ "HOU" "Housing"
+ "IMM" "Immigration"
+ "IND" "Indian/Native American Affairs"
+ "INS" "Insurance"
+ "INT" "Intelligence and Surveillance"
+ "LAW" "Law Enforcement/Crime/Criminal Justice"
+ "LBR" "Labor Issues/Antitrust/Workplace"
+ "MAN" "Manufacturing"
+ "MAR" "Marine/Maritime/Boating/Fisheries"
+ "MED" "Medical/Disease Research/Clinical Labs"
+ "MIA" "Media (Information/Publishing)"
+ "MMM" "Medicare/Medicaid"
+ "MON" "Minting/Money/Gold Standard"
+ "NAT" "Natural Resources"
+ "PHA" "Pharmacy"
+ "POS" "Postal"
+ "REL" "Religion"
+ "RES" "Real Estate/Land Use/Conservation"
+ "RET" "Retirement"
+ "ROD" "Roads/Highway"
+ "RRR" "Railroads"
+ "SCI" "Science/Technology"
+ "SMB" "Small Business"
+ "SPO" "Sports/Athletics"
+ "TAR" "Miscellaneous Tariff Bills"
+ "TAX" "Taxation/Internal Revenue Code"
+ "TEC" "Telecommunications"
+ "TOB" "Tobacco"
+ "TOR" "Torts"
+ "TOU" "Travel/Tourism"
+ "TRA" "Transportation"
+ "TRD" "Trade (Domestic & Foreign)"
+ "TRU" "Trucking/Shipping"
+ "UNM" "Unemployment"
+ "URB" "Urban Development/Municipalities"
+ "UTI" "Utilities"
+ "VET" "Veterans"
+ "WAS" "Waste (hazardous/solid/interstate/nuclear)"
+ "WEL" "Welfare"})
+
+(defn string->issue-code
+  "Returns the keyword made by appending code to \"lobbying.issue-code/\"."
+  [code]
+  (->> code
+       (str "lobbying.issue-code/")
+       keyword))
+
+;; One entity map per entry of issue-codes. Each map gets a fresh tempid in
+;; :db.part/user, an ident derived from the code, and a doc string built
+;; from the description.
+(def issue-code-attributes
+  (for [[code description] issue-codes]
+    {:db/id    (tempid :db.part/user)
+     ;; Reuse string->issue-code so the ident defined here is always the
+     ;; exact keyword that string->issue-code hands back to its callers.
+     :db/ident (string->issue-code code)
+     :db/doc   (str "Code for activities relating to \"" description "\".")}))
;;Helper functions for datomic. We're not doing too many fancy things
;;here with datomic and the main struggle has just been understanding
@@ -187,7 +277,7 @@
(def schema
(vec (concat data-attributes
- alis-attributes
+ issue-code-attributes
address-attributes
being-framework-attributes
client-attributes
5 src/echelon/util.clj
View
@@ -30,3 +30,8 @@
keys
(group-by uf)
vals)))
+
+(defn contains-nil?
+  "Returns true when arg is nil, or is a collection holding nil at any
+  depth; returns false otherwise. Maps are walked entry by entry, so a nil
+  key or a nil value both count. An empty collection yields false."
+  [arg]
+  (if (coll? arg)
+    ;; `some` stops at the first element that contains a nil instead of
+    ;; visiting the whole structure; `boolean` converts its nil result for
+    ;; the nothing-found case into false so the return is always a boolean.
+    (boolean (some contains-nil? arg))
+    (nil? arg)))
Please sign in to comment.
Something went wrong with that request. Please try again.