Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

New schema

Mostly back to where we were before, but with far more data for each
form.
  • Loading branch information...
commit 49d774acda41f02a8e424f1837fdb061d2f456df 1 parent 98cdb35
@zmaril zmaril authored
Showing with 216 additions and 83 deletions.
  1. +28 −22 src/echelon/core.clj
  2. +150 −33 src/echelon/load.clj
  3. +38 −28 src/echelon/schema.clj
View
50 src/echelon/core.clj
@@ -13,37 +13,40 @@
(let [f #(-> (d/q '[:find (count ?being)
:in $ ?type
:where
- [?r :being/type ?type]
- [?r :being/represents ?being]]
+ [?r :record/type ?type]
+ [?r :record/represents ?being]]
dbc
%)
ffirst)]
- {:clients (f :being.type/:client)
- :firms (f :being.type/:firm)}))
+ {:clients (f :lobbying.record/client)
+ :registrant (f :lobbying.record/registrant)
+ :lobbyist (f :lobbying.record/lobbyist)
+ :activity (f :lobbying.record/activity)
+ :contact (f :lobbying.record/contact)}))
(defn merges-for-beings [dbc [b1 & b2s]]
(let [records (map
#(d/q '[:find ?record
:in $ ?being
- :where [?record :being/represents ?being]]
+ :where [?record :record/represents ?being]]
dbc
%)
b2s)
- adds (mapv #(vector :db/add (ffirst %) :being/represents b1)
+ adds (mapv #(vector :db/add (ffirst %) :record/represents b1)
records)
retracts (map #(vector :db.fn/retractEntity %) b2s)
data (vec (concat adds retracts))]
data))
-(def rules '[[(name-of ?record ?name) [?record :client/name ?name]]
- [(name-of ?record ?name) [?record :firm/name ?name]]])
+(def rules '[[(name-of ?record ?name) [?record :lobbying.client/name ?name]]
+ [(name-of ?record ?name) [?record :lobbying.registrant/name ?name]]])
(defn beings-and-names [dbc]
(d/q '[:find ?being ?name :in $ %
:where
- [?being :being/type :being.type/:being]
- [?record :being/represents ?being]
+ [?being :record/type :being.record/being]
+ [?record :record/represents ?being]
(name-of ?record ?name)]
dbc
rules))
@@ -81,11 +84,15 @@
(defn load-data []
(d/delete-database uri)
(d/create-database uri)
- (def c (d/connect uri))
- (println "Loading Database...")
- (load-database! c)
- (println "Loaded!")
- (println (how-many? (db c))))
+ (let [c (d/connect uri)]
+ (println "Loading Database...")
+ (load-database! c)
+ (println "Loaded!")
+ (println (how-many? (db c)))))
+
+(defn print-status []
+ (let [c (d/connect uri)]
+ (println (how-many? (db c)))))
(defn match-data []
(as-> (db (d/connect uri)) hypothetical
@@ -95,8 +102,8 @@
:in $ %
:with ?record
:where
- [?being :being/type :being.type/:being]
- [?record :being/represents ?being]
+ [?being :record/type :being.record/being]
+ [?record :record/represents ?being]
(name-of ?record ?name)]
hypothetical
rules)
@@ -107,12 +114,11 @@
(sort-by (comp first second))
pprint
with-out-str
- (spit "names-output.clj"))))
+ (spit "output/names-output.clj"))))
(defn -main [arg]
(condp = arg
- "load" (load-data)
- "match" (do
- (load-data)
- (match-data)))
+ "load" (load-data)
+ "match" (match-data)
+ "status" (print-status))
(java.lang.System/exit 0))
View
183 src/echelon/load.clj
@@ -1,13 +1,15 @@
(ns echelon.load
(:require [datomic.api :as d :refer [db q]]
[clojure.data.json :as json]
+ [echelon.ali :refer [string->ali]]
[echelon.text :refer [clean]]
+ [echelon.schema :refer [schema]]
[me.raynes.fs :as fs]
[clojure.pprint :refer [pprint]]))
(def datadir "/home/zmaril/data/original/sopr_html/")
-(defn list-ld1-forms []
+(defn list-registration-forms []
(mapcat #(fs/glob (str datadir "/" % "/REG/*"))
(range 2008 2015)))
@@ -15,43 +17,128 @@
(for [y (range 2004 2015) q (range 1 5)]
(fs/glob (str datadir "LD2/" y "/Q" q "/*"))))
+(defn new-being [id]
+ {:db/id id
+ :record/type :being.record/being
+ :being/id (str (d/squuid))})
-(def counter (atom 0))
-
-(defn ld1-datoms [m]
- (let [client-name (some-> m :client :client_name clean)
- firm-name (some-> m :registrant :registrant_name clean)
- document-id (some-> m :document_id)
- blank? (some nil? [client-name firm-name document-id])]
- (when blank?
- (swap! counter inc))
- (if blank?
- []
- [{:db/id #db/id[:db.part/user -1]
- :being/type :being.type/:being}
- {:db/id #db/id[:db.part/user -2]
- :being/type :being.type/:client
- :client/name client-name
- :being/represents #db/id[:db.part/user -1]}
- {:db/id #db/id[:db.part/user -3]
- :being/type :being.type/:being}
- {:db/id #db/id[:db.part/user -4]
- :being/type :being.type/:firm
- :firm/name firm-name
- :being/represents #db/id[:db.part/user -3]}
- {:db/id #db/id[:db.part/tx -1]
- :data/document-id document-id
- :data/source :data.source/sopr-html}])))
+(defn registration-datoms [m]
+ (let [lobbyists (-> m :lobbyists :lobbyists)
+ contact-being-id (d/tempid :db.part/user)
+ client-being-id (d/tempid :db.part/user)
+ registrant-being-id (d/tempid :db.part/user)
+ activity-being-id (d/tempid :db.part/user)
+ lobbyist-being-ids
+ (repeatedly (count lobbyists) #(d/tempid :db.part/user))
+ affliated-organization-beings-ids []
+ foreign-entities-beings-ids []
+ beings
+ (map new-being (concat [contact-being-id client-being-id registrant-being-id
+ activity-being-id]
+ lobbyist-being-ids
+ affliated-organization-beings-ids
+ foreign-entities-beings-ids))
+ registration {:db/id (d/tempid :db.part/user)
+ :record/type :lobbying.record/registration
+ :lobbying.form/source :lobbying.form/sopr-html
+ :lobbying.form/document-id (:document_id m)
+
+ :lobbying.form/house-id
+ (get-in m [:identifiers :registrant_house_id])
+ :lobbying.form/senate-id
+ (get-in m [:identifiers :registrant_senate_id])
+
+ ;:lobbying.form/signature-date
+ ;(get-in m [:datetimes :signature_date])
+ ;:lobbying.registration/effective-date
+ ;(get-in m [:datetimes :effective_date])
+
+ :lobbying.form/client
+ (let [c (:client m)]
+ {:record/type :lobbying.record/client
+ :record/represents client-being-id
+ :lobbying.client/name (:client_name c)
+ :lobbying.client/description (:client_general_description c)
+
+ :lobbying.client/main-address
+ {:address/first-line (:client_address c)
+ :address/zipcode (:client_zip c)
+ :address/city (:client_city c)
+ :address/state (:client_state c)
+ :address/country (:client_country c)}
+
+ :lobbying.client/principal-place-of-business
+ {:address/zipcode (:client_ppb_zip c)
+ :address/city (:client_ppb_city c)
+ :address/state (:client_ppb_state c)
+ :address/country (:client_ppb_country c)}})
+
+ :lobbying.form/registrant
+ (let [r (:registrant m)]
+ {:record/type :lobbying.record/registrant
+ :record/represents registrant-being-id
+ :lobbying.registrant/name (:registrant_name r)
+ :lobbying.registrant/description (:registrant_general_description r)
+
+ :lobbying.registrant/main-address
+ {:address/first-line (:registrant_address_one r)
+ :address/second-line (:registrant_address_two r)
+ :address/zipcode (:registrant_zip r)
+ :address/city (:registrant_city r)
+ :address/state (:registrant_state r)
+ :address/country (:registrant_country r)}
+
+ :lobbying.registrant/principal-place-of-business
+ {:address/zipcode (:registrant_ppb_zip r)
+ :address/city (:registrant_ppb_city r)
+ :address/state (:registrant_ppb_state r)
+ :address/country (:registrant_ppb_country r)}})
+
+ :lobbying.form/contact
+ (let [r (:registrant m)]
+ {:record/type :lobbying.record/contact
+ :record/represents contact-being-id
+ :lobbying.contact/name (:registrant_contact r)
+ :lobbying.contact/phone (:registrant_phone r )
+ :lobbying.contact/email (:registrant_email r)})
+
+ :lobbying.registration/activity
+ {:record/type :lobbying.record/activity
+ :record/represents activity-being-id
+ :lobbying.activity/general-details
+ (-> m :lobbying_issues_detail :lobbying_issues_detail)
+ :lobbying.activity/issue-codes
+ (->> m :lobbying_issues :lobbying_issues
+ (map (comp string->ali :issue_code)))
+ :lobbying.activity/lobbyists
+ (map-indexed
+ #(do {:record/type :lobbying.record/lobbyist
+ :record/represents (nth lobbyist-being-ids %1)
+ :data/position %1
+ :lobbying.lobbyist/first-name
+ (:lobbyist_first_name %2)
+ :lobbying.lobbyist/last-name
+ (:lobbyist_last_name %2)
+ :lobbying.lobbyist/suffix
+ (:lobbyist_suffix %2)
+ :lobbying.lobbyist/covered-official-position
+ (:lobbyist_covered_official_position %2)})
+ lobbyists)}}]
+ (conj (vec beings)
+ registration)))
(defn load-data! [conn]
- (->> (list-ld1-forms)
+ (->> (list-registration-forms)
(map (comp
(partial d/transact conn)
- ld1-datoms
+ registration-datoms
#(json/read-str % :key-fn keyword)
slurp))
- doall)
+ doall
+ count
+ (str "Found this many files:" )
+ println)
(comment
(->> (list-ld2-forms)
(filter (complement nil?))
@@ -60,11 +147,41 @@
doall)))
(defn load-schema! [conn]
- (d/transact conn (read-string (slurp "src/echelon/schema.edn"))))
+ (d/transact conn schema))
(defn load-database! [conn]
(println "Schema loading...")
(load-schema! conn)
(println "Data loading...")
- (load-data! conn)
- (println (str "There are " @counter " blank forms")))
+ (load-data! conn))
+
+(comment
+ (->> (list-registration-forms)
+ (map (comp
+ registration-datoms
+ #(json/read-str % :key-fn keyword)
+ slurp))
+ first
+ doall))
+(comment
+ (->> (list-registration-forms)
+ (map (comp
+ #(json/read-str % :key-fn keyword)
+ slurp))
+ first
+ doall))
+
+
+(comment
+ (let [uri "datomic:free://localhost:4334/echelon"]
+ (d/delete-database uri)
+ (d/create-database uri)
+ (load-schema! (d/connect uri))
+ (->> (list-registration-forms)
+ (take 1)
+ (map (comp
+ (partial d/transact (d/connect uri))
+ registration-datoms
+ #(json/read-str % :key-fn keyword)
+ slurp))
+ doall)))
View
66 src/echelon/schema.clj
@@ -1,15 +1,16 @@
(ns echelon.schema
- (:require [echelon.ali :refer [alis-attributes]]))
+ (:require [echelon.ali :refer [alis-attributes]]
+ [datomic.api :refer [tempid]]))
;;Helper functions for datomic. We're not doing too many fancy things
;;here with datomic and the main struggle has just been understanding
;;the layout of the data, so we've abstracted away the creation of the
;;attribute maps for datomic.
-(defn- enum [key] {:db/id #db/id[:db.part/user] :db/ident key})
+(defn- enum [key] {:db/id (tempid :db.part/user) :db/ident key})
(defn- proto-prop [prop doc]
- {:db/id #db/id [:db.part/db]
+ {:db/id (tempid :db.part/db)
:db/ident prop
:db/doc doc
:db.install/_attribute :db.part/db})
@@ -19,6 +20,10 @@
(-> (proto-prop prop doc)
(merge m))))
+(def long-prop
+ (prop-fn {:db/valueType :db.type/long
+ :db/cardinality :db.cardinality/one}))
+
(def string-prop
(prop-fn {:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}))
@@ -47,10 +52,14 @@
;;Various groupings of attributes from the schema
+(def data-attributes
+ [(long-prop :data/position
+ "Remembering the order in which we received the given data.")])
+
(def being-framework-attributes
- [(enum :being.records/being)
+ [(enum :being.record/being)
(string-prop :being/id "A uuid for the being")
- (ref-prop :records/type "A record's type.")
+ (ref-prop :record/type "A record's type.")
(ref-prop :record/represents
"Indicates that the record entity with this
attribute represents a being. This should be the
@@ -60,11 +69,12 @@
[(string-prop :address/first-line "First line for an address")
(string-prop :address/second-line "Second line for an address")
(string-prop :address/zipcode "Zipcode for an address")
+ (string-prop :address/city "City for an address")
(string-prop :address/state "State for an address")
(string-prop :address/country "Country for an address")])
(def client-attributes
- [(enum :lobbying.records/client)
+ [(enum :lobbying.record/client)
(string-prop :lobbying.client/name "Client name.")
(string-prop :lobbying.client/description "Client description.")
(component-prop :lobbying.client/main-address "Main address for the client.")
@@ -73,7 +83,7 @@
performed (bit.ly/1s3ZbG7)")])
(def registrant-attributes
- [(enum :lobbying.records/registrant)
+ [(enum :lobbying.record/registrant)
(string-prop :lobbying.registrant/name "Registrant name.")
(string-prop :lobbying.registrant/description "Registrant description.")
(component-prop :lobbying.registrant/main-address
@@ -83,14 +93,14 @@
performed (bit.ly/1s3ZbG7)")])
(def contact-attributes
- [(enum :lobbying.records/contact)
+ [(enum :lobbying.record/contact)
(string-prop :lobbying.contact/name-prefix "Contact name prefix.")
(string-prop :lobbying.contact/name "Contact name.")
(string-prop :lobbying.contact/phone "Contact phone.")
(string-prop :lobbying.contact/email "Contact email.")])
(def lobbyist-attributes
- [(enum :lobbying.records/lobbyist)
+ [(enum :lobbying.record/lobbyist)
(string-prop :lobbying.lobbyist/first-name "First name of lobbyist.")
(string-prop :lobbying.lobbyist/last-name "Last name of lobbyist.")
(string-prop :lobbying.lobbyist/suffix "Suffix of lobbyist.")
@@ -98,7 +108,7 @@
"No idea, this is often blank.")])
(def activity-attributes
- [(enum :lobbying.records/activity)
+ [(enum :lobbying.record/activity)
(string-prop :lobbying.activity/general-details
"Details about the lobbying generally done by the
registrant for the client on various issues.")
@@ -114,9 +124,9 @@
(component-props :lobbying.activity/lobbyists
"The foreign entities for the activity.")])
-;;(enum :lobbying.records/affiliated-organization)
-;;(enum :lobbying.records/foreign-entity)
-;;(enum :lobbying.records/individual)
+;;(enum :lobbying.record/affiliated-organization)
+;;(enum :lobbying.record/foreign-entity)
+;;(enum :lobbying.record/individual)
(def common-form-attributes
[;;Common parts of each form
@@ -138,11 +148,11 @@
"Potentially used, if the registrant is an individual for the form.")
(ref-prop :lobbying.form/source "Where the data came from.")
(enum :lobbying.form/sopr-html)
- (string-prop :lobbying.formd/document-id
+ (string-prop :lobbying.form/document-id
"Id of a document (provided by sopr).")])
(def registration-form-attributes
- [(enum :lobbying.records/registration)
+ [(enum :lobbying.record/registration)
(instant-prop :lobbying.registration/effective-date
"No idea what this one actually means.")
(component-props :lobbying.registration/affiliated-organizations
@@ -153,7 +163,7 @@
"Initial description of lobbying activity")])
(def report-form-attributes
- [(enum :lobbying.records/report)
+ [(enum :lobbying.record/report)
(component-props :lobbying.report/removed-foreign-entities
"Removed foreign entities.")
(component-props :lobbying.report/added-foreign-entities
@@ -176,15 +186,15 @@
])
(def schema
- (concat alis-attributes
- address-attributes
- being-framework-attributes
- client-attributes
- registrant-attributes
- contact-attributes
- lobbyist-attributes
- activity-attributes
- transaction-annotations-attributes
- common-form-attributes
- registration-form-attributes
- report-form-attributes))
+ (vec (concat data-attributes
+ alis-attributes
+ address-attributes
+ being-framework-attributes
+ client-attributes
+ registrant-attributes
+ contact-attributes
+ lobbyist-attributes
+ activity-attributes
+ common-form-attributes
+ registration-form-attributes
+ report-form-attributes)))
Please sign in to comment.
Something went wrong with that request. Please try again.