Browse files

cleaning up the code and commenting

  • Loading branch information...
1 parent 50df294 commit 1800ab092abb061c7f6a8e96b1f5cfaa8354d90c @rippinrobr committed Nov 15, 2011
Showing with 13 additions and 12 deletions.
  1. +2 −1 hugoclr.clj
  2. +2 −2 hugoclr/data/csv.clj
  3. +9 −9 hugoclr/parser.clj
@@ -1,6 +1,7 @@
;; I ran gacutil -i HtmlAgilityPack in the libs dir as administrator
;; to load the dll into the gac for simplicity.
-(System.Reflection.Assembly/LoadWithPartialName "HtmlAgilityPack")
+;;(System.Reflection.Assembly/LoadWithPartialName "HtmlAgilityPack")
+(assembly-load-file "..\\libs\\HtmlAgilityPack.dll")
(ns hugoclr
@@ -1,10 +1,10 @@
-(defn clean
+(defn- clean
(.Replace (.Replace (.Replace (.Replace (.Replace val "," ";") "<em>" "") "</em>" "") "&#8217;" "'") "&amp;" "&"))
-(defn delimit
+(defn- delimit
[year books]
(map #(str year "," (:winner %) ","
(clean (:title %)) ","
@@ -23,53 +23,53 @@
(println (str "fetching " url ))
(.Load (new HtmlAgilityPack.HtmlWeb) url))
-(defn get-html-elements [url xpath]
+(defn- get-html-elements [url xpath]
"Gets all <a> that match the xpath and returns a collection of .NET objects that
represents the <a> nodes"
(let [nodes (.SelectNodes (.DocumentNode (hugoclr.parser/fetch-url url)) xpath)]
-(defn validate-award-link
+(defn- validate-award-link
"Filters out all non-award links so that I only retrieve pages that list nominees and
[url] (re-matches #".*hugo-history.*/.+" (.Value (first (.Attributes url)))))
-(defn get-year
+(defn- get-year
"Gets the year for the category being parsed. It retrieves the year from the <h2> tag.
It traverses the DOM hierarchy to get to the h2 tag and grab the text. Then it takes the
first 4 chars, which represent the year."
[p-node] (apply str (take 4 (.InnerHtml (second (.ChildNodes (.ParentNode p-node)))))))
-(defn get-work-title
+(defn- get-work-title
"parses the books/works title from the em tags."
[li-node] (.InnerHtml (first (.ChildNodes li-node))))
-(defn get-category-heading
+(defn- get-category-heading
"parses the category's title. We could use this code to grab all of the categories. I'm
only interested in the novels."
[p-node] (.InnerHtml (first (.SelectNodes p-node "./strong"))))
-(defn check-for-winner
+(defn- check-for-winner
"checks to see if the class attribute of the li tag is set to winner. If so the work in
question was the winner in the HUGO category."
(if (and (not (nil? (.Attributes li-node))) (> (.Count (.Attributes li-node)) 0))
(= "winner" (.Value (first (.Attributes li-node))))
-(defn create-work-record
+(defn- create-work-record
"Simply creates a record that represents a work "
(Work. (check-for-winner li-node) (get-work-title li-node)
(last (re-matches #".*</em>\s*(by|,)\s+(.*)\s+[\[\(].*" (.InnerHtml li-node)))
(last (re-matches #".*[\(\[](.*)[\)\]].*" (.InnerHtml li-node)))))
-(defn create-works-seq
+(defn- create-works-seq
"Creates all of the works in a given category represented by the sequence of li tags passed
[lis] (map create-work-record (seq lis)))
-(defn create-category-record
+(defn- create-category-record
"Creates a record that represents the category represented by the passed in ul tag."
(let [p-node (.PreviousSibling (.PreviousSibling ul))

0 comments on commit 1800ab0

Please sign in to comment.