Permalink
Browse files

cleaning up the code and commenting

  • Loading branch information...
1 parent 50df294 commit 1800ab092abb061c7f6a8e96b1f5cfaa8354d90c @rippinrobr committed Nov 15, 2011
Showing with 13 additions and 12 deletions.
  1. +2 −1 hugoclr.clj
  2. +2 −2 hugoclr/data/csv.clj
  3. +9 −9 hugoclr/parser.clj
View
@@ -1,6 +1,7 @@
;; I ran gacutil -i HtmlAgilityPack in the libs dir as administrator
;; to load the dll into the gac for simplicity.
-(System.Reflection.Assembly/LoadWithPartialName "HtmlAgilityPack")
+;;(System.Reflection.Assembly/LoadWithPartialName "HtmlAgilityPack")
+(assembly-load-file "..\\libs\\HtmlAgilityPack.dll")
(ns hugoclr
(:gen-class)
@@ -1,10 +1,10 @@
(ns hugoclr.data.csv)
;; Sanitizes a string before it is embedded in a comma-delimited line.
;; The nested .Replace calls run innermost-first, in this order:
;;   1. ","        -> ";"   (so the value cannot introduce an extra column)
;;   2. "<em>"     -> ""    (strip the opening emphasis tag)
;;   3. "</em>"    -> ""    (strip the closing emphasis tag)
;;   4. "&#8217;"  -> "'"   (decode the right-single-quote entity)
;;   5. "&amp;"    -> "&"   (decode the ampersand entity)
;; Returns the resulting string.
;; NOTE(review): val is used directly as the target of .Replace, so a nil
;; val would presumably throw -- confirm callers always pass a string.
-(defn clean
+(defn- clean
[val]
(.Replace (.Replace (.Replace (.Replace (.Replace val "," ";") "<em>" "") "</em>" "") "&#8217;" "'") "&amp;" "&"))
-(defn delimit
+(defn- delimit
[year books]
(map #(str year "," (:winner %) ","
(clean (:title %)) ","
View
@@ -23,53 +23,53 @@
(println (str "fetching " url ))
(.Load (new HtmlAgilityPack.HtmlWeb) url))
;; Loads the page at `url` through hugoclr.parser/fetch-url, takes the
;; document's root node (.DocumentNode) and returns whatever .SelectNodes
;; yields for `xpath`.
;; NOTE(review): the string literal below comes AFTER the parameter vector,
;; so Clojure treats it as an ordinary body expression that is evaluated and
;; discarded -- it is not attached as the function's docstring. Moving it in
;; front of [url xpath] would make it visible to (doc ...).
;; NOTE(review): the xpath is supplied by the caller, so the result is not
;; necessarily limited to <a> nodes as the text below says.
;; NOTE(review): the let only names the value it immediately returns.
-(defn get-html-elements [url xpath]
+(defn- get-html-elements [url xpath]
"Gets all <a> that match the xpath and returns a collection of .NET objects that
represents the <a> nodes"
(let [nodes (.SelectNodes (.DocumentNode (hugoclr.parser/fetch-url url)) xpath)]
nodes))
;; Predicate-style helper: reads the value of the FIRST attribute of the
;; argument and matches it, in full, against #".*hugo-history.*/.+".
;; Because the pattern has no capture groups, re-matches returns the matched
;; string (truthy) on success and nil otherwise.
;; NOTE(review): despite its name, `url` must be a node object exposing
;; .Attributes, not a string.
;; NOTE(review): assumes the first attribute is the link target (href) --
;; attribute order is not checked here; confirm against the scraped markup.
-(defn validate-award-link
+(defn- validate-award-link
"Filters out all non-award links so that I only retrieve pages that list nominees and
winners."
[url] (re-matches #".*hugo-history.*/.+" (.Value (first (.Attributes url)))))
;; Walks from p-node to its parent, picks the parent's SECOND child node,
;; reads that node's InnerHtml, and returns its first four characters joined
;; back into a string.
;; NOTE(review): assumes the second child of the parent is the <h2> and that
;; its text begins with a four-digit year -- neither is verified here.
-(defn get-year
+(defn- get-year
"Gets the year for the category being parsed. It retrieves the year from the <h2> tag.
It traverses the DOM hierarchy to get to the h2 tag and grab the text. Then it takes the
first 4 chars which represents the year."
[p-node] (apply str (take 4 (.InnerHtml (second (.ChildNodes (.ParentNode p-node)))))))
;; Returns the InnerHtml of li-node's first child node.
;; NOTE(review): presumably the first child is the <em> element holding the
;; title; the code does not check the child's tag name -- confirm.
-(defn get-work-title
+(defn- get-work-title
"parses the books/works title from the em tags."
[li-node] (.InnerHtml (first (.ChildNodes li-node))))
;; Returns the InnerHtml of the first node matched by the relative XPath
;; "./strong" under p-node, i.e. the first direct <strong> child.
;; NOTE(review): if nothing matches, (first ...) yields nil and the
;; .InnerHtml call would presumably throw -- confirm every category <p>
;; contains a <strong>.
-(defn get-category-heading
+(defn- get-category-heading
"parses the category's title. We could use this code to grab all of the categories. I'm
only interested in the novels."
[p-node] (.InnerHtml (first (.SelectNodes p-node "./strong"))))
;; Returns true only when li-node has a non-nil attribute collection with at
;; least one entry AND the value of its FIRST attribute equals "winner";
;; returns false in every other case.
;; NOTE(review): only the first attribute is inspected and its name is not
;; checked, so this relies on `class` being the first attribute -- confirm.
-(defn check-for-winner
+(defn- check-for-winner
"checks to see if the class attribute of the li tag is set to winner. If so the work in
question was the winner in the HUGO category."
[li-node]
(if (and (not (nil? (.Attributes li-node))) (> (.Count (.Attributes li-node)) 0))
(= "winner" (.Value (first (.Attributes li-node))))
false))
;; Builds a Work (defined outside this hunk) from a single <li> node using
;; four positional values:
;;   1. the result of check-for-winner on the node,
;;   2. the result of get-work-title on the node,
;;   3. the last capture group of the first regex: the text that follows
;;      "</em>" and a "by" or "," separator, up to whitespace followed by an
;;      opening "[" or "(",
;;   4. the last capture group of the second regex: text enclosed in
;;      parentheses or square brackets.
;; When a regex does not match the node's whole InnerHtml, re-matches returns
;; nil and (last nil) is nil, so that value is passed to Work. as nil.
;; NOTE(review): values 3 and 4 are presumably the author and the publisher;
;; the Work field names are not visible here -- confirm.
-(defn create-work-record
+(defn- create-work-record
"Simply creates a record that represents a work "
[li-node]
(Work. (check-for-winner li-node) (get-work-title li-node)
(last (re-matches #".*</em>\s*(by|,)\s+(.*)\s+[\[\(].*" (.InnerHtml li-node)))
(last (re-matches #".*[\(\[](.*)[\)\]].*" (.InnerHtml li-node)))))
;; Maps create-work-record over (seq lis) and returns the resulting lazy
;; sequence; the records are only built as the sequence is consumed.
;; (seq lis) is nil for an empty or nil collection, in which case the result
;; is an empty sequence.
-(defn create-works-seq
+(defn- create-works-seq
"Creates all of the works in a given category represented by the sequence of li tags passed
in"
[lis] (map create-work-record (seq lis)))
-(defn create-category-record
+(defn- create-category-record
"Creates a record that represents the category represented by the passed in ul tag."
[ul]
(let [p-node (.PreviousSibling (.PreviousSibling ul))

0 comments on commit 1800ab0

Please sign in to comment.