Permalink
Browse files

closer to a working version

  • Loading branch information...
ngrunwald committed Nov 11, 2010
1 parent 10c01a1 commit d080986b9ee442a8134a9c1d7c4e165f005039e8
Showing with 53 additions and 25 deletions.
  1. +1 −0 .gitignore
  2. +5 −5 project.clj
  3. +0 −20 src/clj_html_scrapper/core.clj
  4. +47 −0 src/clj_htmlunit/core.clj
View
@@ -0,0 +1 @@
+lib
View
@@ -1,6 +1,6 @@
(defproject clj-htmlunit "0.0.1-SNAPSHOT"
- :description "FIXME: write"
- :dependencies [[org.clojure/clojure "1.2.0-beta1"]
- [org.clojure/clojure-contrib "1.2.0-beta1"]
- [net.sourceforge.htmlunit/htmlunit "2.7"]]
- :dev-dependencies [[swank-clojure "1.2.1"]])
+ :description "Clojure simple wrapper for htmlunit"
+ :dependencies [[org.clojure/clojure "1.2.0"]
+ [org.clojure/clojure-contrib "1.2.0"]
+ [net.sourceforge.htmlunit/htmlunit "2.8"]]
+ :dev-dependencies [[swank-clojure "1.3.0-SNAPSHOT"]])
@@ -1,20 +0,0 @@
-(ns clj-htmlunit.core
- (:import (com.gargoylesoftware.htmlunit WebClient)))
-
-(def *client* (new WebClient))
-
-(defn get-page
- [url]
- (.getPage *client* url))
-
-(defn get-nodes-by-xpath
- [node xpath]
- (.getByXPath node xpath))
-
-(defn get-node-anchors
- [node]
- (get-nodes-by-xpath node "//a"))
-
-(defn get-nodes-anchors
- [nodes]
- (flatten (map #(get-node-anchors %) nodes)))
View
@@ -0,0 +1,47 @@
+(ns clj-htmlunit.core
+ (:import (com.gargoylesoftware.htmlunit WebClient BrowserVersion))
+ (:use [clojure.string :only [upper-case]]))
+
+(def *client*)  ; declared without a root value; no function in the visible code binds or reads it
+
+(defn make-client
+  "Builds a new WebClient emulating the browser named by `version`.
+
+  `version` is upper-cased and then compared against the nicknames IE6,
+  IE7, IE8, FF3 and FF3.6; any other value throws an Exception naming the
+  unknown browser. With no arguments, the nickname reported by
+  BrowserVersion/getDefault is used as `version`."
+  ([]
+     (make-client (.getNickname (BrowserVersion/getDefault))))
+  ([version]
+     (let [nickname (upper-case version)
+           browser  (condp = nickname
+                      "IE6"   BrowserVersion/INTERNET_EXPLORER_6
+                      "IE7"   BrowserVersion/INTERNET_EXPLORER_7
+                      "IE8"   BrowserVersion/INTERNET_EXPLORER_8
+                      "FF3"   BrowserVersion/FIREFOX_3
+                      "FF3.6" BrowserVersion/FIREFOX_3_6
+                      ;; no nickname matched: report the value as the caller passed it
+                      (throw (Exception. (str "Browser " version " is unknown"))))]
+       (WebClient. browser))))
+
+(defn get-page
+  "Calls getPage on `client` with `url` and returns whatever that call yields."
+  [client url]
+  (. client getPage url))
+
+(defn get-nodes-by-xpath
+  "Calls getByXPath on `node` with the `xpath` expression and returns the
+  result of that call unchanged."
+  [node xpath]
+  (. node getByXPath xpath))
+
+(defn get-first-node-by-xpath
+  "Calls getFirstByXPath on `node` with the `xpath` expression and returns
+  the result of that call unchanged."
+  [node xpath]
+  (. node getFirstByXPath xpath))
+
+(defn get-node-anchors
+  "Returns the <a> elements found underneath `node`.
+
+  The expression is the relative path .//a rather than //a: in XPath a
+  leading // is anchored at the document root, so //a would return every
+  anchor in the whole document no matter which node was passed in. For a
+  node that is itself the document/page the two forms select the same
+  elements."
+  [node]
+  (get-nodes-by-xpath node ".//a"))
+
+(defn get-nodes-anchors
+  "Returns one flat lazy seq holding the anchors of every node in `nodes`,
+  in node order. An empty `nodes` yields an empty seq.
+
+  The per-node results are concatenated with mapcat instead of being run
+  through flatten: flatten only descends into Clojure sequential
+  collections, and the per-node result is expected to be a java.util.List
+  (per the HtmlUnit getByXPath docs -- confirm), which flatten would leave
+  nested as-is."
+  [nodes]
+  (mapcat get-node-anchors nodes))
+
+(defn get-node-attributes
+  "Returns a map of the attributes of `node`: each key is the attribute
+  name turned into a keyword, each value is that attribute's value.
+  A node with no attributes gives an empty map."
+  [node]
+  (let [attr-map (.getAttributes node)]
+    (into {}
+          ;; walk the attribute collection by index, emitting [key value] pairs
+          (for [idx (range (.getLength attr-map))
+                :let [attr (.item attr-map idx)]]
+            [(keyword (.getName attr)) (.getValue attr)]))))

0 comments on commit d080986

Please sign in to comment.