Permalink
Browse files

Add download feedbooks clj file

  • Loading branch information...
1 parent df23a59 commit 01d099ff305c3e31f517e26af3728fcfeabdd7e7 feng committed Apr 4, 2011
@@ -1,10 +1,13 @@
package feedbooks;
+// This is the first step in downloading the public books from Feedbooks.
+
import java.io.File;
import java.io.IOException;
import java.net.URL;
public class FeedBooksDownload {
+
public static void main(String[] args) throws IOException {
@@ -0,0 +1,29 @@
+; Every book on Feedbooks has an Atom entry; it holds all of the book's metadata and links to comments, etc.
+
+(ns feedbooks.download-entry-atom
+ (:use [clojure.contrib.pprint :only [pprint]])
+ (:require [clojure.contrib.lazy-xml :as xml])
+ (:import [java.io File]
+ [java.net URL]
+ [feedbooks DownloadManager]))
+
+
+; Root directory that `download` walks (via file-seq) looking for files
+; whose path contains "opds".
+(def seedroot
+  (new File "/tmp/feedbooks/pop"))
+
+(defn download
+  "Downloads the Atom entry document of every book listed under `seedroot`.
+
+  Every file under `seedroot` whose path contains \"opds\" is parsed as XML.
+  For each :entry element directly below the root, the first child :link
+  whose type attribute is \"application/atom+xml;type=entry\" is taken, and
+  its href is passed to DownloadManager/download with the destination
+  /tmp/books/<name>, where <name> is whatever follows \"book/\" in the href.
+  One line, `<name>   <href>`, is printed to *out* per download.
+
+  Entries that have no such link, or whose href does not contain \"book/\",
+  are skipped and reported on *err* instead of aborting the whole run."
+  []
+  (let [files (filter #(re-find #"opds" (str %)) (file-seq seedroot))
+        entry-link? (fn [e]
+                      (and
+                       (= (:tag e) :link)
+                       (= (-> e :attrs :type) "application/atom+xml;type=entry")))]
+    (doseq [file files]
+      (let [xml-tree (xml/parse-trim file)
+            entries (filter #(= (:tag %) :entry) (:content xml-tree))]
+        (doseq [entry entries]
+          (let [link (first (filter entry-link? (:content entry)))
+                href (-> link :attrs :href)
+                ; re-find throws on nil, so only match when an href exists
+                filename (when href (last (re-find #"book/(.+)" href)))]
+            (if filename
+              (do
+                (DownloadManager/download (URL. href)
+                                          (File. (str "/tmp/books/" filename)))
+                (println filename " " href))
+              ; a nil filename would make the destination the bare
+              ; /tmp/books/ directory, so skip and say why
+              (binding [*out* *err*]
+                (println "skipping entry without a usable book link:" href)))))))))
@@ -0,0 +1,57 @@
+; Generate the download list (written to meta.txt), which is then fed to DownloadManager.java
+
+(ns feedbooks.gen-download-list
+ (:use [clojure.contrib.pprint :only [pprint]])
+ (:require [clojure.contrib.lazy-xml :as xml])
+ (:import [java.io File FileWriter]
+ [java.net URL]
+ [feedbooks DownloadManager]))
+
+
+; Root directory that is walked (via file-seq) to build `allfiles`.
+(def seedroot
+  (new File "/tmp/books/"))
+
+; Lazy seq of every File under `seedroot` whose path string contains "atom".
+(def allfiles
+  (->> (file-seq seedroot)
+       (filter (fn [f] (re-find #"atom" (str f))))))
+
+(defn get-links
+  "Returns a lazy seq of the direct children of `xml` (the items of its
+  :content) whose :tag is :link."
+  [xml]
+  (for [node (:content xml)
+        :when (= :link (:tag node))]
+    node))
+
+(defn get-id
+  "Returns the run of digits that follows \"books/\" in the string form of
+  `file`, or nil when the path contains no such run."
+  [file]
+  (let [[_ id] (re-find #"books/(\d+)" (str file))]
+    id))
+
+(defn cover-uri
+  "Builds the URL string of the cover image for book `id` on
+  covers.feedbooks.net (fixed t=1301658884 query parameter)."
+  [id]
+  (-> "http://covers.feedbooks.net/book/"
+      (str id ".jpg?t=1301658884")))
+
+(defn thumb-uri
+  "Builds the URL string of the thumbnail-sized cover image for book `id`
+  on covers.feedbooks.net (size=thumbnail, fixed t=1301658884)."
+  [id]
+  (-> "http://covers.feedbooks.net/book/"
+      (str id ".jpg?size=thumbnail&t=1301658884")))
+
+(defn categories-uri
+  "Builds the URL string of the categories Atom feed for book `id`."
+  [id]
+  (-> "http://www.feedbooks.com/book/"
+      (str id "/categories.atom")))
+
+(defn comments-uri
+  "Builds the URL string of the comments Atom feed for book `id`."
+  [id]
+  (-> "http://www.feedbooks.com/book/"
+      (str id "/comments.atom")))
+
+(defn similar-uri
+  "Builds the URL string of the similar-books Atom feed for book `id`."
+  [id]
+  (-> "http://www.feedbooks.com/book/"
+      (str id "/similar.atom")))
+
+(defn download-links
+  "Returns a lazy seq of the :href attribute of every element of `links`
+  whose :rel attribute equals \"http://opds-spec.org/acquisition\"."
+  [links]
+  (for [link links
+        :when (= "http://opds-spec.org/acquisition" (-> link :attrs :rel))]
+    (-> link :attrs :href)))
+
+(defn get-name
+  "Returns the part of `uri` that starts at the first run of digits
+  followed by a dot and runs to the end of the string, or nil when `uri`
+  contains no such run."
+  [uri]
+  (let [name-pattern #"\d+\..+$"]
+    (re-find name-pattern uri)))
+
+(defn download
+  "Writes the download list to /tmp/meta.txt.
+
+  For every file in `allfiles` the book id is taken from the file path
+  (get-id) and the file is parsed as XML. Five fixed lines are written per
+  book (cover, thumbnail, categories, comments and similar feeds), followed
+  by one line per acquisition link found among the root's :link children.
+  Each line is a target file name, a tab-and-space separator and the URL.
+
+  The writer is opened with with-open so the file is closed when the list
+  is complete or if parsing one of the files throws."
+  []
+  (with-open [out (FileWriter. "/tmp/meta.txt")]
+    (binding [*out* out]
+      (doseq [file allfiles]
+        (let [xml-tree (xml/parse-trim file)
+              links (get-links xml-tree)
+              file-links (download-links links)
+              id (get-id file)]
+          ; NOTE(review): the fixed lines are separated by "\t " while the
+          ; acquisition lines use " \t "; kept byte-for-byte. Confirm what
+          ; DownloadManager.java expects before normalizing.
+          (println (str id ".jpg\t") (cover-uri id))
+          (println (str id "-thumb.jpg\t") (thumb-uri id))
+          (println (str id "-categories.atom\t") (categories-uri id))
+          (println (str id "-comments.atom\t") (comments-uri id))
+          (println (str id "-similar.atom\t") (similar-uri id))
+          (doseq [link file-links]
+            (println (get-name link) "\t" link)))))))

0 comments on commit 01d099f

Please sign in to comment.