Skip to content
Browse files

Basic Flipkart parsing

  • Loading branch information...
1 parent 27aed25 commit 3e6db502b65409532366610ba062eb7da68c0465 @swaroopch committed Jul 27, 2013
Showing with 110 additions and 8 deletions.
  1. +27 −1 resources/book.mustache
  2. +1 −1 resources/header.mustache
  3. +2 −2 src/isbnnetinclj2/handler.clj
  4. +67 −4 src/isbnnetinclj2/store.clj
  5. +13 −0 src/isbnnetinclj2/utils.clj
View
28 resources/book.mustache
@@ -2,12 +2,38 @@
<div class="container-fluid">
<div class="row-fluid">
- <div class="span8 offset2">
+ <div class="span4 offset2">
<dl>
+ {{#isbn}}
<dt>ISBN</dt>
<dd>{{isbn}}</dd>
+ {{/isbn}}
+
+ {{#title}}
+ <dt>Title</dt>
+ <dd>{{title}}</dd>
+ {{/title}}
+
+ {{#author}}
+ <dt>Author</dt>
+ <dd>{{author}}</dd>
+ {{/author}}
+
+ {{#publisher}}
+ <dt>Publisher</dt>
+ <dd>{{publisher}}</dd>
+ {{/publisher}}
</dl>
</div>
+ <div class="span4">
+ <p>
+ {{#image}}
+ <img src="{{image}}"
+ alt="{{title}}"
+ title="Image from Flipkart for {{title}}"/>
+ {{/image}}
+ </p>
+ </div>
</div>
</div>
View
2 resources/header.mustache
@@ -1,7 +1,7 @@
<!DOCTYPE html>
<html lang="en">
<head>
- <title>{{title}} | A quick way to find online book prices in India</title>
+ <title>{{pageTitle}} | A quick way to find online book prices in India</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
View
4 src/isbnnetinclj2/handler.clj
@@ -8,11 +8,11 @@
(defn front-page
[]
- (mus/render-file "frontpage" {:title "isbn.net.in"}))
+ (mus/render-file "frontpage" {:pageTitle "isbn.net.in"}))
(defn about-page
[]
- (mus/render-file "about" {:title "About isbn.net.in"}))
+ (mus/render-file "about" {:pageTitle "About isbn.net.in"}))
(defroutes app-routes
(GET "/" [] (front-page))
View
71 src/isbnnetinclj2/store.clj
@@ -1,8 +1,71 @@
(ns isbnnetinclj2.store
- (:require [stencil.core :as mus]))
+ (:require [taoensso.timbre :as log]
+ [isbnnetinclj2.utils :as utils]
+ [net.cgrand.enlive-html :as html]
+ [clojure.string :as string]
+ [stencil.core :as mus]))
+
+
+(defn pick-from-content
+  "Return the first item that the Enlive selector `path` selects from
+  `content`, or nil when the selection is empty."
+  [content path]
+  (-> content
+      (html/select path)
+      first))
+
+
+(defn parse-text-from-content
+  "Return the first item selected by `path` in `content` with leading and
+  trailing whitespace removed.
+
+  Returns nil when the selector matches nothing, instead of letting
+  `string/trim` throw a NullPointerException on nil."
+  [content path]
+  (when-let [picked (pick-from-content content path)]
+    (string/trim picked)))
+
+
+(defn parse-price-from-content
+  "Extract a price from the text selected by `path` in `content`.
+
+  Thousands separators (commas) are stripped and the LAST number found in
+  the text is parsed with `Float/parseFloat`.
+
+  Returns `Integer/MAX_VALUE` as a \"no price\" sentinel when the text is
+  missing or empty, when it contains no number, or when parsing fails.
+  The last two cases are logged. A failure never yields the exception's
+  string as the price, so callers always get a number back."
+  [content path]
+  (let [text (parse-text-from-content content path)]
+    (if (empty? text)
+      Integer/MAX_VALUE
+      (let [cleaned (string/replace (str text) "," "")
+            ;; The text may hold several numbers; the last one is used.
+            number (last (re-seq #"\d+(?:\.\d+)?" cleaned))]
+        (if (nil? number)
+          (do
+            (log/error (str "No price found in: " (pr-str text)))
+            Integer/MAX_VALUE)
+          (try
+            (Float/parseFloat number)
+            (catch Exception x
+              (log/error (str x))
+              Integer/MAX_VALUE)))))))
+
+
+(defn flipkart-url
+  "Build the Flipkart book-search URL for `isbn`.
+
+  `isbn` is URL-encoded (UTF-8) before it is placed in the `q` query
+  parameter, so characters such as `&`, `#` or spaces cannot corrupt the
+  URL or add extra parameters. Plain ISBNs (digits and X) come out
+  unchanged."
+  [isbn]
+  (format
+   "http://www.flipkart.com/books/pr?q=%s&sid=bks&as=off&as-show=off&otracker=start&affid=INSwaroCom"
+   (java.net.URLEncoder/encode (str isbn) "UTF-8")))
+
+
+(defn fetch-flipkart
+  "Fetch the Flipkart page for `isbn` and return a map with:
+
+    :isbn   the `isbn` argument, unchanged
+    :title  trimmed result of selecting
+            [:div.mprod-summary-title :h1 html/content]
+    :image  the :data-src attribute of the first match for
+            [:div#mprodimg-id :img], or nil if absent"
+  [isbn]
+  (log/debug (format "Fetching flipkart for %s" isbn))
+  (let [page (utils/fetch-page (flipkart-url isbn))
+        title (parse-text-from-content
+               page
+               [:div.mprod-summary-title :h1 html/content])
+        image-node (pick-from-content page [:div#mprodimg-id :img])]
+    {:isbn isbn
+     :title title
+     :image (get-in image-node [:attrs :data-src])}))
+
(defn book-page
[isbn]
- (mus/render-file "book"
- {:isbn isbn
- :title isbn}))
+  ;; Render the book template for `isbn` using details fetched from Flipkart.
+  ;; The fetched map is merged last, so its keys win over the defaults.
+  (let [flipkart-details (fetch-flipkart isbn)]
+    (mus/render-file "book"
+                     (merge {:isbn isbn
+                             ;; Use the ISBN as the page title when Flipkart
+                             ;; gave no (or an empty) title, so <title> is
+                             ;; never blank.
+                             :pageTitle (or (not-empty (:title flipkart-details))
+                                            isbn)}
+                            flipkart-details))))
View
13 src/isbnnetinclj2/utils.clj
@@ -0,0 +1,13 @@
+(ns isbnnetinclj2.utils
+ (:require [net.cgrand.enlive-html :as html]
+ [clj-time.core :as time]))
+
+
+(defn fetch-page
+  "Build a java.net.URL from the string `url` and return the result of
+  passing it to Enlive's `html-resource`."
+  [url]
+  (-> url
+      (java.net.URL.)
+      html/html-resource))
+
+
+(defn twenty-four-hours-ago
+  "Return the clj-time value one day before `(time/now)`."
+  []
+  (-> (time/now)
+      (time/minus (time/days 1))))

0 comments on commit 3e6db50

Please sign in to comment.
Something went wrong with that request. Please try again.