-
Notifications
You must be signed in to change notification settings - Fork 10
/
cache.clj
114 lines (100 loc) · 3.29 KB
/
cache.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
;;;; Skyscraper - Cache backends
(ns skyscraper.cache
(:refer-clojure :exclude [load load-string])
(:require [clojure.edn :as edn]
[clojure.java.io :as io])
(:import [java.io Closeable InputStream OutputStream]))
;;; Netstrings
(let [bytes-type (type (byte-array []))]
(defn- byte-array? [item]
(= (type item) bytes-type)))
(defn- write-netstring
[^OutputStream stream item]
(let [^bytes b (cond (byte-array? item) item
(string? item) (.getBytes item)
:otherwise (.getBytes (pr-str item)))]
(.write stream (.getBytes (str (count b))))
(.write stream (int \:))
(.write stream b)
(.write stream (int \,))))
(defn- read-netstring
[^InputStream stream]
(loop [len 0]
(let [ch (.read stream)]
(cond (<= 48 ch 57) (recur (+ (* 10 len) ch -48))
(= ch 58) (let [arr (byte-array len)]
(.read stream arr)
(assert (= (.read stream) 44))
arr)
:otherwise (throw (Exception. "colon needed after length"))))))
;;; Actual cache
(defprotocol CacheBackend
"Provides facilities for caching downloaded blobs (typically HTML),
potentially enriched with some metadata (typically headers), in
some kind of storage. Implementations of this protocol can be passed
as `:html-cache` and `:processed-cache` options to
[[skyscraper.core/scrape]]."
(save-blob [cache key blob metadata])
(load-blob [cache key]))
;; An implementation of CacheBackend that stores the blobs in a
;; filesystem under a specific directory (root-dir). The blobs are
;; stored as netstrings (http://cr.yp.to/proto/netstrings.txt),
;; prefixed with metadata EDN also stored as a netstring. The
;; filenames correspond to the stored keys. root-dir must end in a
;; path separator (/).
(deftype FSCache
[root-dir]
CacheBackend
(save-blob [cache key blob metadata]
(let [meta-str (pr-str metadata)
file (str root-dir key)]
(io/make-parents file)
(with-open [f (io/output-stream file)]
(write-netstring f metadata)
(write-netstring f blob))))
(load-blob [cache key]
(try
(with-open [f (io/input-stream (str root-dir key))]
(let [meta-blob (read-netstring f)
blob (read-netstring f)]
{:meta (edn/read-string (String. meta-blob))
:blob blob}))
(catch Exception _ nil)))
Closeable
(close [cache]
nil))
(defn fs
"Creates a filesystem-based cache backend with a given root directory."
[root-dir]
(FSCache. (str root-dir "/")))
;; A dummy implementation of CacheBackend that doesn't actually cache data.
(deftype NullCache
[]
CacheBackend
(save-blob [_ _ _ _] nil)
(load-blob [_ _] nil)
Closeable
(close [_] nil))
(defn null
"Creates a null cache backend."
[]
(NullCache.))
(extend-protocol CacheBackend
nil
(save-blob [_ _ _ _] nil)
(load-blob [_ _] nil))
;; An in-memory implementation of CacheBackend backed by two atoms.
(deftype MemoryCache
[storage]
CacheBackend
(save-blob [cache key blob metadata]
(swap! storage assoc key {:blob blob, :meta metadata}))
(load-blob [cache key]
(@storage key))
Closeable
(close [cache]
nil))
(defn memory
"Creates a memory cache backend."
[]
(MemoryCache. (atom {})))