Skip to content

Commit cdf8f42

Browse files
committed
Initial introduction of request caching
1 parent 8fbfe5a commit cdf8f42

File tree

5 files changed

+412
-34
lines changed

5 files changed

+412
-34
lines changed

Project.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ authors = ["TEC <git@tecosaur.net>"]
44
version = "0.1.0"
55

66
[deps]
7+
BaseDirs = "18cc8868-cbac-4acf-b575-c8ff214dc66f"
8+
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
79
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
810
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
911
StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
@@ -20,6 +22,8 @@ TableExt = "Tables"
2022
XMLExt = "XML"
2123

2224
[compat]
25+
BaseDirs = "1.0"
26+
Dates = "1.6"
2327
Downloads = "1.6"
2428
JSON3 = "1"
2529
PrecompileTools = "1.2"

src/RestClient.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export setfield
1414

1515
include("types.jl")
1616
include("interface.jl")
17+
include("caching.jl")
1718
include("requests.jl")
1819
include("utilities.jl")
1920
include("endpoint.jl")
@@ -28,4 +29,6 @@ macro reexport()
2829
:(export Single, List, nextpage)
2930
end
3031

32+
__init__() = atexit(cleancache)
33+
3134
end

src/caching.jl

Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
using Dates
2+
using BaseDirs
3+
4+
"""
5+
CACHE_STALE_DURATION = 2 weeks (in seconds)
6+
7+
How long a cached entry will be kept for after becoming stale,
8+
before being deleted.
9+
"""
10+
const CACHE_STALE_DURATION = 2 * 24 * 60 * 60
11+
12+
13+
# Request caching and reading
14+
15+
"""
16+
dumpresponse(io::IO,, res::Response, body::IO)
17+
18+
Dump a response to an IO stream, including the response (`res`) URL, headers,
19+
and message `body`.
20+
21+
The `body` is restored to its original position after writing.
22+
"""
23+
function dumpresponse(io::IO, res::Downloads.Response, body::IO)
24+
println(io, res.url)
25+
println(io, res.message)
26+
for (k, v) in res.headers
27+
println(io, k, ": ", v)
28+
end
29+
println(io)
30+
mark(body)
31+
write(io, body)
32+
reset(body)
33+
nothing
34+
end
35+
36+
"""
37+
tryreadresponse(io::IO) -> Response
38+
39+
Read a `Response` from an IO stream including the url, status, and headers.
40+
41+
The message body is not read, and the IO stream is left positioned at the
42+
beginning of the message body.
43+
"""
44+
function tryreadresponse(io::IO)
45+
url = readline(io)
46+
message = readline(io)
47+
components = split(message, ' ', limit=3)
48+
length(components) >= 2 || return
49+
proto = eachsplit(components[1], '/') |> first |> lowercase
50+
status = tryparse(Int, components[2])
51+
!isnothing(status) || return
52+
headers = Pair{String, String}[]
53+
while !eof(io)
54+
if peek(io) == UInt8('\n')
55+
read(io, 1)
56+
break
57+
end
58+
name = readuntil(io, ':')
59+
!eof(io) && read(io, UInt8) == UInt8(' ') || return
60+
value = readline(io)
61+
push!(headers, name => value)
62+
end
63+
Downloads.Response(proto, url, status, message, headers)
64+
end
65+
66+
67+
# Caching requests to an endpoint
68+
69+
cachedir() = BaseDirs.User.cache(BaseDirs.Project("RestClient"))
70+
71+
function cachekey(url::String, headers::Union{<:AbstractVector, <:AbstractDict}, payload::Union{<:IO, Nothing})
72+
h = hash(url)
73+
for (k, v) in headers
74+
h = hash(v, hash(k, h))
75+
end
76+
if !isnothing(payload)
77+
mark(payload)
78+
# This sort of hashing is quite sub-optimal, but
79+
# it's way faster than most network speeds and
80+
# I don't think it's worth having some adversarial
81+
# protection here. The only stdlibs available are
82+
# CRC32c (trivially reversible) and slow SHA implementations.
83+
# If this proves to be a bottleneck, we could grab
84+
# `KangarooTwelve` or similar.
85+
while !eof(payload)
86+
peek(payload) # Refill buffer
87+
for b in readavailable(payload)
88+
h = hash(b, h)
89+
end
90+
end
91+
reset(payload)
92+
end
93+
string(h, base=10+26)
94+
end
95+
96+
"""
97+
cachelifetime(req::Request, res::Response) -> Union{DateTime, Integer, Nothing}
98+
99+
Determine the expiry time of a cached response, based on the request and response.
100+
101+
# Specialisation
102+
103+
To specialise this function for a specific endpoint, define one of the following methods:
104+
105+
```
106+
cachelifetime(req::Request{kind, <:AbstractEndpoint}, res::Response)
107+
cachelifetime([conf::RequestConfig], endpoint::AbstractEndpoint, res::Response)
108+
```
109+
"""
110+
function cachelifetime end
111+
112+
cachelifetime(req::Request, res::Response) =
113+
cachelifetime(req.config, req.endpoint, res)
114+
115+
cachelifetime(@nospecialize(::RequestConfig), endpoint::AbstractEndpoint, res::Response) =
116+
cachelifetime(endpoint, res)
117+
118+
cachelifetime(@nospecialize(::AbstractEndpoint), res::Response) =
119+
cachelifetime(res)
120+
121+
cachelifetime(@nospecialize ::Response) = nothing
122+
123+
"""
124+
expirytime(headers::Vector{Pair{String, String}}) -> Integer
125+
126+
Determine the expiry time of a response based on the headers.
127+
128+
The time is returned as a Unix timestamp, or `0` if the response should not be cached.
129+
130+
If set, the `Cache-Control` header is used to determine the expiry time.
131+
The directives `no-cache`, `no-store`, and `private` prevent caching, while
132+
`must-revalidate` forces revalidation, and `max-age` and `s-maxage` set the
133+
expiry time.
134+
135+
If `Cache-Control` is not set, the `Expires` header is used to determine the
136+
expiry time.
137+
138+
Should the headers not contain any information about expiry, `0` is returned.
139+
"""
140+
function expirytime(headers::Vector{Pair{String, String}})
141+
cachecontrol = ""
142+
expiry = ""
143+
for (k, v) in headers
144+
if k == "cache-control"
145+
cachecontrol = lowercase(v)
146+
elseif k == "expires"
147+
expiry = v
148+
end
149+
end
150+
isempty(cachecontrol) && return 0
151+
directives = map(strip, split(cachecontrol, ','))
152+
any(d -> d ("no-cache", "no-store", "private"), directives) && return 0
153+
"must-revalidate" directives && return trunc(Int, time())
154+
for directive in directives
155+
if '=' in directive
156+
components = split(directive, '=', limit=2)
157+
if length(components) == 2 && first(components) ("max-age", "s-maxage")
158+
age = tryparse(UInt, strip(last(components)))
159+
!isnothing(age) && return trunc(Int, time()) + age
160+
end
161+
end
162+
end
163+
if !isempty(expiry)
164+
dtime = tryparse(HTTP_DATE_FORMAT, expiry)
165+
!isnothing(dtime) && return trunc(Int, datetime2unix(dtime))
166+
end
167+
0
168+
end
169+
170+
171+
# File age checking
172+
173+
"""
174+
expirytime(path::String) -> Integer
175+
176+
Return the expiry time of a cached response stored at `path`.
177+
178+
On supporting Linux systems, this uses a filesystem extended attribute to store
179+
the expiry time.
180+
"""
181+
function expirytime(path::String)
182+
@something(expirytime_xattr(path),
183+
Some(expirytime_read(path)))
184+
end
185+
186+
@static if Sys.islinux()
187+
function expirytime_xattr(path::String)
188+
unixtime = Ref{Int64}()
189+
nb = @ccall getxattr(path::Cstring, "user.expirytime"::Cstring, unixtime::Ptr{Int64}, sizeof(Int64)::Csize_t)::Csize_t
190+
if nb > 0
191+
unixtime[]
192+
end
193+
end
194+
195+
function setexpiry(path::String, unixtime::Integer)
196+
timeref = Ref(Int64(unixtime))
197+
@ccall setxattr(path::Cstring, "user.expirytime"::Cstring, timeref::Ptr{Int64}, sizeof(Int64)::Csize_t, 0::Cint)::Cint
198+
nothing
199+
end
200+
else
201+
expirytime_xattr(::String) = nothing
202+
setexpiry(::String) = nothing
203+
end
204+
205+
function expirytime_read(path::String)
206+
res = open(tryreadresponse, path)
207+
if !isnothing(res)
208+
expirytime(res.headers)
209+
end
210+
end
211+
212+
213+
# Performing a cached request
214+
215+
"""
216+
http_cached(method::String, url::String, payload::Union{<:IO, Nothing};
217+
headers::Union{<:AbstractVector, <:AbstractDict} = Pair{String, String}[],
218+
timeout::Float64 = Inf) -> Tuple{Response, IO, Bool}
219+
220+
Perform an HTTP request, using a cached response if available.
221+
"""
222+
function http_cached(method::String, url::String, payload::Union{<:IO, Nothing} = nothing;
223+
headers::Union{<:AbstractVector, <:AbstractDict} = Pair{String, String}[],
224+
timeout::Float64 = Inf)
225+
ckey = cachekey(url, headers, payload)
226+
cfile = joinpath(cachedir(), ckey * ".http")
227+
isfile(cfile) || @goto freshreq
228+
etime = expirytime(cfile)
229+
return if etime >= ceil(Int, time())
230+
io = open(cfile)
231+
res = tryreadresponse(io)
232+
if isnothing(res)
233+
close(io)
234+
rm(cfile)
235+
@goto freshreq
236+
end
237+
@debug debug_request("CACHE", url, headers, payload)
238+
res, io, true
239+
elseif etime + CACHE_STALE_DURATION >= ceil(Int, time())
240+
io = open(cfile)
241+
res = tryreadresponse(io)
242+
if isnothing(res)
243+
close(io)
244+
rm(cfile)
245+
@goto freshreq
246+
end
247+
mheaders = copy(headers)
248+
@debug S"{inverse,magenta,bold: CACHE } checking validity of stale entry"
249+
for (k, v) in res.headers
250+
if k == "etag"
251+
push!(mheaders, "if-none-match" => v)
252+
break
253+
elseif k == "last-modified"
254+
push!(mheaders, "if-modified-since" => v)
255+
break
256+
end
257+
end
258+
if length(mheaders) == length(headers)
259+
mtime = Dates.format(HTTP_DATE_FORMAT, ctime(cfile))
260+
push!(mheaders, "if-modified-since" => mtime)
261+
end
262+
eres, buf = http_request(method, url, payload; headers=mheaders, timeout)
263+
if eres.status == 304
264+
close(buf)
265+
@debug S"{inverse,magenta,bold: CACHE } stale entry is valid"
266+
fperm = filemode(cfile)
267+
chmod(cfile, fperm | 0o200)
268+
setexpiry(cfile, trunc(Int, time()))
269+
chmod(cfile, fperm)
270+
res, io, true
271+
else
272+
close(io)
273+
res, buf, false
274+
end
275+
else
276+
@goto freshreq
277+
end
278+
@label freshreq
279+
res, buf = http_request(method, url, payload; headers, timeout)
280+
res, buf, false
281+
end
282+
283+
function cachesave(req::Request, url::String, headers, payload, res::Response, body::IO)
284+
cachetime = cachelifetime(req, res)
285+
etime = if isnothing(cachetime)
286+
expirytime(res.headers)
287+
elseif cachetime isa DateTime
288+
datetime2unix(cachetime)
289+
else
290+
trunc(Int, time()) + cachetime
291+
end
292+
iszero(etime) && return
293+
cdir = cachedir()
294+
isdir(cdir) || mkpath(cdir)
295+
cfile = joinpath(cdir, cachekey(url, headers, payload) * ".http")
296+
open(cfile, "w") do io
297+
dumpresponse(io, res, body)
298+
end
299+
setexpiry(cfile, etime)
300+
chmod(cfile, 0o100444 & filemode(cfile)) # Make read-only
301+
nothing
302+
end
303+
304+
305+
# Cleanup
306+
307+
const CLEANUP_BLOCK_MAX_SIZE = 100
308+
309+
"""
310+
cleancache()
311+
312+
Remove some of the files under `cachedir()` that have expired.
313+
314+
Statistically, this will eventually remove all expired files.
315+
"""
316+
function cleancache()
317+
cachefiles = readdir(cachedir(), join=true)
318+
isempty(cachefiles) && return
319+
for i in eachindex(cachefiles)
320+
j = rand(axes(cachefiles, 1))
321+
cachefiles[i], cachefiles[j] = cachefiles[j], cachefiles[i]
322+
end
323+
bsize = clamp(length(cachefiles) ÷ 6, 1, CLEANUP_BLOCK_MAX_SIZE)
324+
strike = false
325+
cleantime = trunc(Int, time()) - CACHE_STALE_DURATION
326+
for block in Iterators.partition(cachefiles, bsize)
327+
nexpired = 0
328+
for cfile in block
329+
if expirytime(cfile) < cleantime
330+
rm(cfile)
331+
nexpired += 1
332+
end
333+
end
334+
if iszero(nexpired)
335+
strike && break
336+
strike = true
337+
else
338+
strike = false
339+
end
340+
end
341+
end

0 commit comments

Comments
 (0)