|
| 1 | +using Dates |
| 2 | +using BaseDirs |
| 3 | + |
| 4 | +""" |
| 5 | + CACHE_STALE_DURATION = 2 weeks (in seconds) |
| 6 | +
|
| 7 | +How long a cached entry will be kept for after becoming stale, |
| 8 | +before being deleted. |
| 9 | +""" |
| 10 | +const CACHE_STALE_DURATION = 2 * 24 * 60 * 60 |
| 11 | + |
| 12 | + |
| 13 | +# Request caching and reading |
| 14 | + |
| 15 | +""" |
| 16 | + dumpresponse(io::IO,, res::Response, body::IO) |
| 17 | +
|
| 18 | +Dump a response to an IO stream, including the response (`res`) URL, headers, |
| 19 | +and message `body`. |
| 20 | +
|
| 21 | +The `body` is restored to its original position after writing. |
| 22 | +""" |
| 23 | +function dumpresponse(io::IO, res::Downloads.Response, body::IO) |
| 24 | + println(io, res.url) |
| 25 | + println(io, res.message) |
| 26 | + for (k, v) in res.headers |
| 27 | + println(io, k, ": ", v) |
| 28 | + end |
| 29 | + println(io) |
| 30 | + mark(body) |
| 31 | + write(io, body) |
| 32 | + reset(body) |
| 33 | + nothing |
| 34 | +end |
| 35 | + |
| 36 | +""" |
| 37 | + tryreadresponse(io::IO) -> Response |
| 38 | +
|
| 39 | +Read a `Response` from an IO stream including the url, status, and headers. |
| 40 | +
|
| 41 | +The message body is not read, and the IO stream is left positioned at the |
| 42 | +beginning of the message body. |
| 43 | +""" |
| 44 | +function tryreadresponse(io::IO) |
| 45 | + url = readline(io) |
| 46 | + message = readline(io) |
| 47 | + components = split(message, ' ', limit=3) |
| 48 | + length(components) >= 2 || return |
| 49 | + proto = eachsplit(components[1], '/') |> first |> lowercase |
| 50 | + status = tryparse(Int, components[2]) |
| 51 | + !isnothing(status) || return |
| 52 | + headers = Pair{String, String}[] |
| 53 | + while !eof(io) |
| 54 | + if peek(io) == UInt8('\n') |
| 55 | + read(io, 1) |
| 56 | + break |
| 57 | + end |
| 58 | + name = readuntil(io, ':') |
| 59 | + !eof(io) && read(io, UInt8) == UInt8(' ') || return |
| 60 | + value = readline(io) |
| 61 | + push!(headers, name => value) |
| 62 | + end |
| 63 | + Downloads.Response(proto, url, status, message, headers) |
| 64 | +end |
| 65 | + |
| 66 | + |
| 67 | +# Caching requests to an endpoint |
| 68 | + |
| 69 | +cachedir() = BaseDirs.User.cache(BaseDirs.Project("RestClient")) |
| 70 | + |
| 71 | +function cachekey(url::String, headers::Union{<:AbstractVector, <:AbstractDict}, payload::Union{<:IO, Nothing}) |
| 72 | + h = hash(url) |
| 73 | + for (k, v) in headers |
| 74 | + h = hash(v, hash(k, h)) |
| 75 | + end |
| 76 | + if !isnothing(payload) |
| 77 | + mark(payload) |
| 78 | + # This sort of hashing is quite sub-optimal, but |
| 79 | + # it's way faster than most network speeds and |
| 80 | + # I don't think it's worth having some adversarial |
| 81 | + # protection here. The only stdlibs available are |
| 82 | + # CRC32c (trivially reversible) and slow SHA implementations. |
| 83 | + # If this proves to be a bottleneck, we could grab |
| 84 | + # `KangarooTwelve` or similar. |
| 85 | + while !eof(payload) |
| 86 | + peek(payload) # Refill buffer |
| 87 | + for b in readavailable(payload) |
| 88 | + h = hash(b, h) |
| 89 | + end |
| 90 | + end |
| 91 | + reset(payload) |
| 92 | + end |
| 93 | + string(h, base=10+26) |
| 94 | +end |
| 95 | + |
| 96 | +""" |
| 97 | + cachelifetime(req::Request, res::Response) -> Union{DateTime, Integer, Nothing} |
| 98 | +
|
| 99 | +Determine the expiry time of a cached response, based on the request and response. |
| 100 | +
|
| 101 | +# Specialisation |
| 102 | +
|
| 103 | +To specialise this function for a specific endpoint, define one of the following methods: |
| 104 | +
|
| 105 | +``` |
| 106 | +cachelifetime(req::Request{kind, <:AbstractEndpoint}, res::Response) |
| 107 | +cachelifetime([conf::RequestConfig], endpoint::AbstractEndpoint, res::Response) |
| 108 | +``` |
| 109 | +""" |
| 110 | +function cachelifetime end |
| 111 | + |
| 112 | +cachelifetime(req::Request, res::Response) = |
| 113 | + cachelifetime(req.config, req.endpoint, res) |
| 114 | + |
| 115 | +cachelifetime(@nospecialize(::RequestConfig), endpoint::AbstractEndpoint, res::Response) = |
| 116 | + cachelifetime(endpoint, res) |
| 117 | + |
| 118 | +cachelifetime(@nospecialize(::AbstractEndpoint), res::Response) = |
| 119 | + cachelifetime(res) |
| 120 | + |
| 121 | +cachelifetime(@nospecialize ::Response) = nothing |
| 122 | + |
| 123 | +""" |
| 124 | + expirytime(headers::Vector{Pair{String, String}}) -> Integer |
| 125 | +
|
| 126 | +Determine the expiry time of a response based on the headers. |
| 127 | +
|
| 128 | +The time is returned as a Unix timestamp, or `0` if the response should not be cached. |
| 129 | +
|
| 130 | +If set, the `Cache-Control` header is used to determine the expiry time. |
| 131 | +The directives `no-cache`, `no-store`, and `private` prevent caching, while |
| 132 | +`must-revalidate` forces revalidation, and `max-age` and `s-maxage` set the |
| 133 | +expiry time. |
| 134 | +
|
| 135 | +If `Cache-Control` is not set, the `Expires` header is used to determine the |
| 136 | +expiry time. |
| 137 | +
|
| 138 | +Should the headers not contain any information about expiry, `0` is returned. |
| 139 | +""" |
| 140 | +function expirytime(headers::Vector{Pair{String, String}}) |
| 141 | + cachecontrol = "" |
| 142 | + expiry = "" |
| 143 | + for (k, v) in headers |
| 144 | + if k == "cache-control" |
| 145 | + cachecontrol = lowercase(v) |
| 146 | + elseif k == "expires" |
| 147 | + expiry = v |
| 148 | + end |
| 149 | + end |
| 150 | + isempty(cachecontrol) && return 0 |
| 151 | + directives = map(strip, split(cachecontrol, ',')) |
| 152 | + any(d -> d ∈ ("no-cache", "no-store", "private"), directives) && return 0 |
| 153 | + "must-revalidate" ∈ directives && return trunc(Int, time()) |
| 154 | + for directive in directives |
| 155 | + if '=' in directive |
| 156 | + components = split(directive, '=', limit=2) |
| 157 | + if length(components) == 2 && first(components) ∈ ("max-age", "s-maxage") |
| 158 | + age = tryparse(UInt, strip(last(components))) |
| 159 | + !isnothing(age) && return trunc(Int, time()) + age |
| 160 | + end |
| 161 | + end |
| 162 | + end |
| 163 | + if !isempty(expiry) |
| 164 | + dtime = tryparse(HTTP_DATE_FORMAT, expiry) |
| 165 | + !isnothing(dtime) && return trunc(Int, datetime2unix(dtime)) |
| 166 | + end |
| 167 | + 0 |
| 168 | +end |
| 169 | + |
| 170 | + |
| 171 | +# File age checking |
| 172 | + |
| 173 | +""" |
| 174 | + expirytime(path::String) -> Integer |
| 175 | +
|
| 176 | +Return the expiry time of a cached response stored at `path`. |
| 177 | +
|
| 178 | +On supporting Linux systems, this uses a filesystem extended attribute to store |
| 179 | +the expiry time. |
| 180 | +""" |
| 181 | +function expirytime(path::String) |
| 182 | + @something(expirytime_xattr(path), |
| 183 | + Some(expirytime_read(path))) |
| 184 | +end |
| 185 | + |
| 186 | +@static if Sys.islinux() |
| 187 | + function expirytime_xattr(path::String) |
| 188 | + unixtime = Ref{Int64}() |
| 189 | + nb = @ccall getxattr(path::Cstring, "user.expirytime"::Cstring, unixtime::Ptr{Int64}, sizeof(Int64)::Csize_t)::Csize_t |
| 190 | + if nb > 0 |
| 191 | + unixtime[] |
| 192 | + end |
| 193 | + end |
| 194 | + |
| 195 | + function setexpiry(path::String, unixtime::Integer) |
| 196 | + timeref = Ref(Int64(unixtime)) |
| 197 | + @ccall setxattr(path::Cstring, "user.expirytime"::Cstring, timeref::Ptr{Int64}, sizeof(Int64)::Csize_t, 0::Cint)::Cint |
| 198 | + nothing |
| 199 | + end |
| 200 | +else |
| 201 | + expirytime_xattr(::String) = nothing |
| 202 | + setexpiry(::String) = nothing |
| 203 | +end |
| 204 | + |
| 205 | +function expirytime_read(path::String) |
| 206 | + res = open(tryreadresponse, path) |
| 207 | + if !isnothing(res) |
| 208 | + expirytime(res.headers) |
| 209 | + end |
| 210 | +end |
| 211 | + |
| 212 | + |
| 213 | +# Performing a cached request |
| 214 | + |
| 215 | +""" |
| 216 | + http_cached(method::String, url::String, payload::Union{<:IO, Nothing}; |
| 217 | + headers::Union{<:AbstractVector, <:AbstractDict} = Pair{String, String}[], |
| 218 | + timeout::Float64 = Inf) -> Tuple{Response, IO, Bool} |
| 219 | +
|
| 220 | +Perform an HTTP request, using a cached response if available. |
| 221 | +""" |
| 222 | +function http_cached(method::String, url::String, payload::Union{<:IO, Nothing} = nothing; |
| 223 | + headers::Union{<:AbstractVector, <:AbstractDict} = Pair{String, String}[], |
| 224 | + timeout::Float64 = Inf) |
| 225 | + ckey = cachekey(url, headers, payload) |
| 226 | + cfile = joinpath(cachedir(), ckey * ".http") |
| 227 | + isfile(cfile) || @goto freshreq |
| 228 | + etime = expirytime(cfile) |
| 229 | + return if etime >= ceil(Int, time()) |
| 230 | + io = open(cfile) |
| 231 | + res = tryreadresponse(io) |
| 232 | + if isnothing(res) |
| 233 | + close(io) |
| 234 | + rm(cfile) |
| 235 | + @goto freshreq |
| 236 | + end |
| 237 | + @debug debug_request("CACHE", url, headers, payload) |
| 238 | + res, io, true |
| 239 | + elseif etime + CACHE_STALE_DURATION >= ceil(Int, time()) |
| 240 | + io = open(cfile) |
| 241 | + res = tryreadresponse(io) |
| 242 | + if isnothing(res) |
| 243 | + close(io) |
| 244 | + rm(cfile) |
| 245 | + @goto freshreq |
| 246 | + end |
| 247 | + mheaders = copy(headers) |
| 248 | + @debug S"{inverse,magenta,bold: CACHE } checking validity of stale entry" |
| 249 | + for (k, v) in res.headers |
| 250 | + if k == "etag" |
| 251 | + push!(mheaders, "if-none-match" => v) |
| 252 | + break |
| 253 | + elseif k == "last-modified" |
| 254 | + push!(mheaders, "if-modified-since" => v) |
| 255 | + break |
| 256 | + end |
| 257 | + end |
| 258 | + if length(mheaders) == length(headers) |
| 259 | + mtime = Dates.format(HTTP_DATE_FORMAT, ctime(cfile)) |
| 260 | + push!(mheaders, "if-modified-since" => mtime) |
| 261 | + end |
| 262 | + eres, buf = http_request(method, url, payload; headers=mheaders, timeout) |
| 263 | + if eres.status == 304 |
| 264 | + close(buf) |
| 265 | + @debug S"{inverse,magenta,bold: CACHE } stale entry is valid" |
| 266 | + fperm = filemode(cfile) |
| 267 | + chmod(cfile, fperm | 0o200) |
| 268 | + setexpiry(cfile, trunc(Int, time())) |
| 269 | + chmod(cfile, fperm) |
| 270 | + res, io, true |
| 271 | + else |
| 272 | + close(io) |
| 273 | + res, buf, false |
| 274 | + end |
| 275 | + else |
| 276 | + @goto freshreq |
| 277 | + end |
| 278 | + @label freshreq |
| 279 | + res, buf = http_request(method, url, payload; headers, timeout) |
| 280 | + res, buf, false |
| 281 | +end |
| 282 | + |
| 283 | +function cachesave(req::Request, url::String, headers, payload, res::Response, body::IO) |
| 284 | + cachetime = cachelifetime(req, res) |
| 285 | + etime = if isnothing(cachetime) |
| 286 | + expirytime(res.headers) |
| 287 | + elseif cachetime isa DateTime |
| 288 | + datetime2unix(cachetime) |
| 289 | + else |
| 290 | + trunc(Int, time()) + cachetime |
| 291 | + end |
| 292 | + iszero(etime) && return |
| 293 | + cdir = cachedir() |
| 294 | + isdir(cdir) || mkpath(cdir) |
| 295 | + cfile = joinpath(cdir, cachekey(url, headers, payload) * ".http") |
| 296 | + open(cfile, "w") do io |
| 297 | + dumpresponse(io, res, body) |
| 298 | + end |
| 299 | + setexpiry(cfile, etime) |
| 300 | + chmod(cfile, 0o100444 & filemode(cfile)) # Make read-only |
| 301 | + nothing |
| 302 | +end |
| 303 | + |
| 304 | + |
| 305 | +# Cleanup |
| 306 | + |
| 307 | +const CLEANUP_BLOCK_MAX_SIZE = 100 |
| 308 | + |
| 309 | +""" |
| 310 | + cleancache() |
| 311 | +
|
| 312 | +Remove some of the files under `cachedir()` that have expired. |
| 313 | +
|
| 314 | +Statistically, this will eventually remove all expired files. |
| 315 | +""" |
| 316 | +function cleancache() |
| 317 | + cachefiles = readdir(cachedir(), join=true) |
| 318 | + isempty(cachefiles) && return |
| 319 | + for i in eachindex(cachefiles) |
| 320 | + j = rand(axes(cachefiles, 1)) |
| 321 | + cachefiles[i], cachefiles[j] = cachefiles[j], cachefiles[i] |
| 322 | + end |
| 323 | + bsize = clamp(length(cachefiles) ÷ 6, 1, CLEANUP_BLOCK_MAX_SIZE) |
| 324 | + strike = false |
| 325 | + cleantime = trunc(Int, time()) - CACHE_STALE_DURATION |
| 326 | + for block in Iterators.partition(cachefiles, bsize) |
| 327 | + nexpired = 0 |
| 328 | + for cfile in block |
| 329 | + if expirytime(cfile) < cleantime |
| 330 | + rm(cfile) |
| 331 | + nexpired += 1 |
| 332 | + end |
| 333 | + end |
| 334 | + if iszero(nexpired) |
| 335 | + strike && break |
| 336 | + strike = true |
| 337 | + else |
| 338 | + strike = false |
| 339 | + end |
| 340 | + end |
| 341 | +end |
0 commit comments