Skip to content

Commit ede7b29

Browse files
authored
feat: use leantar in lake exe cache (#5710)
* feat: use `leantar` in `lake exe cache` * run pack in parallel * bump leantar * version check * use hex for hashes * chore: bump to 0.1.2, add .exe on windows * fix: file extension on linux * fix: arch is reported as arm64 sometimes * fix: statically linked 0.1.3
1 parent f3dd5c3 commit ede7b29

File tree

3 files changed

+93
-23
lines changed

3 files changed

+93
-23
lines changed

Cache/IO.lean

Lines changed: 85 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Authors: Arthur Paulino
77
import Lean.Data.HashMap
88
import Lean.Data.RBMap
99
import Lean.Data.RBTree
10+
import Lean.Data.Json.Printer
1011

1112
/-- Removes a parent path from the beginning of a path -/
1213
def System.FilePath.withoutParent (path parent : FilePath) : FilePath :=
@@ -16,8 +17,15 @@ def System.FilePath.withoutParent (path parent : FilePath) : FilePath :=
1617
| x, [] => x
1718
mkFilePath $ aux path.components parent.components
1819

19-
def UInt64.asTarGz (n : UInt64) : String :=
20-
s!"{n}.tar.gz"
20+
def Nat.toHexDigits (n : Nat) : Nat → (res : String := "") → String
21+
| 0, s => s
22+
| len+1, s =>
23+
let b := UInt8.ofNat (n >>> (len * 8))
24+
Nat.toHexDigits n len <|
25+
s.push (Nat.digitChar (b >>> 4).toNat) |>.push (Nat.digitChar (b &&& 15).toNat)
26+
27+
def UInt64.asLTar (n : UInt64) : String :=
28+
s!"{Nat.toHexDigits n.toNat 8}.ltar"
2129

2230
namespace Cache.IO
2331

@@ -51,12 +59,23 @@ def CURLBIN :=
5159
-- change file name if we ever need a more recent version to trigger re-download
5260
IO.CACHEDIR / s!"curl-{CURLVERSION}"
5361

62+
/-- leantar version at https://github.com/digama0/leangz -/
63+
def LEANTARVERSION :=
64+
"0.1.3"
65+
66+
def LEANTARBIN :=
67+
-- change file name if we ever need a more recent version to trigger re-download
68+
IO.CACHEDIR / s!"leantar-{LEANTARVERSION}{if System.Platform.isWindows then ".exe" else ""}"
69+
5470
def LAKEPACKAGESDIR : FilePath :=
5571
"lake-packages"
5672

5773
def getCurl : IO String := do
5874
return if (← CURLBIN.pathExists) then CURLBIN.toString else "curl"
5975

76+
def getLeanTar : IO String := do
77+
return if (← LEANTARBIN.pathExists) then LEANTARBIN.toString else "leantar"
78+
6079
abbrev PackageDirs := Lean.RBMap String FilePath compare
6180

6281
/-- Whether this is running on Mathlib repo or not -/
@@ -72,6 +91,7 @@ def getPackageDirs : IO PackageDirs := return .ofList [
7291
("MathlibExtras", if ← isMathlibRoot then "." else mathlibDepPath),
7392
("Aesop", LAKEPACKAGESDIR / "aesop"),
7493
("Std", LAKEPACKAGESDIR / "std"),
94+
("Cli", LAKEPACKAGESDIR / "Cli"),
7595
("ProofWidgets", LAKEPACKAGESDIR / "proofwidgets"),
7696
("Qq", LAKEPACKAGESDIR / "Qq")
7797
]
@@ -136,6 +156,45 @@ def validateCurl : IO Bool := do
136156
| _ => throw $ IO.userError "Invalidly formatted version of `curl`"
137157
| _ => throw $ IO.userError "Invalidly formatted response from `curl --version`"
138158

159+
def Version := Nat × Nat × Nat
160+
deriving Inhabited, DecidableEq
161+
162+
instance : Ord Version := let _ := @lexOrd; lexOrd
163+
instance : LE Version := leOfOrd
164+
165+
def parseVersion (s : String) : Option Version := do
166+
let [maj, min, patch] := s.splitOn "." | none
167+
some (maj.toNat!, min.toNat!, patch.toNat!)
168+
169+
def validateLeanTar : IO Unit := do
170+
if (← LEANTARBIN.pathExists) then return
171+
if let some version ← some <$> runCmd "leantar" #["--version"] <|> pure none then
172+
let "leantar" :: v :: _ := version.splitOn " "
173+
| throw $ IO.userError "Invalidly formatted response from `leantar --version`"
174+
let some v := parseVersion v | throw $ IO.userError "Invalidly formatted version of `leantar`"
175+
-- currently we need exactly one version of leantar, change this to reflect compatibility
176+
if v = (parseVersion LEANTARVERSION).get! then return
177+
let win := System.Platform.getIsWindows ()
178+
let target ← if win then
179+
pure "x86_64-pc-windows-msvc"
180+
else
181+
let mut arch ← (·.trim) <$> runCmd "uname" #["-m"] false
182+
if arch = "arm64" then arch := "aarch64"
183+
unless arch ∈ ["x86_64", "aarch64"] do
184+
throw $ IO.userError s!"unsupported architecture {arch}"
185+
pure <|
186+
if System.Platform.getIsOSX () then s!"{arch}-apple-darwin"
187+
else s!"{arch}-unknown-linux-musl"
188+
IO.println s!"leantar is too old; downloading more recent version"
189+
IO.FS.createDirAll IO.CACHEDIR
190+
let ext := if win then "zip" else "tar.gz"
191+
let _ ← runCmd "curl" #[
192+
s!"https://github.com/digama0/leangz/releases/download/v{LEANTARVERSION}/leantar-v{LEANTARVERSION}-{target}.{ext}",
193+
"-L", "-o", s!"{LEANTARBIN}.{ext}"]
194+
let _ ← runCmd "tar" #["-xf", s!"{LEANTARBIN}.{ext}",
195+
"-C", IO.CACHEDIR.toString, "--strip-components=1"]
196+
let _ ← runCmd "mv" #[(IO.CACHEDIR / s!"leantar").toString, LEANTARBIN.toString]
197+
139198
/-- Recursively gets all files from a directory with a certain extension -/
140199
partial def getFilesWithExtension
141200
(fp : FilePath) (extension : String) (acc : Array FilePath := #[]) :
@@ -150,7 +209,7 @@ namespace HashMap
150209

151210
def filter (hashMap : HashMap) (set : Lean.RBTree String compare) (keep : Bool) : HashMap :=
152211
hashMap.fold (init := default) fun acc path hash =>
153-
let contains := set.contains hash.asTarGz
212+
let contains := set.contains hash.asLTar
154213
let add := if keep then contains else !contains
155214
if add then acc.insert path hash else acc
156215

@@ -169,9 +228,9 @@ Each build file also has a `Bool` indicating whether that file is required for c
169228
def mkBuildPaths (path : FilePath) : IO $ Array (FilePath × Bool) := do
170229
let packageDir ← getPackageDir path
171230
return #[
231+
(packageDir / LIBDIR / path.withExtension "trace", true),
172232
(packageDir / LIBDIR / path.withExtension "olean", true),
173233
(packageDir / LIBDIR / path.withExtension "ilean", true),
174-
(packageDir / LIBDIR / path.withExtension "trace", true),
175234
(packageDir / IRDIR / path.withExtension "c", true),
176235
(packageDir / LIBDIR / path.withExtension "extra", false)]
177236

@@ -185,21 +244,25 @@ def allExist (paths : Array (FilePath × Bool)) : IO Bool := do
185244
def packCache (hashMap : HashMap) (overwrite : Bool) : IO $ Array String := do
186245
mkDir CACHEDIR
187246
IO.println "Compressing cache"
188-
let mut acc := default
247+
let mut acc := #[]
248+
let mut tasks := #[]
189249
for (path, hash) in hashMap.toList do
190-
let zip := hash.asTarGz
250+
let zip := hash.asLTar
191251
let zipPath := CACHEDIR / zip
192252
let buildPaths ← mkBuildPaths path
193253
if ← allExist buildPaths then
194254
if overwrite || !(← zipPath.pathExists) then
195-
discard $ runCmd "tar" $ #["-I", "gzip -9", "-cf", zipPath.toString] ++
196-
((← buildPaths.filterM (·.1.pathExists)) |>.map (·.1.toString))
255+
tasks := tasks.push <| ← IO.asTask do
256+
runCmd (← getLeanTar) $ #[zipPath.toString] ++
257+
((← buildPaths.filterM (·.1.pathExists)) |>.map (·.1.toString))
197258
acc := acc.push zip
259+
for task in tasks do
260+
_ ← IO.ofExcept task.get
198261
return acc
199262

200263
/-- Gets the set of all cached files -/
201264
def getLocalCacheSet : IO $ Lean.RBTree String compare := do
202-
let paths ← getFilesWithExtension CACHEDIR "gz"
265+
let paths ← getFilesWithExtension CACHEDIR "ltar"
203266
return .fromList (paths.data.map (·.withoutParent CACHEDIR |>.toString)) _
204267

205268
def isPathFromMathlib (path : FilePath) : Bool :=
@@ -215,19 +278,22 @@ def unpackCache (hashMap : HashMap) : IO Unit := do
215278
let hashMap := hashMap.filter (← getLocalCacheSet) true
216279
let size := hashMap.size
217280
if size > 0 then
281+
let now ← IO.monoMsNow
218282
IO.println s!"Decompressing {size} file(s)"
219283
let isMathlibRoot ← isMathlibRoot
220-
hashMap.forM fun path hash => do
221-
let _ ← IO.asTask do
222-
match path.parent with
223-
| none | some path => do
224-
let packageDir ← getPackageDir path
225-
mkDir $ packageDir / LIBDIR / path
226-
mkDir $ packageDir / IRDIR / path
284+
let child ← IO.Process.spawn
285+
{ cmd := ← getLeanTar, args := #["-x", "-j", "-"], stdin := .piped }
286+
let (stdin, child) ← child.takeStdin
287+
let config : Array Lean.Json := hashMap.fold (init := #[]) fun config path hash =>
288+
let pathStr := s!"{CACHEDIR / hash.asLTar}"
227289
if isMathlibRoot || !isPathFromMathlib path then
228-
runCmd "tar" #["-xzf", s!"{CACHEDIR / hash.asTarGz}"]
290+
config.push <| .str pathStr
229291
else -- only mathlib files, when not in the mathlib4 repo, need to be redirected
230-
runCmd "tar" #["-xzf", s!"{CACHEDIR / hash.asTarGz}", "-C", mathlibDepPath.toString]
292+
config.push <| .mkObj [("file", pathStr), ("base", mathlibDepPath.toString)]
293+
stdin.putStr <| Lean.Json.compress <| .arr config
294+
let exitCode ← child.wait
295+
if exitCode != 0 then throw $ IO.userError s!"leantar failed with error code {exitCode}"
296+
IO.println s!"unpacked in {(← IO.monoMsNow) - now} ms"
231297
else IO.println "No cache files to decompress"
232298

233299
/-- Retrieves the azure token from the environment -/
@@ -241,7 +307,7 @@ instance : Ord FilePath where
241307

242308
/-- Removes all cache files except for what's in the `keep` set -/
243309
def cleanCache (keep : Lean.RBTree FilePath compare := default) : IO Unit := do
244-
for path in ← getFilesWithExtension CACHEDIR "gz" do
310+
for path in ← getFilesWithExtension CACHEDIR "ltar" do
245311
if !keep.contains path then IO.FS.removeFile path
246312

247313
end Cache.IO

Cache/Main.lean

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,15 @@ def toPaths (args : List String) : List FilePath :=
5353
def curlArgs : List String :=
5454
["get", "get!", "get-", "put", "put!", "commit", "commit!"]
5555

56+
def leanTarArgs : List String :=
57+
["get", "get!", "pack", "pack!", "unpack"]
58+
5659
open Cache IO Hashing Requests in
5760
def main (args : List String) : IO Unit := do
5861
let hashMemo ← getHashMemo
5962
let hashMap := hashMemo.hashMap
60-
let goodCurl := !(curlArgs.contains (args.headD "") && !(← validateCurl))
63+
let goodCurl ← pure !curlArgs.contains (args.headD "") <||> validateCurl
64+
if leanTarArgs.contains (args.headD "") then validateLeanTar
6165
match args with
6266
| ["get"] => getFiles hashMap false goodCurl true
6367
| ["get!"] => getFiles hashMap true goodCurl true
@@ -69,7 +73,7 @@ def main (args : List String) : IO Unit := do
6973
| ["pack!"] => discard $ packCache hashMap true
7074
| ["unpack"] => unpackCache hashMap
7175
| ["clean"] =>
72-
cleanCache $ hashMap.fold (fun acc _ hash => acc.insert $ CACHEDIR / hash.asTarGz) .empty
76+
cleanCache $ hashMap.fold (fun acc _ hash => acc.insert $ CACHEDIR / hash.asLTar) .empty
7377
| ["clean!"] => cleanCache
7478
| ["put"] => putFiles (← packCache hashMap false) false (← getToken)
7579
| ["put!"] => putFiles (← packCache hashMap false) true (← getToken)

Cache/Requests.lean

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def mkGetConfigContent (hashMap : IO.HashMap) : IO String := do
3131
-- We sort the list so that the large files in `MathlibExtras` are requested first.
3232
hashMap.toArray.qsort (fun ⟨p₁, _⟩ ⟨_, _⟩ => p₁.components.head? = "MathlibExtras")
3333
|>.foldlM (init := "") fun acc ⟨_, hash⟩ => do
34-
let fileName := hash.asTarGz
34+
let fileName := hash.asLTar
3535
-- Below we use `String.quote`, which is intended for quoting for use in Lean code
3636
-- this does not exactly match the requirements for quoting for curl:
3737
-- ```
@@ -47,7 +47,7 @@ def mkGetConfigContent (hashMap : IO.HashMap) : IO String := do
4747

4848
/-- Calls `curl` to download a single file from the server to `CACHEDIR` (`.cache`) -/
4949
def downloadFile (hash : UInt64) : IO Bool := do
50-
let fileName := hash.asTarGz
50+
let fileName := hash.asLTar
5151
let url ← mkFileURL fileName none
5252
let path := IO.CACHEDIR / fileName
5353
let out ← IO.Process.output

0 commit comments

Comments
 (0)