From 5dd45b508ee76160d03525bc9a7073c1f8e428aa Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Thu, 9 Apr 2026 14:29:51 +0000 Subject: [PATCH 1/7] Add --trace flag for automatic file access tracing via fsatrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running with `taskrunner --trace`, the subprocess is wrapped with fsatrace (LD_PRELOAD-based file system tracer) to discover which files are actually read and written during execution. After the command finishes, a report is printed to stderr showing project-relative file paths categorized as reads and writes. This helps verify that snapshot inputs are complete — e.g. running `--trace --force` on the docs BUILD script revealed that mdbook reads 56 files from apps/ and libs/ that aren't declared in the snapshot. Trace mode propagates to nested taskrunner calls via _taskrunner_trace env var. Requires fsatrace to be installed (clear error if missing). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/App.hs | 41 ++++++++++++++++++--- src/CliArgs.hs | 4 ++ src/Trace.hs | 84 ++++++++++++++++++++++++++++++++++++++++++ src/Types.hs | 1 + taskrunner.cabal | 3 +- test/Spec.hs | 31 ++++++++++++++-- test/t/trace-basic.out | 9 +++++ test/t/trace-basic.txt | 10 +++++ 8 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 src/Trace.hs create mode 100644 test/t/trace-basic.out create mode 100644 test/t/trace-basic.txt diff --git a/src/App.hs b/src/App.hs index dbf3969..1c6e65c 100644 --- a/src/App.hs +++ b/src/App.hs @@ -10,10 +10,10 @@ import Universum hiding (force) import System.Environment (setEnv, lookupEnv, getEnvironment) import System.Process (CreateProcess (..), StdStream (CreatePipe, UseHandle), proc, waitForProcess, createPipe, readCreateProcess, withCreateProcess) import System.IO - ( openBinaryFile, hSetBuffering, BufferMode(..), hFlush ) + ( openBinaryFile, hSetBuffering, BufferMode(..), hFlush, openTempFile ) import qualified System.FilePath as FilePath import System.FilePath (()) -import System.Directory ( createDirectoryIfMissing, doesFileExist, getCurrentDirectory, createDirectory ) +import System.Directory ( createDirectoryIfMissing, doesFileExist, getCurrentDirectory, createDirectory, removeFile ) import qualified Data.ByteString.Char8 as B8 import Control.Concurrent.Async (async, wait, cancel) import Control.Exception.Base (handle) @@ -44,6 +44,7 @@ import Control.Monad.EarlyReturn (withEarlyReturn, earlyReturn) import Data.Time.Format.ISO8601 (iso8601Show) import Data.Time.Clock (getCurrentTime) import qualified Data.ByteString.Lazy.Char8 as BL8 +import qualified Trace getSettings :: IO Settings getSettings = do @@ -62,6 +63,7 @@ getSettings = do mainBranch <- map toText <$> lookupEnv "TASKRUNNER_MAIN_BRANCH" quietMode <- (==Just "1") <$> lookupEnv "TASKRUNNER_QUIET" githubTokenRefreshThresholdSeconds <- maybe 300 read <$> lookupEnv "TASKRUNNER_GITHUB_TOKEN_REFRESH_THRESHOLD_SECONDS" + traceMode <- (==Just "1") <$> lookupEnv "_taskrunner_trace" pure Settings { stateDirectory , rootDirectory @@ -78,6 +80,7 @@ getSettings = do , force = False , quietMode , githubTokenRefreshThresholdSeconds + , trace = traceMode } main :: IO () @@ -85,7 +88,10 @@ main = do (args :: CliArgs) <- getCliArgs settings' <- getSettings let f = args.force - let settings = (settings' :: Settings) { force = f } + let traceMode = args.trace || settings'.trace + let settings = (settings' :: Settings) { force = f, trace = traceMode } + + when traceMode Trace.checkFsatrace let jobName = fromMaybe (FilePath.takeFileName args.cmd) args.name @@ -127,6 +133,17 @@ main = do responsePipeReadFd <- handleToFd responsePipeRead hSetBuffering responsePipeWrite LineBuffering + m_traceFile <- if settings.trace then do + (fp, h) <- openTempFile settings.stateDirectory "trace.log" + hClose h + pure (Just fp) + else + pure Nothing + + let (actualCmd, actualArgs) = case m_traceFile of + Just traceFile -> Trace.wrapWithFsatrace traceFile args.cmd args.args + Nothing -> (args.cmd, args.args) + -- Recursive: AppState is used before process is started (mostly for logging) rec @@ -146,13 +163,15 @@ main = do -- TODO: should we use delegate_ctlc or DIY? See https://hackage.haskell.org/package/process-1.6.20.0/docs/System-Process.html#g:4 -- -> We should DIY because we need to flush stream etc. (Nothing, Just stdoutPipe, Just stderrPipe, processHandle) <- Process.createProcess - (proc args.cmd args.args) { std_in = UseHandle devnull, std_out = CreatePipe + (proc actualCmd actualArgs) { std_in = UseHandle devnull, std_out = CreatePipe , std_err = CreatePipe , env=Just $ nubOrdOn fst $ [ ("BASH_FUNC_snapshot%%", "() {\n" <> $(embedStringFile "src/snapshot.sh") <> "\n}") , ("_taskrunner_request_pipe", show requestPipeWriteFd) , ("_taskrunner_response_pipe", show responsePipeReadFd) - ] <> parentEnv + ] + <> (if settings.trace then [("_taskrunner_trace", "1")] else []) + <> parentEnv } logDebug appState $ "Running command: " <> show (args.cmd : args.args) @@ -183,6 +202,18 @@ main = do logDebug appState $ "Command " <> show (args.cmd : args.args) <> " exited with code " <> show exitCode logDebugParent m_parentRequestPipe $ "Subtask " <> toText jobName <> " finished with " <> show exitCode + whenJust m_traceFile \traceFile -> do + traceExists <- doesFileExist traceFile + if traceExists then do + traceContent <- Text.readFile traceFile + let entries = Trace.parseTraceOutput traceContent + let filtered = Trace.filterTraceEntries settings.rootDirectory entries + let report = Trace.formatTraceReport settings.rootDirectory filtered + Text.hPutStr toplevelStderr report + removeFile traceFile + else + logWarn appState "Trace file not found after execution; fsatrace may have failed to start." + m_hashToSave <- readIORef appState.hashToSaveRef when (skipped && isNothing m_hashToSave && appState.isToplevel) do diff --git a/src/CliArgs.hs b/src/CliArgs.hs index f28311b..5fe1fe5 100644 --- a/src/CliArgs.hs +++ b/src/CliArgs.hs @@ -6,6 +6,7 @@ import Options.Applicative data CliArgs = CliArgs { name :: Maybe String -- Optional name argument , force :: Bool -- Skip cache + , trace :: Bool -- Trace file system access (requires fsatrace) , cmd :: String -- The command to run , args :: [String] -- List of arguments for the command } deriving (Show) @@ -21,6 +22,9 @@ commandParser = CliArgs ( long "force" <> short 'f' <> help "Skip cache and fuzzy cache" ) + <*> switch + ( long "trace" + <> help "Trace file system access during task execution (requires fsatrace)" ) <*> argument str ( metavar "CMD" <> help "The command to run" ) diff --git a/src/Trace.hs b/src/Trace.hs new file mode 100644 index 0000000..c6c086c --- /dev/null +++ b/src/Trace.hs @@ -0,0 +1,84 @@ +module Trace + ( checkFsatrace + , wrapWithFsatrace + , parseTraceOutput + , TraceEntry(..) + , TraceOp(..) + , filterTraceEntries + , formatTraceReport + ) where + +import Universum + +import qualified Data.Text as Text +import System.Directory (findExecutable) +import System.FilePath (makeRelative, isRelative) +import Data.List (nub) +import Utils (bail) + +data TraceOp = TraceRead | TraceWrite | TraceMove | TraceDelete | TraceTouch | TraceQuery + deriving (Show, Eq, Ord) + +data TraceEntry = TraceEntry + { op :: TraceOp + , path :: FilePath + } deriving (Show, Eq, Ord) + +checkFsatrace :: IO () +checkFsatrace = do + m <- findExecutable "fsatrace" + when (isNothing m) $ + bail "fsatrace is not installed. Install it from https://github.com/jacereda/fsatrace and ensure it is on your PATH." + +wrapWithFsatrace :: FilePath -> String -> [String] -> (String, [String]) +wrapWithFsatrace traceFile cmd args = + ("fsatrace", ["rwmd", traceFile, "--", cmd] ++ args) + +parseTraceOutput :: Text -> [TraceEntry] +parseTraceOutput content = + mapMaybe parseLine (Text.lines content) + where + parseLine line = + case Text.uncons line of + Just (opChar, rest) | Text.isPrefixOf "|" rest -> + case charToOp opChar of + Just op -> Just TraceEntry { op, path = toString (Text.drop 1 rest) } + Nothing -> Nothing + _ -> Nothing + + charToOp 'r' = Just TraceRead + charToOp 'w' = Just TraceWrite + charToOp 'm' = Just TraceMove + charToOp 'd' = Just TraceDelete + charToOp 't' = Just TraceTouch + charToOp 'q' = Just TraceQuery + charToOp _ = Nothing + +filterTraceEntries :: FilePath -> [TraceEntry] -> [TraceEntry] +filterTraceEntries rootDir entries = + nub $ filter isProjectFile entries + where + allSystemPrefixes = ["/usr", "/lib", "/lib64", "/etc", "/proc", "/dev", "/sys", "/tmp", "/nix", "/var"] + -- Don't exclude system prefixes that are ancestors of the root directory + systemPrefixes = filter (\sp -> not (sp `isPrefixOf` rootDir)) allSystemPrefixes + + isProjectFile entry = + let p = entry.path + rel = makeRelative rootDir p + in rootDir `isPrefixOf` p + && isRelative rel + && not (any (`isPrefixOf` p) systemPrefixes) + +formatTraceReport :: FilePath -> [TraceEntry] -> Text +formatTraceReport rootDir entries = + let reads_ = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceRead] + writes = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceWrite] + + section :: Text -> [FilePath] -> Text + section _ [] = "" + section title paths = title <> "\n" <> Text.unlines (map (\p -> " " <> toText p) paths) + + in "\n=== File System Trace Report ===\n\n" + <> section "Files read:" reads_ + <> (if not (null reads_) && not (null writes) then "\n" else "") + <> section "Files written:" writes diff --git a/src/Types.hs b/src/Types.hs index 64eb6c1..143bf71 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -22,6 +22,7 @@ data Settings = Settings , force :: Bool , quietMode :: Bool , githubTokenRefreshThresholdSeconds :: Int + , trace :: Bool } deriving (Show) type JobName = String diff --git a/taskrunner.cabal b/taskrunner.cabal index d21c30e..87087f1 100644 --- a/taskrunner.cabal +++ b/taskrunner.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 --- This file has been generated from package.yaml by hpack version 0.36.0. +-- This file has been generated from package.yaml by hpack version 0.37.0. -- -- see: https://github.com/sol/hpack @@ -31,6 +31,7 @@ library Control.Monad.EarlyReturn RemoteCache SnapshotCliArgs + Trace Types Utils other-modules: diff --git a/test/Spec.hs b/test/Spec.hs index 20976f6..b83cdb6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -12,6 +12,7 @@ import System.IO import System.IO.Temp (withSystemTempDirectory) import System.Exit (ExitCode(..)) import System.Environment (getEnv, lookupEnv) +import System.Directory (findExecutable) import System.FilePath.Glob as Glob import System.FilePath qualified as FP import Data.Default (Default(..)) @@ -40,19 +41,26 @@ goldenTests = do skipS3Explicit <- (==Just "1") <$> lookupEnv "SKIP_S3_TESTS" hasS3Creds <- hasS3Credentials let skipS3 = skipS3Explicit || not hasS3Creds + skipFsatraceExplicit <- (==Just "1") <$> lookupEnv "SKIP_FSATRACE_TESTS" + hasFsatrace <- isJust <$> findExecutable "fsatrace" + let skipFsatrace = skipFsatraceExplicit || not hasFsatrace inputFiles0 <- sort <$> findByExtension [".txt"] "test/t" inputFiles1 <- if skipS3 then filterM (fmap not . hasS3Directive) inputFiles0 else pure inputFiles0 + inputFiles2 <- if skipFsatrace + then filterM (fmap not . hasFsatraceDirective) inputFiles1 + else pure inputFiles1 let inputFiles - | skipSlow = filter (\filename -> not ("/slow/" `isInfixOf` filename)) inputFiles1 - | otherwise = inputFiles1 + | skipSlow = filter (\filename -> not ("/slow/" `isInfixOf` filename)) inputFiles2 + | otherwise = inputFiles2 -- Print informative message about what tests are running let totalTests = length inputFiles0 s3Tests = length inputFiles0 - length inputFiles1 - slowTests = length inputFiles1 - length inputFiles + fsatraceTests = length inputFiles1 - length inputFiles2 + slowTests = length inputFiles2 - length inputFiles runningTests = length inputFiles when (skipS3 && s3Tests > 0) $ do @@ -61,6 +69,12 @@ goldenTests = do else System.IO.putStrLn $ "S3 credentials not found - skipping " <> show s3Tests <> " S3-dependent tests" System.IO.putStrLn $ "To run S3 tests, set: TASKRUNNER_TEST_S3_ENDPOINT, TASKRUNNER_TEST_S3_ACCESS_KEY, TASKRUNNER_TEST_S3_SECRET_KEY" + when (skipFsatrace && fsatraceTests > 0) $ do + if skipFsatraceExplicit + then System.IO.putStrLn $ "SKIP_FSATRACE_TESTS=1 - skipping " <> show fsatraceTests <> " fsatrace-dependent tests" + else System.IO.putStrLn $ "fsatrace not found - skipping " <> show fsatraceTests <> " fsatrace-dependent tests" + System.IO.putStrLn $ "To run fsatrace tests, install fsatrace from https://github.com/jacereda/fsatrace" + when (skipSlow && slowTests > 0) $ System.IO.putStrLn $ "SKIP_SLOW_TESTS=1 - skipping " <> show slowTests <> " slow tests" @@ -174,6 +188,7 @@ data Options = Options { checkFileGlobs :: [Text] , toplevel :: Bool , s3 :: Bool + , fsatrace :: Bool -- | Whether to provide GitHub app credentials in environment. -- If github status is disabled, taskrunner should work without them. , githubKeys :: Bool @@ -187,6 +202,7 @@ instance Default Options where { checkFileGlobs = ["output"] , toplevel = True , s3 = False + , fsatrace = False , githubKeys = False , quiet = False , githubTokenLifetime = Nothing @@ -207,6 +223,9 @@ getOptions source = flip execState def $ go (lines source) ["#", "s3"] -> do modify (\s -> s { s3 = True }) go rest + ["#", "fsatrace"] -> do + modify (\s -> s { fsatrace = True }) + go rest ["#", "github", "keys"] -> do modify (\s -> s { githubKeys = True }) go rest @@ -270,6 +289,12 @@ hasS3Directive file = do let options = getOptions (toText content) pure options.s3 +hasFsatraceDirective :: FilePath -> IO Bool +hasFsatraceDirective file = do + content <- System.IO.readFile file + let options = getOptions (toText content) + pure options.fsatrace + hasS3Credentials :: IO Bool hasS3Credentials = do endpoint <- lookupEnv "TASKRUNNER_TEST_S3_ENDPOINT" diff --git a/test/t/trace-basic.out b/test/t/trace-basic.out new file mode 100644 index 0000000..400330f --- /dev/null +++ b/test/t/trace-basic.out @@ -0,0 +1,9 @@ +-- output: + +=== File System Trace Report === + +Files read: + input.txt + +Files written: + output.txt diff --git a/test/t/trace-basic.txt b/test/t/trace-basic.txt new file mode 100644 index 0000000..6885a57 --- /dev/null +++ b/test/t/trace-basic.txt @@ -0,0 +1,10 @@ +# check output +# fsatrace +# no toplevel +echo "hello" > input.txt +git init -q +git add input.txt +git commit -q -m "init" +taskrunner --trace -n mytask bash -e -c ' + cat input.txt > output.txt +' From 03be3a2cb955e49384d12f59a1f5c074b0e3caf6 Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Thu, 9 Apr 2026 19:24:22 +0000 Subject: [PATCH 2/7] Exclude .git/ from trace report Internal git metadata files aren't meaningful project inputs. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/Trace.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Trace.hs b/src/Trace.hs index c6c086c..36822e8 100644 --- a/src/Trace.hs +++ b/src/Trace.hs @@ -62,12 +62,16 @@ filterTraceEntries rootDir entries = -- Don't exclude system prefixes that are ancestors of the root directory systemPrefixes = filter (\sp -> not (sp `isPrefixOf` rootDir)) allSystemPrefixes + -- Paths within the project that should be excluded (not meaningful inputs) + excludedRelPrefixes = [".git/"] + isProjectFile entry = let p = entry.path rel = makeRelative rootDir p in rootDir `isPrefixOf` p && isRelative rel && not (any (`isPrefixOf` p) systemPrefixes) + && not (any (`isPrefixOf` rel) excludedRelPrefixes) formatTraceReport :: FilePath -> [TraceEntry] -> Text formatTraceReport rootDir entries = From e2ff7192fd7de4e7e7976c9bfcab0fec41fa3499 Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Fri, 10 Apr 2026 08:42:21 +0000 Subject: [PATCH 3/7] Add directory summary, --trace-files flag, and snapshot discrepancy detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --trace now shows a directory-level summary by default (e.g. "apps/ (51 files)") instead of listing every individual file. Use --trace-files for the full file list. Both modes also compare traced reads against declared snapshot inputs and report discrepancies — files actually read but not covered by the snapshot. For example, docs BUILD declares `snapshot .` (= docs/) but mdbook reads from apps/ and libs/. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/App.hs | 15 +++++-- src/CliArgs.hs | 4 ++ src/Trace.hs | 90 ++++++++++++++++++++++++++++++++++++++++-- src/Types.hs | 1 + test/t/trace-basic.out | 8 ++-- 5 files changed, 107 insertions(+), 11 deletions(-) diff --git a/src/App.hs b/src/App.hs index 1c6e65c..f47da1c 100644 --- a/src/App.hs +++ b/src/App.hs @@ -81,6 +81,7 @@ getSettings = do , quietMode , githubTokenRefreshThresholdSeconds , trace = traceMode + , traceFiles = False } main :: IO () @@ -88,8 +89,8 @@ main = do (args :: CliArgs) <- getCliArgs settings' <- getSettings let f = args.force - let traceMode = args.trace || settings'.trace - let settings = (settings' :: Settings) { force = f, trace = traceMode } + let traceMode = args.trace || args.traceFiles || settings'.trace + let settings = (settings' :: Settings) { force = f, trace = traceMode, traceFiles = args.traceFiles } when traceMode Trace.checkFsatrace @@ -208,8 +209,16 @@ main = do traceContent <- Text.readFile traceFile let entries = Trace.parseTraceOutput traceContent let filtered = Trace.filterTraceEntries settings.rootDirectory entries - let report = Trace.formatTraceReport settings.rootDirectory filtered + let format = if settings.traceFiles then Trace.formatFileReport else Trace.formatDirectoryReport + let report = format settings.rootDirectory filtered Text.hPutStr toplevelStderr report + + m_snapshotArgs' <- readIORef appState.snapshotArgsRef + whenJust m_snapshotArgs' \snapshotArgs -> do + let discrepancies = Trace.findDiscrepancies settings.rootDirectory cwd snapshotArgs.fileInputs filtered + unless (null discrepancies) do + Text.hPutStr toplevelStderr (Trace.formatDiscrepancies discrepancies) + removeFile traceFile else logWarn appState "Trace file not found after execution; fsatrace may have failed to start." diff --git a/src/CliArgs.hs b/src/CliArgs.hs index 5fe1fe5..2262a95 100644 --- a/src/CliArgs.hs +++ b/src/CliArgs.hs @@ -7,6 +7,7 @@ data CliArgs = CliArgs { name :: Maybe String -- Optional name argument , force :: Bool -- Skip cache , trace :: Bool -- Trace file system access (requires fsatrace) + , traceFiles :: Bool -- Show individual files in trace (instead of directory summary) , cmd :: String -- The command to run , args :: [String] -- List of arguments for the command } deriving (Show) @@ -25,6 +26,9 @@ commandParser = CliArgs <*> switch ( long "trace" <> help "Trace file system access during task execution (requires fsatrace)" ) + <*> switch + ( long "trace-files" + <> help "Like --trace but show individual files instead of directory summary" ) <*> argument str ( metavar "CMD" <> help "The command to run" ) diff --git a/src/Trace.hs b/src/Trace.hs index 36822e8..16b0018 100644 --- a/src/Trace.hs +++ b/src/Trace.hs @@ -5,14 +5,18 @@ module Trace , TraceEntry(..) , TraceOp(..) , filterTraceEntries - , formatTraceReport + , formatFileReport + , formatDirectoryReport + , findDiscrepancies + , formatDiscrepancies ) where import Universum import qualified Data.Text as Text +import qualified Data.Map.Strict as Map import System.Directory (findExecutable) -import System.FilePath (makeRelative, isRelative) +import System.FilePath (makeRelative, isRelative, ()) import Data.List (nub) import Utils (bail) @@ -73,8 +77,9 @@ filterTraceEntries rootDir entries = && not (any (`isPrefixOf` p) systemPrefixes) && not (any (`isPrefixOf` rel) excludedRelPrefixes) -formatTraceReport :: FilePath -> [TraceEntry] -> Text -formatTraceReport rootDir entries = +-- | Format trace report showing individual files (--trace-files) +formatFileReport :: FilePath -> [TraceEntry] -> Text +formatFileReport rootDir entries = let reads_ = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceRead] writes = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceWrite] @@ -86,3 +91,80 @@ formatTraceReport rootDir entries = <> section "Files read:" reads_ <> (if not (null reads_) && not (null writes) then "\n" else "") <> section "Files written:" writes + +-- | Format trace report showing directory-level summary (--trace, default) +formatDirectoryReport :: FilePath -> [TraceEntry] -> Text +formatDirectoryReport rootDir entries = + let reads_ = nub [makeRelative rootDir e.path | e <- entries, e.op == TraceRead] + writes = nub [makeRelative rootDir e.path | e <- entries, e.op == TraceWrite] + + dirSummary :: [FilePath] -> [(FilePath, Int)] + dirSummary = sortOn fst . Map.toList . foldl' countDir Map.empty + where + countDir acc fp = + let dir = topLevelDir fp + in Map.insertWith (+) dir (1 :: Int) acc + + topLevelDir :: FilePath -> FilePath + topLevelDir fp = case break (== '/') fp of + (_, '/':_) -> takeWhile (/= '/') fp <> "/" + _ -> fp -- file at root level, show as-is + + section :: Text -> [(FilePath, Int)] -> Text + section _ [] = "" + section title dirs = title <> "\n" <> Text.unlines + (map (\(d, n) -> " " <> toText d <> " (" <> show n <> " files)") dirs) + + in "\n=== File System Trace Report ===\n\n" + <> section "Directories read:" (dirSummary reads_) + <> (if not (null reads_) && not (null writes) then "\n" else "") + <> section "Directories written:" (dirSummary writes) + +-- | Resolve snapshot input pathspecs to directories relative to rootDirectory. +-- Pathspecs: "." = cwd, ":/path" = from root, "relative" = relative to cwd +resolveInputPaths :: FilePath -> FilePath -> [FilePath] -> [FilePath] +resolveInputPaths rootDir cwd = map resolve + where + cwdRel = makeRelative rootDir cwd + + resolve (':':'/':rest) = rest -- ":/libs/ps" -> "libs/ps" + resolve "." = cwdRel -- "." -> cwd relative to root + resolve p + | "/" `isPrefixOf` p = makeRelative rootDir p -- absolute path + | otherwise = cwdRel p -- relative to cwd + +-- | Check if a file path is covered by any of the resolved input directories. +isCoveredBy :: FilePath -> [FilePath] -> Bool +isCoveredBy file inputs = any covers inputs + where + covers "." = True -- "." means repo root, covers everything + covers input + | input == file = True + | otherwise = (input <> "/") `isPrefixOf` file + +-- | Find files that were read but not covered by declared snapshot inputs. +findDiscrepancies :: FilePath -> FilePath -> [FilePath] -> [TraceEntry] -> [FilePath] +findDiscrepancies rootDir cwd snapshotInputs entries = + let resolvedInputs = resolveInputPaths rootDir cwd snapshotInputs + reads_ = nub $ sort [makeRelative rootDir e.path | e <- entries, e.op == TraceRead] + -- Exclude the scripts themselves and taskrunner internals + excludePrefixes = [".taskrunner/", "scripts/"] + in filter (\f -> not (isCoveredBy f resolvedInputs) + && not (any (`isPrefixOf` f) excludePrefixes)) + reads_ + +-- | Format discrepancy warnings +formatDiscrepancies :: [FilePath] -> Text +formatDiscrepancies files = + let dirSummary = sortOn fst . Map.toList . foldl' countDir Map.empty $ files + where + countDir acc fp = + let dir = case break (== '/') fp of + (_, '/':_) -> takeWhile (/= '/') fp <> "/" + _ -> fp + in Map.insertWith (+) dir (1 :: Int) acc + + in "\n=== Snapshot Discrepancies ===\n" + <> "Files read but NOT covered by snapshot inputs:\n" + <> Text.unlines (map (\(d, n) -> " " <> toText d <> " (" <> show n <> " files)") dirSummary) + <> "\n" <> Text.unlines (map (\f -> " " <> toText f) files) diff --git a/src/Types.hs b/src/Types.hs index 143bf71..84151cf 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -23,6 +23,7 @@ data Settings = Settings , quietMode :: Bool , githubTokenRefreshThresholdSeconds :: Int , trace :: Bool + , traceFiles :: Bool } deriving (Show) type JobName = String diff --git a/test/t/trace-basic.out b/test/t/trace-basic.out index 400330f..1a33ab4 100644 --- a/test/t/trace-basic.out +++ b/test/t/trace-basic.out @@ -2,8 +2,8 @@ === File System Trace Report === -Files read: - input.txt +Directories read: + input.txt (1 files) -Files written: - output.txt +Directories written: + output.txt (1 files) From 9215dd15c0cc7d61916e3a82cf6f1566eea27ba7 Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Sat, 11 Apr 2026 08:53:09 +0000 Subject: [PATCH 4/7] Exclude .gitignore files, .git, and .taskrunner/ from trace report These are not meaningful project inputs: - .gitignore files at any depth (e.g. libs/hs/re-geo/.gitignore) - .git bare path (was only filtering .git/ prefix) - .taskrunner/ internal state directory Co-Authored-By: Claude Opus 4.6 (1M context) --- src/Trace.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Trace.hs b/src/Trace.hs index 16b0018..23e8d15 100644 --- a/src/Trace.hs +++ b/src/Trace.hs @@ -16,7 +16,7 @@ import Universum import qualified Data.Text as Text import qualified Data.Map.Strict as Map import System.Directory (findExecutable) -import System.FilePath (makeRelative, isRelative, ()) +import System.FilePath (makeRelative, isRelative, (), takeFileName) import Data.List (nub) import Utils (bail) @@ -67,7 +67,9 @@ filterTraceEntries rootDir entries = systemPrefixes = filter (\sp -> not (sp `isPrefixOf` rootDir)) allSystemPrefixes -- Paths within the project that should be excluded (not meaningful inputs) - excludedRelPrefixes = [".git/"] + excludedRelPrefixes = [".git/", ".taskrunner/"] + excludedExact = [".git", ".gitignore"] + excludedFileNames = [".gitignore"] isProjectFile entry = let p = entry.path @@ -76,6 +78,8 @@ filterTraceEntries rootDir entries = && isRelative rel && not (any (`isPrefixOf` p) systemPrefixes) && not (any (`isPrefixOf` rel) excludedRelPrefixes) + && rel `notElem` excludedExact + && takeFileName rel `notElem` excludedFileNames -- | Format trace report showing individual files (--trace-files) formatFileReport :: FilePath -> [TraceEntry] -> Text From 1a5b05f55a5b1aa7b9e820dca377f1e5ff3b1a8d Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Sat, 11 Apr 2026 08:55:31 +0000 Subject: [PATCH 5/7] Only trace when --trace is explicitly passed on CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parent fsatrace already traces the entire process tree via LD_PRELOAD inheritance, so nested taskrunner calls don't need to independently wrap with fsatrace. This was causing duplicate trace report sections in the output — one per nested taskrunner invocation. Now only the outermost --trace/--trace-files invocation wraps and reports. The _taskrunner_trace env var is still propagated but no longer triggers tracing on its own. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/App.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/App.hs b/src/App.hs index f47da1c..f6d43b6 100644 --- a/src/App.hs +++ b/src/App.hs @@ -63,7 +63,6 @@ getSettings = do mainBranch <- map toText <$> lookupEnv "TASKRUNNER_MAIN_BRANCH" quietMode <- (==Just "1") <$> lookupEnv "TASKRUNNER_QUIET" githubTokenRefreshThresholdSeconds <- maybe 300 read <$> lookupEnv "TASKRUNNER_GITHUB_TOKEN_REFRESH_THRESHOLD_SECONDS" - traceMode <- (==Just "1") <$> lookupEnv "_taskrunner_trace" pure Settings { stateDirectory , rootDirectory @@ -80,7 +79,7 @@ getSettings = do , force = False , quietMode , githubTokenRefreshThresholdSeconds - , trace = traceMode + , trace = False , traceFiles = False } @@ -89,10 +88,12 @@ main = do (args :: CliArgs) <- getCliArgs settings' <- getSettings let f = args.force - let traceMode = args.trace || args.traceFiles || settings'.trace - let settings = (settings' :: Settings) { force = f, trace = traceMode, traceFiles = args.traceFiles } + -- Only trace when explicitly requested via CLI, not when inherited via env. + -- The parent's fsatrace already traces the entire process tree via LD_PRELOAD. + let traceExplicit = args.trace || args.traceFiles + let settings = (settings' :: Settings) { force = f, trace = traceExplicit, traceFiles = args.traceFiles } - when traceMode Trace.checkFsatrace + when traceExplicit Trace.checkFsatrace let jobName = fromMaybe (FilePath.takeFileName args.cmd) args.name From 771f9f9318deae384153df1b771dfff7c0bc29ad Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Sat, 11 Apr 2026 11:34:46 +0000 Subject: [PATCH 6/7] Add comment crediting Rattle for fsatrace approach Co-Authored-By: Claude Opus 4.6 (1M context) --- src/Trace.hs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Trace.hs b/src/Trace.hs index 23e8d15..2a11e41 100644 --- a/src/Trace.hs +++ b/src/Trace.hs @@ -1,3 +1,9 @@ +-- | File system tracing via fsatrace. +-- The approach of using fsatrace (LD_PRELOAD-based file system call interception) +-- to automatically discover file dependencies is taken from Rattle +-- (https://github.com/ndmitchell/rattle), a build system that uses it to +-- automatically track which files are read/written by commands rather than +-- requiring manual dependency declarations. module Trace ( checkFsatrace , wrapWithFsatrace From f6654037d37d2d3cab969fe686a2f4417cf4bb14 Mon Sep 17 00:00:00 2001 From: Michal Kozakiewicz Date: Mon, 13 Apr 2026 07:45:30 +0000 Subject: [PATCH 7/7] Add trace-nested test verifying single report for nested taskrunner calls Ensures that when an outer taskrunner with --trace invokes a nested taskrunner, only one File System Trace Report section is produced, and it captures file operations from both outer and inner processes (since fsatrace's LD_PRELOAD inherits to all children). Co-Authored-By: Claude Opus 4.6 (1M context) --- test/t/trace-nested.out | 13 +++++++++++++ test/t/trace-nested.txt | 12 ++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 test/t/trace-nested.out create mode 100644 test/t/trace-nested.txt diff --git a/test/t/trace-nested.out b/test/t/trace-nested.out new file mode 100644 index 0000000..b31003e --- /dev/null +++ b/test/t/trace-nested.out @@ -0,0 +1,13 @@ +-- output: + +=== File System Trace Report === + +Directories read: + inner.txt (1 files) + outer.txt (1 files) + +Directories written: + builds/ (2 files) + inner-copy.txt (1 files) + locks/ (1 files) + outer-copy.txt (1 files) diff --git a/test/t/trace-nested.txt b/test/t/trace-nested.txt new file mode 100644 index 0000000..3ee6dbb --- /dev/null +++ b/test/t/trace-nested.txt @@ -0,0 +1,12 @@ +# check output +# fsatrace +# no toplevel +echo "outer" > outer.txt +echo "inner" > inner.txt +git init -q +git add outer.txt inner.txt +git commit -q -m "init" +taskrunner --trace -n outer bash -e -c ' + cat outer.txt > outer-copy.txt + taskrunner -n inner bash -e -c "cat inner.txt > inner-copy.txt" +'