Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 32 additions & 6 deletions modules/nf-commons/src/main/nextflow/util/HashBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package nextflow.util;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.FileVisitResult;
Expand Down Expand Up @@ -48,7 +49,7 @@
import nextflow.script.types.Bag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static nextflow.Const.DEFAULT_ROOT;
import static nextflow.util.CacheHelper.HashMode;


Expand Down Expand Up @@ -262,7 +263,7 @@ static private Hasher hashFile( Hasher hasher, Path path, HashMode mode, Path ba
log.warn("Unable to get file attributes file: {} -- Cause: {}", FilesEx.toUriString(path), e.toString());
}

if( (mode==HashMode.STANDARD || mode==HashMode.LENIENT) && isAssetFile(path) ) {
if( (mode==HashMode.STANDARD || mode==HashMode.LENIENT) && isAssetFile(path, DEFAULT_ROOT) ) {
if( attrs==null ) {
// when file attributes are not avail, or it's a directory
// hash the file using the file name path and the repository
Expand Down Expand Up @@ -506,12 +507,34 @@ static private byte[] sumBytes(byte[] resultBytes, byte[] nextBytes) {

/**
* Check if the argument is an asset file, i.e. a file that is part of the
* pipeline Git repository
* pipeline Git repository.
*
* <p>Asset files are hashed using their content (SHA-256) rather than metadata
* to maintain cache validity across different clones where timestamps may differ
* on remote executors like batch processing systems.
*
* <p>This method checks two locations:
* <ol>
* <li>Files under {@code session.getBaseDir()} - the script's working directory</li>
* <li>Files under {@code assetRoot} - the repository root (typically ~/.nextflow/assets)</li>
* </ol>
*
* The distinction is important when executing workflows from subdirectories using
* the main-script parameter, as repository assets may exist outside the script's
* directory but still be part of the repository.
*
* @param path
* The item to check.
* @param assetRoot
* Location where assets are being stored (the repository root).
* @return
* {@code true} if the path is included in the pipeline Git repository,
* {@code false} otherwise.
*
* @see <a href="https://github.com/nextflow-io/nextflow/issues/6604">Issue #6604</a>
* @see <a href="https://github.com/nextflow-io/nextflow/pull/6605">PR #6605</a>
*/
static protected boolean isAssetFile(Path path) {
static protected boolean isAssetFile(Path path, File assetRoot) {
final ISession session = Global.getSession();
if( session==null )
return false;
Expand All @@ -521,8 +544,11 @@ static protected boolean isAssetFile(Path path) {
// if the file belongs to a different file system, it cannot be a file belonging to the repo
if( session.getBaseDir().getFileSystem()!=path.getFileSystem() )
return false;
// if the file is in the same directory as the base dir it's a asset by definition
return path.startsWith(session.getBaseDir());
// Check both the script's base directory and the repository root.
// This handles cases where a workflow is executed from a subdirectory
// (using the main-script parameter) but references assets elsewhere in the repo.
// The assetRoot check ensures these non-sibling assets are still recognized.
return path.startsWith(session.getBaseDir()) || path.startsWith(assetRoot.toPath());
}

}
28 changes: 25 additions & 3 deletions modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ class HashBuilderTest extends Specification {
def 'should validate is asset file'() {
when:
def BASE = Paths.get("/some/pipeline/dir")
def ROOT = new File("/some/pipeline/")
and:
Global.session = Mock(Session) { getBaseDir() >> BASE }
then:
!HashBuilder.isAssetFile(BASE.resolve('foo'))
!HashBuilder.isAssetFile(BASE.resolve('foo'), ROOT)


when:
Expand All @@ -85,9 +86,30 @@ class HashBuilderTest extends Specification {
getCommitId() >> '123456'
}
then:
HashBuilder.isAssetFile(BASE.resolve('foo'))
HashBuilder.isAssetFile(BASE.resolve('foo'), ROOT)
and:
!HashBuilder.isAssetFile(Paths.get('/other/dir'))
!HashBuilder.isAssetFile(Paths.get('/other/dir'), ROOT)
}

// Data-driven check of HashBuilder.isAssetFile(path, assetRoot) for candidate paths
// that may sit outside the session base directory. Each `where:` row supplies the
// mocked session base dir (BASE) and commit id (COMMIT_ID), the asset root (ROOT),
// the candidate PATH and the EXPECTED boolean result.
def 'should validate is asset file when not part of base directory'() {
given:
// stub the session so that it reports the row's base directory and commit id
Global.session = Mock(Session) {
getBaseDir() >> Paths.get(BASE)
getCommitId() >> COMMIT_ID
}

expect:
HashBuilder.isAssetFile(Paths.get(PATH), new File(ROOT)) == EXPECTED

// First table: ROOT is the parent directory of BASE.
//  - a null commit id is expected to give false even for a path under BASE
//    (NOTE(review): presumably isAssetFile rejects sessions without a commit id;
//    that check is not visible from here -- confirm against the implementation)
//  - a path under neither BASE nor ROOT is expected to give false
//  - a path under ROOT but outside BASE is expected to give true
// Second table (after `and:`): BASE and ROOT are unrelated directories; a path is
// expected to match when it is under either one of them, and not otherwise.
where:
BASE | ROOT | COMMIT_ID | PATH | EXPECTED
"/some/pipeline/dir" | "/some/pipeline/" | null | "/some/pipeline/dir/foo" | false
"/some/pipeline/dir" | "/some/pipeline/" | '123456' | '/other/dir' | false
"/some/pipeline/dir" | "/some/pipeline/" | '123456' | '/some/pipeline/foo' | true
and:
"/this/pipeline" | "/that/pipeline/" | '123456' | '/other/pipeline/foo' | false
"/this/pipeline" | "/that/pipeline/" | '123456' | '/this/pipeline/foo' | true
"/this/pipeline" | "/that/pipeline/" | '123456' | '/that/pipeline/foo' | true
}

def 'should hash file content'() {
Expand Down
Loading