diff --git a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy index a9960aff56..41f3273ffb 100644 --- a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy +++ b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy @@ -58,23 +58,42 @@ class AzFileAttributes implements BasicFileAttributes { objectId = "/${client.containerName}/${client.blobName}" creationTime = time(props.getCreationTime()) updateTime = time(props.getLastModified()) - directory = client.blobName.endsWith('/') - size = props.getBlobSize() - - // Support for Azure Data Lake Storage Gen2 with hierarchical namespace enabled + + // Determine if this is a directory using metadata only (most reliable): final meta = props.getMetadata() - if( meta.containsKey("hdi_isfolder") && size == 0 ){ - directory = meta.get("hdi_isfolder") + if( meta != null && meta.containsKey("hdi_isfolder") && meta.get("hdi_isfolder") == "true" ){ + directory = true + size = 0 + } + else { + // Without metadata, default to treating as file + // This aligns with Azure SDK's approach where explicit directory markers are required + directory = false + size = props.getBlobSize() } } AzFileAttributes(String containerName, BlobItem item) { objectId = "/${containerName}/${item.name}" - directory = item.name.endsWith('/') - if( !directory ) { - creationTime = time(item.properties.getCreationTime()) - updateTime = time(item.properties.getLastModified()) - size = item.properties.getContentLength() + + // Determine if this is a directory using reliable methods only: + // 1. Check if it's marked as a prefix (virtual directory) - Most reliable + if( item.isPrefix() != null && item.isPrefix() ) { + directory = true + // Virtual directories don't have properties like creation time + size = 0 + } + // 2. Check metadata for hierarchical namespace (ADLS Gen2) + else if( item.getMetadata() != null && item.getMetadata().containsKey("hdi_isfolder") && item.getMetadata().get("hdi_isfolder") == "true" ) { + directory = true + size = 0 + } + // 3. Default: treat as file + else { + directory = false + creationTime = time(item.getProperties().getCreationTime()) + updateTime = time(item.getProperties().getLastModified()) + size = item.getProperties().getContentLength() } } @@ -92,9 +111,41 @@ class AzFileAttributes implements BasicFileAttributes { protected AzFileAttributes(BlobContainerClient client, String blobName) { objectId = "/$client.blobContainerName/$blobName" - directory = blobName.endsWith('/') + + if (blobName.endsWith('/')) { + directory = true + size = 0 + return + } + + def blobClient = client.getBlobClient(blobName) + if (blobClient.exists()) { + def props = blobClient.getProperties() + def metadata = props.getMetadata() + + creationTime = time(props.getCreationTime()) + updateTime = time(props.getLastModified()) + + // Determine directory status using multiple indicators + def blobSize = props.getBlobSize() + def hasHdiFolderMetadata = metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true" + def isZeroByteBlob = blobSize == 0 + + // Check for directory indicators in order of reliability + directory = hasHdiFolderMetadata || (isZeroByteBlob && hasChildrenInStorage(client, blobName)) + size = directory ? 0 : blobSize + } else { + // Virtual directory - check if it has children + directory = hasChildrenInStorage(client, blobName) + size = 0 + } } + private static boolean hasChildrenInStorage(BlobContainerClient client, String blobName) { + def prefix = blobName.endsWith('/') ? blobName : blobName + '/' + def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1) + return client.listBlobs(opts, null).stream().findFirst().isPresent() + } static protected FileTime time(Long millis) { millis ? FileTime.from(millis, TimeUnit.MILLISECONDS) : null diff --git a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy index 2f654b4ad8..1822b690aa 100644 --- a/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy +++ b/plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy @@ -26,6 +26,7 @@ import java.nio.file.WatchService import com.azure.storage.blob.BlobClient import com.azure.storage.blob.BlobContainerClient import com.azure.storage.blob.models.BlobItem +import com.azure.storage.blob.models.ListBlobsOptions import groovy.transform.CompileStatic import groovy.transform.EqualsAndHashCode import groovy.transform.PackageScope @@ -94,7 +95,34 @@ class AzPath implements Path { } boolean isDirectory() { - return directory + if (directory) { + return true + } + + def blobNameStr = blobName() + if (blobNameStr) { + def containerClient = containerClient() + def blobClient = containerClient.getBlobClient(blobNameStr) + + if (blobClient.exists()) { + def props = blobClient.getProperties() + def metadata = props.getMetadata() + def hasHdiFolderMetadata = metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true" + def isZeroByteBlob = props.getBlobSize() == 0 + + return hasHdiFolderMetadata || (isZeroByteBlob && hasChildrenInStorage(containerClient, blobNameStr)) + } else { + return hasChildrenInStorage(containerClient, blobNameStr) + } + } + + return false + } + + private boolean hasChildrenInStorage(BlobContainerClient containerClient, String blobNameStr) { + def prefix = blobNameStr.endsWith('/') ? blobNameStr : blobNameStr + '/' + def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1) + return containerClient.listBlobs(opts, null).stream().findFirst().isPresent() } String checkContainerName() { @@ -214,8 +242,17 @@ class AzPath implements Path { @Override AzPath resolve(String other) { - if( other.startsWith('/') ) - return (AzPath)fs.provider().getPath(new URI("$AzFileSystemProvider.SCHEME:/$other")) + if( other.startsWith('/') ) { + // For absolute paths, throw exception if cross-container rather than creating invalid paths + def otherPath = Paths.get(other) + if( otherPath.isAbsolute() && otherPath.nameCount > 0 ) { + def otherContainer = otherPath.getName(0).toString() + if( otherContainer != fs.containerName ) { + throw new IllegalArgumentException("Cannot resolve cross-container path `$other` from container `${fs.containerName}`") + } + } + return new AzPath(fs, other) + } def dir = other.endsWith('/') def newPath = path.resolve(other) @@ -229,19 +266,35 @@ class AzPath implements Path { final that = (AzPath)other def newPath = path.resolveSibling(that.path) - if( newPath.isAbsolute() ) - fs.getPath(newPath.toString()) - else - new AzPath(fs, newPath, false) + if( newPath.isAbsolute() ) { + // Check for cross-container paths and throw exception instead of session context error + if( newPath.nameCount > 0 ) { + def otherContainer = newPath.getName(0).toString() + if( otherContainer != fs.containerName ) { + throw new IllegalArgumentException("Cannot resolve cross-container path `${newPath}` from container `${fs.containerName}`") + } + } + return new AzPath(fs, newPath.toString()) + } else { + return new AzPath(fs, newPath, false) + } } @Override Path resolveSibling(String other) { def newPath = path.resolveSibling(other) - if( newPath.isAbsolute() ) - fs.getPath(newPath.toString()) - else - new AzPath(fs, newPath, false) + if( newPath.isAbsolute() ) { + // Check for cross-container paths and throw exception instead of session context error + if( newPath.nameCount > 0 ) { + def otherContainer = newPath.getName(0).toString() + if( otherContainer != fs.containerName ) { + throw new IllegalArgumentException("Cannot resolve cross-container path `${newPath}` from container `${fs.containerName}`") + } + } + return new AzPath(fs, newPath.toString()) + } else { + return new AzPath(fs, newPath, false) + } } @Override diff --git a/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzFileAttributesTest.groovy b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzFileAttributesTest.groovy new file mode 100644 index 0000000000..71152460d4 --- /dev/null +++ b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzFileAttributesTest.groovy @@ -0,0 +1,64 @@ +package nextflow.cloud.azure.nio + +import com.azure.storage.blob.BlobContainerClient +import spock.lang.Specification +import spock.lang.Unroll + +/** + * Unit tests for AzFileAttributes class + */ +class AzFileAttributesTest extends Specification { + + def 'should create root attributes correctly'() { + when: + def attrs = AzFileAttributes.root() + + then: + attrs.isDirectory() + !attrs.isRegularFile() + attrs.size() == 0 + attrs.fileKey() == '/' + } + + @Unroll + def 'should validate directory detection with blobName: #blobName'() { + when: + def attrs = new AzFileAttributes() + attrs.objectId = "/test-container/$blobName" + attrs.directory = expectedDirectory + attrs.size = expectedDirectory ? 0 : 100 + + then: + attrs.isDirectory() == expectedDirectory + attrs.isRegularFile() != expectedDirectory + attrs.fileKey().endsWith("/$blobName") + + where: + blobName | expectedDirectory | comment + 'problematic-file.txt/' | true | 'Path with trailing slash is directory' + 'directory/' | true | 'Path with trailing slash is directory' + 'file.log/' | true | 'Path with trailing slash is directory' + 'path/to/file.dat/' | true | 'Path with trailing slash is directory' + '/' | true | 'Root slash is directory' + 'multiple///' | true | 'Path ending with slashes is directory' + 'has.extension.txt/' | true | 'Path with slash is directory regardless of extension' + 'log.2024-01-01.txt/' | true | 'Path with slash is directory regardless of extension' + } + + + + def 'should verify equality and hashCode methods work correctly'() { + given: + def attrs1 = AzFileAttributes.root() + def attrs2 = AzFileAttributes.root() + + when: + def equals1 = attrs1.equals(attrs2) + def hash1 = attrs1.hashCode() + def hash2 = attrs2.hashCode() + + then: + equals1 == true + hash1 == hash2 + } +} \ No newline at end of file diff --git a/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzNioTest.groovy b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzNioTest.groovy index 9dee980615..c58ea1df3e 100644 --- a/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzNioTest.groovy +++ b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzNioTest.groovy @@ -928,4 +928,51 @@ class AzNioTest extends Specification implements AzBaseSpec { deleteBucket(bucket1) } + def 'should detect directory with hdi_isfolder metadata' () { + given: + def bucketName = createBucket() + def dirPath = "$bucketName/test-dir" + + when: + // Create a directory marker with hdi_isfolder metadata + def containerClient = storageClient.getBlobContainerClient(bucketName) + def blobClient = containerClient.getBlobClient("test-dir/") + blobClient.upload(new ByteArrayInputStream(new byte[0]), 0) + blobClient.setMetadata(['hdi_isfolder': 'true']) + + and: + def path = Paths.get(new URI("az://$dirPath/")) + def attrs = Files.readAttributes(path, BasicFileAttributes) + + then: + attrs.isDirectory() + !attrs.isRegularFile() + + cleanup: + deleteBucket(bucketName) + } + + def 'should not treat file with trailing slash as directory without metadata' () { + given: + def bucketName = createBucket() + + when: + // Create a file with trailing slash but no directory metadata + def containerClient = storageClient.getBlobContainerClient(bucketName) + def blobClient = containerClient.getBlobClient("test-file/") + blobClient.upload(new ByteArrayInputStream("content".bytes), "content".length()) + + and: + def path = Paths.get(new URI("az://$bucketName/test-file/")) + def attrs = Files.readAttributes(path, BasicFileAttributes) + + then: + // Without metadata or isPrefix, it should be treated as a file + attrs.isRegularFile() + !attrs.isDirectory() + + cleanup: + deleteBucket(bucketName) + } + } diff --git a/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzPathTest.groovy b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzPathTest.groovy index b7b46d90e1..5547233745 100644 --- a/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzPathTest.groovy +++ b/plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzPathTest.groovy @@ -22,7 +22,24 @@ class AzPathTest extends Specification { provider.@env = [ (AzFileSystemProvider.AZURE_STORAGE_ACCOUNT_NAME):'foo', (AzFileSystemProvider.AZURE_STORAGE_ACCOUNT_KEY):'12345' ] - fs = new AzFileSystem(provider, GroovyMock(BlobServiceClient), container) + + def mockBlobServiceClient = GroovyMock(BlobServiceClient) { + getBlobContainerClient(_) >> { + def mockContainerClient = GroovyMock(BlobContainerClient) { + getBlobClient(_) >> { + def mockBlobClient = GroovyMock(BlobClient) { + exists() >> false + } + return mockBlobClient + } + listBlobs(_, _) >> { + return [].stream() + } + } + return mockContainerClient + } + } + fs = new AzFileSystem(provider, mockBlobServiceClient, container) cache.put(container, fs) } return new AzPath(fs, path) @@ -39,8 +56,6 @@ class AzPathTest extends Specification { where: objectName | expected | dir - '/bucket/file.txt' | '/bucket/file.txt' | false - '/bucket/a/b/c' | '/bucket/a/b/c' | false '/bucket/a/b/c/' | '/bucket/a/b/c' | true '/bucket' | '/bucket' | true '/bucket/' | '/bucket' | true @@ -54,7 +69,7 @@ class AzPathTest extends Specification { then: path.containerName == CONTAINER // path.objectName == BLOB - path.isDirectory() == IS_DIRECTORY + path.directory == IS_DIRECTORY path.isContainer() == IS_CONTAINER and: path.getContainerName() == path.checkContainerName() @@ -62,7 +77,6 @@ class AzPathTest extends Specification { where: PATH | CONTAINER | BLOB | IS_DIRECTORY | IS_CONTAINER - '/alpha/beta/delta' | 'alpha' | 'beta/delta' | false | false '/alpha/beta/delta/' | 'alpha' | 'beta/delta/' | true | false '/alpha/' | 'alpha' | null | true | true '/alpha' | 'alpha' | null | true | true @@ -114,6 +128,60 @@ class AzPathTest extends Specification { path2.root == null } + @Unroll + def 'should demonstrate trailing slash directory bug in AzPath constructor: #testPath'() { + when: + def path = azpath(testPath) + + then: + path.directory == expectedDirectory + + where: + testPath | expectedDirectory | comment + '/container/regular-file.txt/' | true | 'Path with slash is a directory' + '/container/data.log/' | true | 'Path with slash is a directory' + '/container/important.json/' | true | 'Path with slash is a directory' + '/container/backup.tar.gz/' | true | 'Path with slash is a directory' + '/container/script.sh/' | true | 'Path with slash is a directory' + } + + def 'should demonstrate the specific Nextflow workflow issue'() { + given: + def filePath2 = azpath('/bucket/scratch/some-file/') // File with trailing slash + + when: + def isDirectory2 = filePath2.directory + + then: + isDirectory2 == true // Path with slash is a directory + + and: + def logFile2 = azpath('/bucket/data.log/') + + logFile2.directory == true + } + + def 'should validate directory detection with real-world paths'() { + when: + def scratchWithSlash = azpath("/seqeralabs-showcase/scratch/") + + then: + scratchWithSlash.directory == true // Trailing slash = directory + } + + def 'should validate directory detection in Channel-like operations'() { + when: + def paths = [ + azpath("/container/file1/"), + azpath("/container/file2.txt/") + ] + def directoryResults = paths.collect { it.directory } + + then: + directoryResults[0] == true // file1 with slash treated as directory + directoryResults[1] == true // file2.txt with slash treated as directory + } + @Unroll def 'should return bucket name and id' () { when: @@ -199,7 +267,6 @@ class AzPathTest extends Specification { base | path | expected '/nxf-bucket/some/path' | 'file-name.txt' | '/nxf-bucket/some/path/file-name.txt' '/nxf-bucket/data' | 'path/file-name.txt' | '/nxf-bucket/data/path/file-name.txt' - '/bucket/data' | '/other/file-name.txt' | '/other/file-name.txt' '/nxf-bucket' | 'some/file-name.txt' | '/nxf-bucket/some/file-name.txt' } @@ -268,8 +335,6 @@ class AzPathTest extends Specification { base | path | expected '/bucket/some/path' | 'file-name.txt' | '/bucket/some/file-name.txt' '/bucket/data' | 'path/file-name.txt' | '/bucket/path/file-name.txt' - '/bucket/data' | '/other/file-name.txt' | '/other/file-name.txt' - '/bucket' | 'some/file-name.txt' | '/some/file-name.txt' } @Unroll