-
Notifications
You must be signed in to change notification settings - Fork 6
add groovy function to validate blastn database format and busco line… #204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
4f37b9d
8a2bc7a
d896581
39351fa
396cc57
0944d01
535a662
9ba7304
23fd0ac
3887e85
c6d1906
53b39eb
cdfdd77
f999572
a72c1bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,8 +3,6 @@ | |
| // | ||
|
|
||
| include { samplesheetToList } from 'plugin/nf-schema' | ||
|
|
||
|
|
||
| include { UNTAR } from '../../modules/nf-core/untar/main' | ||
| include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' | ||
| include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' | ||
|
|
@@ -45,6 +43,14 @@ workflow INPUT_CHECK { | |
| .map { db_meta, db_path -> | ||
| if (db_meta.type in ["blastp", "blastx"] && db_path.isDirectory()) { | ||
| [db_meta, file(db_path.toString() + "/${db_path.name}", checkIfExists: true)] | ||
| } else if (db_meta.type == "blastn") { | ||
| // Special handling for BLAST nucleotide databases | ||
| def (resolved_path, db_name) = validateBlastnDatabase(db_path) | ||
| [db_meta, resolved_path] | ||
| } else if (db_meta.type == "busco") { | ||
| // Special handling for BUSCO databases | ||
| def resolved_path = validateBuscoDatabase(db_path) | ||
| [db_meta, resolved_path] | ||
| } else { | ||
| [db_meta, db_path] | ||
| } | ||
|
|
@@ -272,3 +278,180 @@ def get_read_counts ( stats ) { | |
|
|
||
| return read_count_meta | ||
| } | ||
|
|
||
| /* | ||
| * Function to validate and resolve BUSCO database paths | ||
| * Handles the common user error of including '/lineages' at the end of the path | ||
| */ | ||
| def validateBuscoDatabase(db_path) { | ||
| def path_file = file(db_path) | ||
| if (path_file.isDirectory()) { | ||
| // Check if path ends with /lineages and has a parent directory | ||
| if (path_file.name == 'lineages' && path_file.parent != null) { | ||
| def parent_dir = file(path_file.parent) | ||
| log.info "BUSCO path correction: Detected '/lineages' suffix in path" | ||
| log.info " Original path: ${path_file}" | ||
| log.info " Corrected path: ${parent_dir}" | ||
| log.info "This prevents the common error where BUSCO tries to use '${path_file}/lineages/lineage_name' instead of '${parent_dir}/lineages/lineage_name'" | ||
| return parent_dir | ||
| } | ||
| // Check if path points to a specific lineage directory (e.g., eukaryota_odb10) | ||
| else if (path_file.name.endsWith('_odb10') && path_file.parent != null) { | ||
| def parent_dir = file(path_file.parent) | ||
| // Check if parent is 'lineages' - if so, we need to go up two levels | ||
| if (parent_dir.name == 'lineages' && parent_dir.parent != null) { | ||
| def busco_root = file(parent_dir.parent) | ||
| log.info "BUSCO path correction: Detected specific lineage directory in path" | ||
| log.info " Original path: ${path_file} (specific lineage: ${path_file.name})" | ||
| log.info " Corrected path: ${busco_root}" | ||
| log.info "This prevents the error where BUSCO tries to use a specific lineage directory instead of the root BUSCO database directory" | ||
| log.warn "Use `--busco_lineages ${path_file.name}` to control the lineage" | ||
| return busco_root | ||
| } else { | ||
| error """ | ||
| ERROR: Invalid BUSCO lineage directory structure: ${path_file} | ||
| It appears you're pointing to a specific BUSCO lineage directory (${path_file.name}), | ||
| but the expected directory structure is: | ||
| /path/to/busco_downloads/lineages/${path_file.name}/ | ||
| Please provide the path to the root BUSCO database directory. | ||
| Example: --busco /path/to/busco_downloads/ | ||
| """ | ||
| } | ||
| } else { | ||
| // Path looks correct, return as-is | ||
| log.info "Using BUSCO database path: ${path_file}" | ||
| return path_file | ||
| } | ||
yumisims marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } else { | ||
| error """ | ||
| ERROR: Invalid BUSCO database path: ${path_file} | ||
| BUSCO databases must be directories containing the 'lineages/' subdirectory. | ||
| Please ensure the path points to a valid BUSCO database directory. | ||
| Common issues: | ||
| - Path should point to the directory containing 'lineages/' subdirectory | ||
| - Do NOT include '/lineages' at the end of the path | ||
| - Do NOT point to a specific lineage directory (e.g., eukaryota_odb10) | ||
| - BUSCO databases cannot be individual files | ||
| Example: --busco /path/to/busco_downloads/ | ||
| NOT: --busco /path/to/busco_downloads/lineages/ | ||
| NOT: --busco /path/to/busco_downloads/lineages/eukaryota_odb10/ | ||
| """ | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * Function to validate and resolve BLAST nucleotide database paths | ||
| * Handles both directory paths (for backwards compatibility) and direct .nal file paths | ||
| */ | ||
| def validateBlastnDatabase(db_path) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There's still a problem with the way the function is used. When people mix multiple BLAST databases under the same directory (like in #184), yes, they can now refer to one…
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The db_name parameter is now implemented and flows from the validation function to the BLAST module. |
||
| def path_file = file(db_path) | ||
| if (path_file.isFile()) { | ||
| // Direct file provided - validate it's a .nal file and create isolated directory | ||
| if (path_file.name.endsWith('.nal')) { | ||
| if (!path_file.exists()) { | ||
| error """ | ||
| ERROR: BLAST database file not found: ${path_file} | ||
| Please check that the path is correct and the file exists. | ||
| """ | ||
| } | ||
| def parent_dir = file(path_file.parent) | ||
| def db_name = path_file.name.replaceAll('\\.nal$', '') | ||
|
|
||
| // Create a temporary directory in the system temp folder with a UUID to avoid | ||
| // writing into the database parent directory | ||
| def uuid = java.util.UUID.randomUUID().toString() | ||
| // Create isolated directory inside the pipeline working directory | ||
| def temp_dir = file("${System.getProperty('user.dir')}/.btk_isolated_${db_name}_${uuid}") | ||
| if (!temp_dir.exists()) { | ||
| temp_dir.mkdirs() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Funnily, this doesn't seem to complain if the directory can't be created. It just carries on (and the pipeline fails later).
$ nextflow run -profile sanger,singularity,test --blastn /data/tol/resources/nt/latest/nt.nal
(...)
Direct BLAST database file specified: /data/tol/resources/nt/latest/nt.nal
Database name: nt
Created isolated directory: /data/tol/resources/nt/latest/.btk_isolated_nt
This ensures only the specified database is available to BLAST
(...)
Execution cancelled -- Finishing pending tasks before exit
-[sanger-tol/blobtoolkit] Pipeline completed with errors-
(...)
Command error:
(...)
File "/nfs/users/nfs_m/mm49/workspace/tol-it/nextflow/sanger-tol/blobtoolkit_param/bin/generate_config.py", line 230, in adjust_taxon_id
con = sqlite3.connect(os.path.join(nt, "taxonomy4blast.sqlite3"))
sqlite3.OperationalError: unable to open database file
(...)
$ ls -ld /data/tol/resources/nt/latest/.btk_isolated_nt
ls: cannot access '/data/tol/resources/nt/latest/.btk_isolated_nt': No such file or directory |
||
| } | ||
|
|
||
| // Find all files belonging to this specific database | ||
| def db_files = parent_dir.listFiles().findAll { | ||
| it.name.startsWith("${db_name}.") || | ||
| it.name in ['taxdb.btd', 'taxdb.bti', 'taxonomy4blast.sqlite3'] | ||
| } | ||
|
|
||
| // Create symlinks in the temporary directory | ||
| db_files.each { source_file -> | ||
| def link_file = file("${temp_dir}/${source_file.name}") | ||
| if (!link_file.exists()) { | ||
| // Create symbolic link | ||
| link_file.createLink(source_file) | ||
| } | ||
| } | ||
|
|
||
| log.info "Direct BLAST database file specified: ${path_file}" | ||
| log.info "Database name: ${db_name}" | ||
| log.info "Created isolated directory: ${temp_dir}" | ||
| log.info "This ensures only the specified database is available to BLAST" | ||
| return [temp_dir, db_name] | ||
| } else { | ||
| error """ | ||
| ERROR: Invalid BLAST database file: ${path_file} | ||
| The file must have a .nal extension. | ||
| Please provide either: | ||
| - A directory containing a single BLAST database | ||
| - The direct path to a .nal file | ||
| Example: --blastn /path/to/databases/nt.nal | ||
| """ | ||
| } | ||
| } else if (path_file.isDirectory()) { | ||
| // Directory provided - require the user to specify the database prefix | ||
| log.info "BLAST database directory provided: ${path_file}" | ||
| def prefix = (this.binding.hasVariable('params') && params.containsKey('ntdb_prefix')) ? params.ntdb_prefix : null | ||
| if (!prefix) { | ||
| error """ | ||
| ERROR: A BLAST database directory was provided (${path_file}) but no database prefix was supplied. | ||
| The pipeline requires you to select which database inside the directory to use. | ||
| Please provide the database prefix (basename without extension) using --ntdb_prefix. | ||
| Example: --blastn ${path_file} --ntdb_prefix nt (will select ${path_file}/nt.nal) | ||
| """ | ||
| } | ||
|
|
||
| // Look for the requested .nal file inside the directory | ||
| def expected_name = "${prefix}.nal" | ||
| def expected_file = path_file.listFiles().find { it.name == expected_name } | ||
| if (!expected_file) { | ||
| error """ | ||
| ERROR: Requested BLAST database prefix '${prefix}' not found in ${path_file} | ||
| Expected file: ${path_file}/${expected_name} | ||
| Please ensure the prefix passed with --ntdb_prefix matches a .nal file in the directory. | ||
| """ | ||
| } | ||
|
|
||
| // Create isolated directory with symlinks to the chosen database files | ||
| def parent_dir = file(expected_file.parent) | ||
| def db_name = expected_file.name.replaceAll('\\.nal$', '') | ||
| def uuid = java.util.UUID.randomUUID().toString() | ||
| // Create isolated directory inside the pipeline working directory | ||
| def temp_dir = file("${System.getProperty('user.dir')}/.btk_isolated_${db_name}_${uuid}") | ||
| if (!temp_dir.exists()) { | ||
| temp_dir.mkdirs() | ||
| } | ||
|
|
||
| def db_files = parent_dir.listFiles().findAll { | ||
| it.name.startsWith("${db_name}.") || | ||
| it.name in ['taxdb.btd', 'taxdb.bti', 'taxonomy4blast.sqlite3'] | ||
| } | ||
|
|
||
| db_files.each { source_file -> | ||
| def link_file = file("${temp_dir}/${source_file.name}") | ||
| if (!link_file.exists()) { | ||
| link_file.createLink(source_file) | ||
| } | ||
| } | ||
|
|
||
| log.info "Using BLAST database '${db_name}' from directory: ${path_file}" | ||
| log.info "Created isolated directory: ${temp_dir}" | ||
| return [temp_dir, db_name] | ||
| } else { | ||
| error """ | ||
| ERROR: Invalid database path: ${path_file} | ||
| The path must point to either: | ||
| - A directory containing a single BLAST database | ||
| - A direct path to a .nal file | ||
| Example: --blastn /path/to/databases/nt.nal | ||
| """ | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.