# Example usage

To use `nf_parser` in a project:

In [None]:
import nf_parser

# Print the package's `__version__` attribute.
print(nf_parser.__version__)

In [194]:
# Fixture 1 (originally annotated "test passing"): a DSL2 script containing
# params, module includes, a process, an anonymous workflow and a named
# workflow with several channel operator chains.
#
# The literal is a *raw* string on purpose: the embedded Nextflow code contains
# the escape sequence backslash-n inside double-quoted strings.  In a normal
# Python string that escape would be turned into a real line break, so the
# parser would receive a Nextflow string split across two lines instead of the
# text a real .nf file holds.
test_1 = r"""#!/usr/bin/env nextflow
 
/*
 * Defines the pipeline input parameters (with a default value for each one).
 * Each of the following parameters can be specified as command line options.
 */
params.query = "$baseDir/data/sample.fa"
params.db = "$baseDir/blast-db/pdb/tiny"
params.out = "result.txt"
params.chunkSize = 100
 
db_name = file(params.db).name
db_dir = file(params.db).parent
 

nextflow.enable.dsl=2

include { foo } from './module/1.nf'
include { BAR } from './module/2.nf'

db_name = file(params.db).name

//def sayHello() {
//    println "$params.foo $params.bar"
//}

Channel.fromPath(params.query)

/*
 * Defines the pipeline input parameters (with a default value for each one).
 * Each of the following parameters can be specified as command line options.
 */
params.in = "$baseDir/data/sample.fa" // path to fa file
// some comment
params.query = "$baseDir/data/sample.fa"
params.db = "$baseDir/blast-db/pdb/tiny"
params.out = "result.txt"
params.chunkSize = 100

process foo {
    container 'python'

    output:
    path 'sample.txt'

    shell:
    '''
    echo 'hello' >  sample.txt
    '''
}

workflow  { 

    foo()
}

workflow jimmy {
    foo()
    /*
     * Create a channel emitting the given query fasta file(s).
     * Split the file into chunks containing as many sequences as defined by the parameter 'chunkSize'.
     * Finally, assign the resulting channel to the variable 'ch_fasta'
     */
    Channel
        .fromPath(params.query)
        .splitFasta(by: params.chunkSize, file:true)
        .set { ch_fasta }
    
    /*
     * Execute a BLAST job for each chunk emitted by the 'ch_fasta' channel
     * and emit the resulting BLAST matches.
     */
    ch_hits = blast(ch_fasta, db_dir)
 

    // define a channel emitting three values
    source = Channel.of( 'alpha', 'beta', 'delta' )
    
    
    /*
     * Execute a BLAST job for each chunk emitted by the 'ch_fasta' channel
     * and emit the resulting BLAST matches.
     */
    ch_sequences
        .collectFile(name: params.out)
        .view { file -> "matching sequences:\n ${file.text}" }
    
    Channel
        .watchPath( '/path/*.fa' )
        .subscribe { println "Fasta file: $it" }

    Channel
        .watchPath( '/path/*.fa', 'create,modify' )
        .subscribe { println "File created or modified: $it" }

    
    Channel
        .fromPath(params.query)
        .branch{ }
        .splitFasta(by: params.chunkSize, file:true)
        .splitFasta{ }
        .set{ ch_fasta }
        .collect()
        .view { file -> "matching sequences:\n ${file.text}" }

    ch_hits = blast(ch_fasta, db_dir)
        
 
}

"""

In [None]:
# Minimal workflow-only snippet.  It is stored in a raw string so that this
# cell is valid Python (bare Nextflow code in a code cell raises SyntaxError on
# "Run All") and so that the backslash-n escape inside the Nextflow string
# stays as two characters instead of becoming a real line break.
test_workflow_snippet = r"""workflow  { 

    ch_sequences
        .collectFile(name: params.out)
        .view { file -> "matching sequences:\n ${file.text}" }
}
"""

In [198]:

# Fixture 2: a BLAST pipeline with one anonymous workflow and two processes
# ("blast" and "extract") whose scripts use triple-double-quoted blocks.
#
# The literal is a *raw* string on purpose: the Nextflow code contains the
# escape sequence backslash-n inside a double-quoted string, and a normal
# Python string would turn it into a real line break before the text reaches
# the parser.
#
# NOTE(review): the text starts with a space before the shebang line; that is
# kept as-is here -- confirm whether it is intentional for this test case.
test_2 = r''' #!/usr/bin/env nextflow
 
/*
 * Defines the pipeline input parameters (with a default value for each one).
 * Each of the following parameters can be specified as command line options.
 */
params.query = "$baseDir/data/sample.fa"
params.db = "$baseDir/blast-db/pdb/tiny"
params.out = "result.txt"
params.chunkSize = 100
 
db_name = file(params.db).name
db_dir = file(params.db).parent
 
 
workflow {
    /*
     * Create a channel emitting the given query fasta file(s).
     * Split the file into chunks containing as many sequences as defined by the parameter 'chunkSize'.
     * Finally, assign the resulting channel to the variable 'ch_fasta'
     */
    Channel
        .fromPath(params.query)
        .splitFasta(by: params.chunkSize, file:true)
        .set { ch_fasta }
 
    /*
     * Execute a BLAST job for each chunk emitted by the 'ch_fasta' channel
     * and emit the resulting BLAST matches.
     */
    ch_hits = blast(ch_fasta, db_dir)
 
    /*
     * Each time a file emitted by the 'blast' process, an extract job is executed,
     * producing a file containing the matching sequences.
     */
    ch_sequences = extract(ch_hits, db_dir)
 
    /*
     * Collect all the sequences files into a single file
     * and print the resulting file contents when complete.
     */
    ch_sequences
        .collectFile(name: params.out)
        .view { file -> "matching sequences:\n ${file.text}" }
}
 
 
process blast {
    input:
    path 'query.fa'
    path db
 
    output:
    path 'top_hits'
 
    """
    blastp -db $db/$db_name -query query.fa -outfmt 6 > blast_result
    cat blast_result | head -n 10 | cut -f 2 > top_hits
    """
}
 
 
process extract {
    input:
    path 'top_hits'
    path db
 
    output:
    path 'sequences'
 
    """
    blastdbcmd -db $db/$db_name -entry_batch top_hits | head -n 10 > sequences
    """
}
'''

In [201]:
# Parse one of the Nextflow fixtures defined in the earlier cells with the
# Lark grammar and print the resulting parse tree.
# Reference: https://chat.openai.com/share/fb9d0650-3f66-4104-bff3-8697d5aeaf2e

from pathlib import Path
from lark import Lark

# Fixture to parse; assign test_2 instead to run the BLAST pipeline example.
test_file = test_1

# Read the grammar as UTF-8 explicitly so the text does not depend on the
# platform's default locale encoding.
# (An earlier revision loaded it from "../nf_parser/grammer/nf.lark".)
nextflow_grammar = Path("./grammers/nf.lark").read_text(encoding="utf-8")

# Build an LALR(1) parser from the grammar and parse the selected fixture.
parser = Lark(nextflow_grammar, parser='lalr')
tree = parser.parse(test_file)

# pretty() returns an indented text rendering of the tree.
print(tree.pretty())

start
  shebang	#!/usr/bin/env nextflow
  comment	/*
 * Defines the pipeline input parameters (with a default value for each one).
 * Each of the following parameters can be specified as command line options.
 */
  param
    query
    string	"$baseDir/data/sample.fa"
  param
    db
    string	"$baseDir/blast-db/pdb/tiny"
  param
    out
    string	"result.txt"
  param
    chunkSize
    int	100
  function_call	db_name = file(params.db).name
  function_call	db_dir = file(params.db).parent
  dsl	2
  module_import
    foo
    './module/1.nf'
  module_import
    BAR
    './module/2.nf'
  function_call	db_name = file(params.db).name
  comment	//def sayHello() {
  comment	//    println "$params.foo $params.bar"
  comment	//}
  channel	

Channel.fromPath(params.query)
  comment	/*
 * Defines the pipeline input parameters (with a default value for each one).
 * Each of the following parameters can be specified as command line options.
 */
  param
    in
    string	"$baseDir/data/sample.fa"
    