# Command-Line Interface

> Command-line interface for OligoSeeker

In [None]:
#| default_exp cli

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import argparse
import sys
import logging
import os
from typing import List, Optional

from OligoSeeker.pipeline import PipelineConfig, OligoCodonPipeline

  from pandas.core import (


## Argument Parser

In [None]:
#| export
def create_parser() -> argparse.ArgumentParser:
    """Create the command-line argument parser for OligoSeeker.

    The two FASTQ inputs are mandatory. argparse never applies a ``default``
    to a required option, so instead of carrying an unused placeholder path
    they use ``argparse.SUPPRESS``; this also stops
    ``ArgumentDefaultsHelpFormatter`` from printing a misleading
    "(default: ...)" hint next to them in ``--help``.

    Returns:
        Configured argument parser
    """
    parser = argparse.ArgumentParser(
        description="OligoSeeker: Process FASTQ files to count oligo codons",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Required arguments (no usable default: the user must always supply them)
    parser.add_argument('--f1', '--fastq_1', dest='fastq_path_1', required=True,
                        default=argparse.SUPPRESS,
                        help="Path to FASTQ 1 file")

    parser.add_argument('--f2', '--fastq_2', dest='fastq_path_2', required=True,
                        default=argparse.SUPPRESS,
                        help="Path to FASTQ 2 file")

    # Oligo source (at least one required)
    oligo_group = parser.add_argument_group("Oligo Source Options (one required)")

    oligo_group.add_argument('--oligos-file', dest='oligos_file',
                             help="File containing oligo sequences (one per line)")

    # NOTE: because of this default, ``oligos_string`` is populated even when
    # the user only passes --oligos-file.
    oligo_group.add_argument('--oligos', dest='oligos_string',
                             default="GCGGATTACATTNNNAAATAACATCGT,TGTGGTAAGCGGNNNGAAAGCATTTGT,GTCGTAGAAAATNNNTGGGTGATGAGC",
                             help="Comma-separated list of oligo sequences")

    # Output options
    parser.add_argument('-o', '--output', dest='output_path', default="./results",
                        help="Output directory for results")

    parser.add_argument('--prefix', dest='output_prefix', default="",
                        help="Prefix for output files")

    parser.add_argument('--offset', dest='offset_oligo', type=int, default=1,
                        help="Value to add to oligo index in output")

    # Logging options
    parser.add_argument('--log-file', dest='log_file',
                        help="Path to log file (if not specified, logs to console only)")

    parser.add_argument('--log-level', dest='log_level', default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                        help="Logging level")

    return parser

## Command-Line Runner

In [None]:
#| export
def validate_args(args: argparse.Namespace) -> bool:
    """Check that the parsed arguments describe a runnable job.

    An oligo source (file or inline string) must be present, and every path
    that was supplied must exist on disk. The first problem found is reported
    with ``print`` and ends the check.

    Args:
        args: Parsed command-line arguments

    Returns:
        True if arguments are valid, False otherwise
    """
    oligos_file = args.oligos_file

    # At least one oligo source is mandatory.
    if not oligos_file and not args.oligos_string:
        print("Error: You must specify either --oligos-file or --oligos")
        return False

    # The oligo file is optional, so only check it when it was given.
    if oligos_file:
        if not os.path.exists(oligos_file):
            print(f"Error: Oligos file does not exist: {oligos_file}")
            return False

    fastq_1 = args.fastq_path_1
    if not os.path.exists(fastq_1):
        print(f"Error: FASTQ file 1 does not exist: {fastq_1}")
        return False

    fastq_2 = args.fastq_path_2
    if os.path.exists(fastq_2):
        return True

    print(f"Error: FASTQ file 2 does not exist: {fastq_2}")
    return False

In [None]:
#| export
def args_to_config(args: argparse.Namespace) -> PipelineConfig:
    """Build the pipeline configuration from parsed CLI arguments.

    Args:
        args: Parsed command-line arguments

    Returns:
        Pipeline configuration object
    """
    # The level name (e.g. "INFO") is looked up as an attribute of the
    # logging module to obtain the value handed to the pipeline.
    numeric_level = getattr(logging, args.log_level)

    settings = dict(
        fastq_1=args.fastq_path_1,
        fastq_2=args.fastq_path_2,
        oligos_file=args.oligos_file,
        oligos_string=args.oligos_string,
        oligos_list=None,  # Not used in CLI
        output_path=args.output_path,
        output_prefix=args.output_prefix,
        offset_oligo=args.offset_oligo,
        log_file=args.log_file,
        log_level=numeric_level,
    )
    return PipelineConfig(**settings)

In [None]:
#| export
def run_cli(args: Optional[List[str]] = None) -> int:
    """Run the command-line interface.

    Parses and validates the arguments, builds the pipeline configuration,
    runs the pipeline and prints a short summary of the produced files.

    Args:
        args: Command-line arguments (if None, uses sys.argv)

    Returns:
        Exit code (0 for success, non-zero for failure). In a notebook/test
        environment a ``SystemExit`` (e.g. raised by argparse) is converted
        into an integer exit code instead of propagating.

    Raises:
        SystemExit: Re-raised unchanged when not running in a notebook/test
            environment.
    """
    parser = create_parser()

    # When testing, avoid sys.exit() errors by just returning the error code
    is_testing = 'ipykernel' in sys.modules or os.environ.get('NBDEV_TEST') == '1'

    try:
        parsed_args = parser.parse_args(args)

        if not validate_args(parsed_args):
            parser.print_help()
            return 1

        # Convert args to config
        config = args_to_config(parsed_args)

        # When testing, we might want to mock the pipeline run
        if is_testing and os.environ.get('MOCK_PIPELINE') == '1':
            print("Mock pipeline run (for testing)")
            return 0

        # Run pipeline
        pipeline = OligoCodonPipeline(config)
        results = pipeline.run()

        # Print summary information; only the CSV path is assumed to be present
        print("\nResults saved to:")
        print(f"  CSV: {results['csv_path']}")
        if 'excel_path' in results:
            print(f"  Excel: {results['excel_path']}")
        if 'json_path' in results:
            print(f"  Summary JSON: {results['json_path']}")

        print(f"\nProcessed {results['oligos_processed']} oligos in {results['elapsed_time']:.2f} seconds")

        return 0
    except Exception as e:
        print(f"Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return 1
    except SystemExit as e:
        # SystemExit is not an Exception subclass, so it always lands here.
        if not is_testing:
            raise

        # Catch SystemExit and just return the code instead of exiting
        print(f"Caught SystemExit: {e.code}")

        # SystemExit.code may be None (success) or a non-integer such as a
        # message string (failure); map it to an int the way the interpreter
        # does so the declared return type holds.
        code = e.code
        if code is None:
            return 0
        if isinstance(code, int):
            return code
        return 1

In [None]:
#| export
def main():
    """Main entry point for command-line execution.

    Outside a notebook/test environment this simply runs the CLI on
    ``sys.argv``. Inside one (ipykernel or pytest loaded, or ``NBDEV_TEST``
    set) it runs two fixed demonstration invocations against the bundled test
    files instead: one with inline oligos and one with an oligo file.

    Returns:
        Exit code of the CLI run. In the notebook/test case, 0 if both
        demonstration runs succeeded, otherwise the first non-zero code, so
        that a failing run is no longer reported as success.
    """
    # Only actually call sys.exit() when running as a script, not in tests or notebooks
    is_notebook_or_test = (
        'ipykernel' in sys.modules
        or 'pytest' in sys.modules
        or 'NBDEV_TEST' in os.environ
    )

    if not is_notebook_or_test:
        return run_cli()

    # Arguments shared by both demonstration runs
    common_args = [
        '--f1', '../test_files/test_1.fq.gz',
        '--f2', '../test_files/test_2.fq.gz',
        '--output', '../test_files/test_outs',
    ]
    demo_runs = [
        ['--oligos', 'GCGGATTACATTNNNAAATAACATCGT,TGTGGTAAGCGGNNNGAAAGCATTTGT,GTCGTAGAAAATNNNTGGGTGATGAGC',
         '--prefix', 'test_cm1'],
        ['--oligos-file', '../test_files/oligos.txt',
         '--prefix', 'test_cm2'],
    ]

    exit_code = 0
    for extra_args in demo_runs:
        # Always execute every run, but remember the first failure.
        code = run_cli(common_args + extra_args)
        if code and not exit_code:
            exit_code = code

    return exit_code

In [None]:
#| eval: false
#| export
if __name__ == "__main__":
    # In a notebook or test run the process must stay alive, so the result is
    # only printed. As a real script, the exit code is handed to sys.exit()
    # so the shell sees failures instead of an unconditional 0.
    is_notebook_or_test = 'ipykernel' in sys.modules or 'pytest' in sys.modules or 'NBDEV_TEST' in os.environ
    try:
        exit_code = main()
    except SystemExit as e:
        if not is_notebook_or_test:
            # Let argparse / pipeline exits terminate the script normally
            raise
        # Just in case, catch any SystemExit and print instead
        print(f"\nSystemExit caught with code: {e.code}")
    else:
        if is_notebook_or_test:
            print(f"\nCLI completed with exit code: {exit_code}")
        else:
            sys.exit(exit_code)

2025-03-11 17:24:37,057 - INFO - Starting OligoCodonPipeline
2025-03-11 17:24:37,058 - INFO - Loading oligo sequences...
2025-03-11 17:24:37,058 - INFO - Loading oligos from provided string
2025-03-11 17:24:37,059 - INFO - Loaded 3 oligo sequences
2025-03-11 17:24:37,059 - INFO - Processing FASTQ files...


0it [00:00, ?it/s]

2025-03-11 17:24:37,123 - INFO - Formatting results...
2025-03-11 17:24:37,125 - INFO - Saving results to: ../test_files/test_outs/test_cm1_counts.csv
2025-03-11 17:24:37,129 - INFO - Pipeline completed in 0.07 seconds
2025-03-11 17:24:37,132 - INFO - Starting OligoCodonPipeline
2025-03-11 17:24:37,133 - INFO - Loading oligo sequences...
2025-03-11 17:24:37,133 - INFO - Loading oligos from file: ../test_files/oligos.txt
2025-03-11 17:24:37,137 - INFO - Loaded 3 oligo sequences
2025-03-11 17:24:37,137 - INFO - Processing FASTQ files...



Results saved to:
  CSV: ../test_files/test_outs/test_cm1_counts.csv

Processed 3 oligos in 0.07 seconds


0it [00:00, ?it/s]

2025-03-11 17:24:37,191 - INFO - Formatting results...
2025-03-11 17:24:37,193 - INFO - Saving results to: ../test_files/test_outs/test_cm2_counts.csv
2025-03-11 17:24:37,197 - INFO - Pipeline completed in 0.06 seconds



Results saved to:
  CSV: ../test_files/test_outs/test_cm2_counts.csv

Processed 3 oligos in 0.06 seconds

CLI completed with exit code: 0


In [None]:
#| hide
#| test
def test_cli():
    """Check that the parser maps CLI flags onto the expected attributes.

    Only argument parsing is exercised; the pipeline itself is never run.

    Returns:
        True when every assertion passed
    """
    fastq_1 = '../test_files/test_1.fq.gz'
    fastq_2 = '../test_files/test_2.fq.gz'
    oligo = 'GCGGATTACATTNNNAAATAACATCGT'
    out_dir = '../test_files/test_outs'

    # Don't actually run the pipeline, just test that parsing works
    parsed = create_parser().parse_args([
        '--f1', fastq_1,
        '--f2', fastq_2,
        '--oligos', oligo,
        '--output', out_dir,
        '--prefix', 'test_cm',
    ])

    assert parsed.fastq_path_1 == fastq_1
    assert parsed.fastq_path_2 == fastq_2
    assert oligo in parsed.oligos_string
    assert parsed.output_path == out_dir

    # Test completed successfully
    print("CLI parsing test passed!")
    return True

# Run the test when this cell executes; test_cli() returns True once all of
# its internal assertions have passed.
test_result = test_cli()
assert test_result is True

CLI parsing test passed!


In [None]:
!oligoseeker --f1 ../test_files/test_1.fq.gz --f2 ../test_files/test_2.fq.gz \
--oligos "GCGGATTACATTNNNAAATAACATCGT,TGTGGTAAGCGGNNNGAAAGCATTTGT" --output ../test_files/test_outs --prefix test_cm1

  from pandas.core import (
2025-03-11 17:27:08,292 - INFO - Starting OligoCodonPipeline
2025-03-11 17:27:08,292 - INFO - Loading oligo sequences...
2025-03-11 17:27:08,292 - INFO - Loading oligos from provided string
2025-03-11 17:27:08,292 - INFO - Loaded 2 oligo sequences
2025-03-11 17:27:08,292 - INFO - Processing FASTQ files...
2000it [00:00, 69106.31it/s]
2025-03-11 17:27:08,338 - INFO - Formatting results...
2025-03-11 17:27:08,340 - INFO - Saving results to: ../test_files/test_outs/test_cm2_counts.csv
2025-03-11 17:27:08,345 - INFO - Pipeline completed in 0.05 seconds

Results saved to:
  CSV: ../test_files/test_outs/test_cm2_counts.csv

Processed 2 oligos in 0.05 seconds


In [None]:
!oligoseeker --f1 ../test_files/test_1.fq.gz --f2 ../test_files/test_2.fq.gz \
--oligos-file '../test_files/oligos.txt' --output ../test_files/test_outs --prefix test_cm2

  from pandas.core import (
2025-03-11 17:28:07,749 - INFO - Starting OligoCodonPipeline
2025-03-11 17:28:07,749 - INFO - Loading oligo sequences...
2025-03-11 17:28:07,749 - INFO - Loading oligos from file: ../test_files/oligos.txt
2025-03-11 17:28:07,750 - INFO - Loaded 3 oligo sequences
2025-03-11 17:28:07,750 - INFO - Processing FASTQ files...
2000it [00:00, 57755.28it/s]
2025-03-11 17:28:07,801 - INFO - Formatting results...
2025-03-11 17:28:07,802 - INFO - Saving results to: ../test_files/test_outs/test_cm2_counts.csv
2025-03-11 17:28:07,808 - INFO - Pipeline completed in 0.06 seconds

Results saved to:
  CSV: ../test_files/test_outs/test_cm2_counts.csv

Processed 3 oligos in 0.06 seconds
