# CLI

> Command-line entry points for the `add_pai` and `flag_proprietary_from_pg` pipelines


In [None]:
#| default_exp cli

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import argparse
from protein_cutter.pai import add_pai
from protein_cutter.core import flag_proprietary_from_pg

In [None]:
#| export
from pathlib import Path
import os
import yaml
# Locate the repository root and derive the data/config directories from it.
if 'GITHUB_WORKSPACE' in os.environ:
    # GitHub Actions exposes the checkout directory through this variable.
    REPO_ROOT = Path(os.environ['GITHUB_WORKSPACE'])
else:
    # Local development: take the nearest directory (the working directory
    # itself or one of its ancestors) that contains 'settings.ini'. If none
    # does, fall back to the working directory.
    REPO_ROOT = Path.cwd()
    REPO_ROOT = next(
        (
            candidate
            for candidate in (REPO_ROOT, *REPO_ROOT.parents)
            if (candidate / 'settings.ini').exists()
        ),
        REPO_ROOT,
    )

TEST_DATA = REPO_ROOT / 'test_data'
CONFIG_FILES = REPO_ROOT / 'config_files'

# Report the resolved locations.
print(f"Repo root: {REPO_ROOT}")
print(f"Test data dir: {TEST_DATA}")
print(f"Test data exists: {TEST_DATA.exists()}")

Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True


In [None]:
#| export
# Load the settings from config_files/config.yaml into `config`.
# The file is decoded as UTF-8 explicitly so that parsing does not depend on
# the platform's locale encoding.
with open(CONFIG_FILES / 'config.yaml', 'r', encoding='utf-8') as stream:
    config = yaml.safe_load(stream)
# Echo the parsed settings.
print(config)

{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}


In [None]:
#| export
def add_pai_cli(args=None) -> int:
    """Parse command-line options and run `add_pai` with them.

    Args:
        args: Argument strings to parse. When None, argparse reads the
            process command line.

    Returns:
        0 once `add_pai` has returned.
    """
    cli = argparse.ArgumentParser(description="Compute PAI for a protein report.")

    # The three input files are all mandatory and declared the same way.
    required_inputs = (
        ("--prot", "Protein report TSV"),
        ("--pep", "Peptide report TSV"),
        ("--fasta", "FASTA file"),
    )
    for flag, help_text in required_inputs:
        cli.add_argument(flag, required=True, help=help_text)

    cli.add_argument("--out", default=None, help="Output TSV (optional)")
    cli.add_argument("--enzyme", default="trypsin_full", help="Protease name")
    cli.add_argument(
        "--no-filter-missed",
        action="store_true",
        help="Do not filter missed cleavages",
    )
    opts = cli.parse_args(args=args)

    # The flag is phrased negatively on the command line; invert it for the API.
    keep_filter = not opts.no_filter_missed

    add_pai(
        prot_df_path=opts.prot,
        pep_df_path=opts.pep,
        fasta_path=opts.fasta,
        output_path=opts.out,
        enzyme=opts.enzyme,
        filter_missed_cleavages=keep_filter,
    )
    return 0

In [None]:
# Run the `add_pai` command on the pipeline test files and write the result
# to prot_report_cli.tsv in the same folder.
# NOTE(review): presumably `add_pai` is the console script bound to add_pai_cli - confirm in the package settings.
!add_pai \
--prot {TEST_DATA}'/pipeline_test/prot_report.tsv' \
--pep {TEST_DATA}'/pipeline_test/pep_report.tsv' \
--fasta {TEST_DATA}'/pipeline_test/prot.fa' \
--out {TEST_DATA}'/pipeline_test/prot_report_cli.tsv'

Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}

In [None]:
#| export
def flag_proprietary_from_pg_cli(args=None) -> int:
    """Console entrypoint for flag_proprietary_from_pg.

    Args:
        args: Argument strings to parse. When None, argparse reads the
            process command line.

    Returns:
        0 once `flag_proprietary_from_pg` has returned.
    """
    # Pass the text as `description=`: the first positional parameter of
    # ArgumentParser is `prog`, which would put this sentence in the usage
    # line in place of the program name and leave the parser without a description.
    parser = argparse.ArgumentParser(
        description="Flag proprietary peptides based on protein accessions."
    )
    parser.add_argument("--input", required=True, help="Input peptide report (TSV/CSV)")
    parser.add_argument("--output", required=True, help="Output file with novelty flag")
    parser.add_argument("--accession-col", default="PG.ProteinAccessions",
                        help="Column with protein accessions")
    parser.add_argument("--new-col-name", default="is_novel",
                        help="Name for the new boolean flag column")
    parser.add_argument("--uniprot-prefixes", default=">fl,>sp",
                        help="Comma-separated UniProt prefixes (default: >fl,>sp)")
    parser.add_argument("--keep-only-novel", action="store_true",
                        help="Only write rows flagged as novel")
    parser.add_argument("--sep", default=None,
                        help="Field separator (default: auto-detect)")
    parser.add_argument("--no-progress", action="store_true",
                        help="Disable progress bar")
    ns = parser.parse_args(args=args)

    # Split the comma-separated list, trimming whitespace and dropping empty items.
    prefixes = tuple(p.strip() for p in ns.uniprot_prefixes.split(',') if p.strip())

    flag_proprietary_from_pg(
        input_path=ns.input,
        output_path=ns.output,
        accession_col=ns.accession_col,
        new_col_name=ns.new_col_name,
        uniprot_prefixes=prefixes,
        keep_only_novel=ns.keep_only_novel,
        sep=ns.sep,
        # The flag is phrased negatively on the command line; invert it for the API.
        show_progress=not ns.no_progress,
    )
    return 0


In [None]:
# Run the `flag_proprietary_from_pg` command on the Spectronaut peptide test
# report, passing '>sp' as the only UniProt prefix, and write the flagged table
# to a new file.
# NOTE(review): presumably this console script is bound to flag_proprietary_from_pg_cli - confirm in the package settings.
!flag_proprietary_from_pg \
--input {TEST_DATA}'/test_spectronaut_pep.tsv' \
--uniprot-prefixes '>sp' \
--output {TEST_DATA}'/test_spectronaut_pep_annoated_cli.tsv'

Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}

In [None]:
# Same command as the previous cell, with --keep-only-novel added, written to
# a separate output file.
!flag_proprietary_from_pg \
--input {TEST_DATA}'/test_spectronaut_pep.tsv' \
--uniprot-prefixes '>sp' \
--keep-only-novel \
--output {TEST_DATA}'/test_spectronaut_pep_annoated_cli_only_novel.tsv'

Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}
/Users/mtinti/git_projects/protein_cutter/nbs
Repo root: /Users/mtinti/git_projects/protein_cutter
Test data dir: /Users/mtinti/git_projects/protein_cutter/test_data
Test data exists: True
{'min_pep_length': 6, 'max_pep_length': 52, 'min_mz_range': 200, 'max_mz_range': 4000, 'enzyme': 'trypsin_full', 'input_pep': 'test_spectronaut_pep_out.tsv', 'input_prot': 'test_spectronaut_prot_out.tsv', 'input_fasta': 'test_sequence.fa'}

In [None]:
!echo {TEST_DATA}

/Users/mtinti/git_projects/protein_cutter/test_data


In [None]:
print(TEST_DATA)

/Users/mtinti/git_projects/protein_cutter/test_data


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()