In [None]:
# |default_exp fastp

In [None]:
# |hide
# The `# |hide` directive above hides this cell: it is included neither in the exported module nor in the documentation.
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [None]:
# |export
# The `# |export` directive above makes sure the code in this cell goes into the module.

# standard libs
import os
import re

# Common to template
# Add to the requirements in settings.ini (the package name is python-dotenv); for conda builds, ensure `conda config --add channels conda-forge` has been run.
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test

#!export
from bifrost_bridge import core


In [None]:
# |hide
# This cell must never be exported. It makes Python run from the project directory (core.PROJECT_DIR)
# rather than the nbs directory; per the original note, this is also what allows the package to be initiated using pip.
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [None]:
# |export

def process_fastp_data(
    input_path:str,
    output_path:str = './output.tsv',
    replace_header:str = None,
    filter_columns:str = None):

    """
    Convert a fastp JSON report into a TSV file.

    The input file is loaded as JSON into a `core.DataFrame`, optionally
    reduced with `filter_columns`, optionally given a new header with
    `replace_header` (filtering is applied before renaming), and finally
    exported to `output_path` as TSV.

    Arguments:
        input_path (str): Path to the fastp JSON input file.
        output_path (str): Path of the TSV file to write (default: './output.tsv').
        replace_header (str): New header, handed unchanged to
            `core.DataFrame.rename_header`. The example in this notebook passes
            a single comma-separated string of names. Default None (or an empty
            string) keeps the existing header.
        filter_columns (str): Column selection, handed unchanged to
            `core.DataFrame.filter_columns`. The example in this notebook passes
            a single comma-separated string. Default None (or an empty string)
            skips filtering.

    Raises:
        FileNotFoundError: If `input_path` does not exist.
    """

    # Validate the input before constructing anything, so a bad path fails fast.
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    table = core.DataFrame()
    table.import_data(input_path, file_type='json')

    # Filter first: the filter refers to the columns as they are named on import.
    if filter_columns:
        table.filter_columns(filter_columns)

    if replace_header:
        table.rename_header(replace_header)

    table.export_data(output_path, file_type='tsv')


@call_parse
def process_fastp_data_from_cli(
    input_path:str,  # Path to the fastp JSON input file
    output_path:str = './output.tsv',  # Path of the TSV file to write
    replace_header:str = None,  # New header for the output, passed through to process_fastp_data
    filter_columns:str = None,  # Columns to keep, passed through to process_fastp_data
):
    "Command-line entry point: convert a fastp JSON report into a TSV file."
    # Thin wrapper: all the work happens in process_fastp_data, which stays
    # importable and callable without the command-line argument parsing.
    process_fastp_data(
        input_path=input_path,
        output_path=output_path,
        replace_header=replace_header,
        filter_columns=filter_columns,
    )

In [None]:
# |hide
# Example usage: read a test fastp JSON report, keep three fields, give them short column
# names, and write the result as TSV.
# NOTE(review): the '£' in filter_columns appears to separate nested JSON keys — confirm against core.DataFrame.
process_fastp_data(
    input_path='test_data/TestSample2.json', 
    output_path='test_data/TestSample2.tsv',
    replace_header="fastp_version, sequencing, total_reads",
    filter_columns="summary£fastp_version, summary£sequencing, summary£before_filtering£total_reads"
)

  fastp_version                            sequencing  total_reads
0        0.23.4  paired end (151 cycles + 151 cycles)      4369610


##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# Kept as the last cell so that running the notebook from top to bottom also transfers its code to the associated Python package.

nbdev.nbdev_export()