In [None]:
# |default_exp bifrost

In [None]:
# |hide
# The `# |hide` directive above hides this cell: hidden cells are not exported to the module and do not appear in the documentation.
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [None]:
# |export
# That export there, it makes sure this code goes into the module.

# standard libs
import os
import re

# Common to template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test

#!export
from bifrost_bridge import core

In [None]:
# |hide
# This block should never be exported. It is to have python running in the project (and not the nbs) dir, and to initiate the package using pip.
# Concretely: it switches the process working directory to core.PROJECT_DIR, so relative paths used by
# later cells (e.g. './parsed_mlst.tsv') resolve against that directory rather than the directory the
# kernel was started in.
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [None]:
# |export
from bifrost_bridge.mlst import process_mlst_data
from bifrost_bridge.fastp import process_fastp_data

@call_parse
def process_qc_data(
    mlst_path:str = None,  # Path to the MLST input file; MLST processing is skipped when omitted
    fastp_path:str = None,  # Path to the FASTP input file; FASTP processing is skipped when omitted
    output_path:str = './output.tsv',  # Accepted for compatibility but currently unused (see docstring)
):
    """
    Command-line interface for processing QC data.

    Each input that is supplied is handed to its parser, which is asked to write
    its result to a fixed file in the current working directory:

    * `mlst_path`  -> `process_mlst_data`  -> './parsed_mlst.tsv'
    * `fastp_path` -> `process_fastp_data` -> './parsed_fastp.tsv'

    Inputs left as None are skipped; if neither is given the function does nothing.

    Arguments:
        mlst_path (str): Path to the MLST input file (default: None).
        fastp_path (str): Path to the FASTP input file (default: None).
        output_path (str): Not used by the current implementation; the output
            locations are the two fixed files listed above (default: './output.tsv').

    Raises:
        FileNotFoundError: If a supplied input path does not exist. Every supplied
            path is checked before any processing starts, so nothing is written
            when one of them is missing.
    """
    # Validate all supplied inputs up front so that a bad second path cannot leave
    # a half-finished run behind (MLST output written, FASTP output missing).
    for candidate in (mlst_path, fastp_path):
        if candidate is not None and not os.path.exists(candidate):
            raise FileNotFoundError(f"File not found: {candidate}")

    if mlst_path is not None:
        process_mlst_data(
            input_path=str(mlst_path),
            output_path='./parsed_mlst.tsv',
            replace_header=None,
            filter_columns="SampleID, Species, ST",
            # NOTE(review): header_exists=0 together with add_header presumably means the
            # input has no header row and these names are assigned to its columns —
            # confirm against process_mlst_data.
            header_exists=0,
            add_header="SampleID, Species, ST, 1, 2, 3, 4, 5, 6, 7"
        )

    if fastp_path is not None:
        process_fastp_data(
            input_path=fastp_path,
            output_path='./parsed_fastp.tsv',
            # NOTE(review): '£' looks like the separator for nested keys in the fastp
            # report — confirm against process_fastp_data.
            filter_columns="summary£fastp_version, summary£sequencing, summary£before_filtering£total_reads",
            replace_header="fastp_version, sequencing, total_reads"
        )


In [None]:
# |hide
# Example usage of the function (paths are relative to the project root, which is the working directory after the os.chdir(core.PROJECT_DIR) cell above)
#process_qc_data(
#    mlst_path='./test_data/mlst_report.tabular',
#    fastp_path='./test_data/TestSample2.json'
#)

      SampleID        Species  ST
0  TestSample2  campylobacter  22
  fastp_version                            sequencing  total_reads
0        0.23.4  paired end (151 cycles + 151 cycles)      4369610


##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# This is included at the end so that running the notebook from top to bottom also transfers the exported code to the associated python package.
# nbdev.nbdev_export() performs that transfer; it relies on the `import nbdev` in the hidden cell near the top of this notebook.

nbdev.nbdev_export()