In [None]:
# |default_exp pmlst

In [None]:
# |hide
# See above? this hides these blocks, meaning these blocks aren't in the module and aren't in the documentation
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [None]:
# |export
# That export there, it makes sure this code goes into the module.

# standard libs
import os
import re

# Common to template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test
from bifrost_bridge import core


Because the notebooks are now located in the `nbs` folder, we need to change Python's working directory (`wd`) for the notebook to the project folder. Keep this cell in all notebooks, but do not export it to the package.

In [None]:
# This block should never be exported. It switches the working directory to core.PROJECT_DIR
# so that relative paths used later in this notebook resolve from the project folder (and not the nbs dir).
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [None]:
# |export

def process_pmlst_data(
    input_path: str,
    output_path: str = './output.tsv',
    filter_columns: str = None,
):
    """
    Load a pMLST table from a TSV file, show it, and write it back out as TSV.

    The table is handled through `core.DataFrame`: it is imported from
    `input_path`, shown, and then exported to `output_path`.

    Arguments:
        input_path (str): Path to the input TSV file; it must exist.
        output_path (str): Path the TSV output is written to (default: './output.tsv').
        filter_columns (str): Accepted but not used by this function; no column
            filtering is applied (default: None).

    Raises:
        FileNotFoundError: If `input_path` does not exist.
    """
    tsv_format = 'tsv'

    # The wrapper is created before the path check, so it is constructed even
    # when the input file turns out to be missing.
    pmlst_table = core.DataFrame()

    input_is_missing = not os.path.exists(input_path)
    if input_is_missing:
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    pmlst_table.import_data(input_path, file_type=tsv_format)
    pmlst_table.show()
    pmlst_table.export_data(output_path, file_type=tsv_format)

@call_parse
def process_pmlst_data_from_cli(
    input_path:str,  # Path to the input pMLST TSV file (must exist)
    output_path:str = './output.tsv',  # Path the TSV output is written to
    filter_columns:str = None,  # Accepted, but not applied by process_pmlst_data
):
    """
    Load a pMLST TSV file, show it, and write it back out as TSV.

    Command-line entry point: forwards all arguments unchanged to
    `process_pmlst_data`. The per-parameter comments above and this docstring
    are intended to serve as the `--help` text produced by `call_parse`.

    Raises:
        FileNotFoundError: If `input_path` does not exist (raised by `process_pmlst_data`).
    """
    # Forward by keyword so the mapping to process_pmlst_data's parameters is explicit.
    process_pmlst_data(
        input_path=input_path,
        output_path=output_path,
        filter_columns=filter_columns,
    )

In [None]:
# |hide
# Example usage of the function: read the sample pMLST TSV and write a parsed copy next to it.
# Both paths are relative, so this relies on the working directory set by os.chdir(core.PROJECT_DIR) earlier.
# filter_columns is left at its default; process_pmlst_data does not apply it.
process_pmlst_data(
    input_path='test_data/simple_output.tsv', 
    output_path='test_data/parsed_simple_output.tsv'
)

                                            plasmids       IncF  IncI1  \
0  Col(BS512),IncFIA,IncFIB(AP001918),IncFII,IncF...  F2:A2:B20    NaN   

   IncA/C  IncHI1  IncHI2  IncN    pMLST summary  
0     NaN     NaN     NaN   NaN  incf[F2:A2:B20]  


##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# Keep this as the last cell: when the notebook is run from top to bottom, this call also transfers the exported code to the associated Python package.

nbdev.nbdev_export()