In [None]:
# |default_exp mlst

In [None]:
# |hide
# See above? this hides these blocks, meaning these blocks aren't in the module and aren't in the documentation
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [None]:
# |export
# That export there, it makes sure this code goes into the module.

# standard libs
import os
import re

# Common to template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test
from bifrost_bridge import core


Because the notebooks are now located in the `nbs` folder, we need to change Python's working directory (`wd`) for the notebook to the project folder. Keep this cell in all notebooks, but do not export it to the package.

In [None]:
# This block should never be exported. It switches the working directory to the project folder (core.PROJECT_DIR) so the notebook runs from the project dir and not from the nbs dir.
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [None]:
# |export

def process_mlst_data(
    input_path:str,
    output_path:str = './output.tsv',
    add_header:str = None,
    replace_header:str = None,
    filter_columns:str = None):

    """
    Process an MLST result table and write the result as a TSV file.

    The input is read once as a TSV file. If `check_if_mlst_empty` flags the table as empty,
    an empty file is written to `output_path` and nothing else is done. Otherwise the optional
    steps are applied in this order: add a header, replace the header, filter columns; the
    table is then exported to `output_path`.

    Arguments:
        input_path (str): Path to the input file.
        output_path (str): Path to the output file (default: './output.tsv').
        add_header (str): Use when the input file has no header line. Exactly four
            comma-separated names, e.g. "SampleID, Species, ST, Allele". The first three name
            the first three columns; the fourth is a prefix numbered from 1 for every remaining
            column (Allele1, Allele2, ...). The line that was read as the header is kept as the
            first data row (default: None).
        replace_header (str): Header passed to `rename_header` of the table object to replace
            the existing header (default: None).
        filter_columns (str): Columns passed to `filter_columns` of the table object
            (default: None).

    Raises:
        FileNotFoundError: If `input_path` does not exist.
        ValueError: If `add_header` does not contain exactly four names, or the table has
            fewer than four columns when `add_header` is given.
    """

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    # Read the file once; the same table serves the emptiness check and the processing below.
    df = core.DataFrame()
    df.import_data(input_path, file_type='tsv')

    if check_if_mlst_empty(df):
        # Nothing to report: leave an empty output file behind and stop.
        with open(output_path, 'w') as f:
            f.write('')
        return

    if add_header:
        # Build (and validate) the new header before touching the table.
        header_list = _expand_mlst_header(add_header, len(df.df.columns))
        # The file had no header, so the line parsed as column names is really data.
        df.df.loc[-1] = df.df.columns  # adding header as first row
        df.df.index = df.df.index + 1  # shifting index
        df.df = df.df.sort_index()  # sorting by index to move the header row to the top
        df.df.columns = header_list  # setting new header

    if replace_header:
        df.rename_header(replace_header)

    if filter_columns:
        df.filter_columns(filter_columns)

    df.export_data(output_path, file_type='tsv')


def _expand_mlst_header(add_header, n_columns):
    """
    Expand a four-name header specification into one name per table column.

    Arguments:
        add_header (str): Four comma-separated names; whitespace around each name is ignored.
        n_columns (int): Number of columns the resulting header must cover.

    Returns:
        list: The first three names unchanged, followed by the fourth name suffixed with
        1, 2, ... so that the list has `n_columns` entries.

    Raises:
        ValueError: If there are not exactly four names or `n_columns` is less than four.
    """
    names = [name.strip() for name in add_header.split(",")]
    if len(names) != 4:
        raise ValueError(
            f"add_header must contain exactly 4 comma-separated names, got {len(names)}: {add_header!r}"
        )
    if n_columns < 4:
        raise ValueError(
            f"add_header needs a table with at least 4 columns, but the input has {n_columns}."
        )
    *fixed_names, prefix = names
    # Columns from the fourth onwards are numbered prefix1 .. prefix(n_columns - 3).
    return fixed_names + [f"{prefix}{number}" for number in range(1, n_columns - 2)]

def check_if_mlst_empty(df):
    """
    Tell whether an imported MLST table is flagged as empty.

    The table counts as empty when the name of its second column is exactly "-".

    Arguments:
        df: Object exposing the table as `df.df`, whose `columns` can be indexed and measured
            with `len`.

    Returns:
        bool: True if the second column name is "-"; False otherwise, including when the
        table has fewer than two columns.
    """
    columns = df.df.columns
    # Guard the index access so a table with fewer than two columns does not raise IndexError.
    return len(columns) > 1 and bool(columns[1] == "-")

@call_parse
def process_mlst_data_from_cli(
    input_path:str,
    output_path:str = './output.tsv',
    add_header:str = None,
    replace_header:str = None,
    filter_columns:str = None):
    "Command-line entry point: forwards all arguments unchanged to `process_mlst_data`."
    # Keyword forwarding keeps every CLI option tied to the parameter of the same name.
    process_mlst_data(
        input_path=input_path,
        output_path=output_path,
        add_header=add_header,
        replace_header=replace_header,
        filter_columns=filter_columns,
    )

In [None]:
# |hide
# Example usage of the function
#process_mlst_data( 
#    #input_path='test_data/mlst_empty.tabular',
#    input_path='test_data/mlst_rep_test.tsv',
#    output_path='test_data/output.tsv',
#    add_header="SampleID, Species, ST, z",
#    replace_header=None, 
#    filter_columns="SampleID, Species, ST"
#)
#process_mlst_data(
#        input_path='test_data/mlst_report.tabular',
#        output_path='test_data/bifrost/parsed_mlst.tsv',
#        replace_header=None, 
#        #filter_columns="SampleID, Species, ST",
#        add_header="SampleID, Species, ST, Allele"
#)

Empty DataFrame
Columns: [TestSample2, campylobacter, 22, aspA(1), glnA(3), gltA(6), glyA(4), pgm(3), tkt(3), uncA(3)]
Index: []
      SampleID        Species  ST  Allele1  Allele2  Allele3  Allele4 Allele5  \
0  TestSample2  campylobacter  22  aspA(1)  glnA(3)  gltA(6)  glyA(4)  pgm(3)   

  Allele6  Allele7  
0  tkt(3)  uncA(3)  


##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# Keep this as the last cell: when the notebook is run from top to bottom, this call also transfers the code to the associated python package.

nbdev.nbdev_export()