In [None]:
# |default_exp amrfinderplus

In [1]:
# |hide
# The `# |hide` directive above hides this cell: it is neither exported to the module nor shown in the documentation.
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [2]:
# |export
# The `# |export` directive above ensures the code in this cell is written to the module.

# standard libs
import os
import re

# Common to template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test
from bifrost_bridge import core


Because the notebooks are now located in the `nbs` folder, we need to change Python's working directory (`wd`) for the notebook to the project folder. Keep this cell in all notebooks, but do not export it to the package.

In [3]:
# This cell must never be exported to the package.
# It moves the Python working directory to the project root (core.PROJECT_DIR)
# instead of the `nbs` folder the notebook lives in.
# NOTE(review): the original note also says this is needed "to initiate the package
# using pip" -- that step is not visible in this cell; confirm whether it still applies.
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [4]:
# |export

def process_amrfinderplus_data(
    input_path:str,
    output_path:str = './output.tsv',
    replace_header:str = None,
    filter_columns:str = None,
    add_header:str = None):

    """
    Collapse an amrfinderplus TSV file into a single-row TSV file.

    The input is loaded into a `core.DataFrame`, every column is reduced to one
    comma-separated string made of all of its values (each converted with `str`),
    and the resulting one-row table is optionally re-headed and column-filtered
    before being written out as TSV.

    Arguments:
        input_path (str): Path to the input TSV file.
        output_path (str): Path to the output TSV file (default: './output.tsv').
        replace_header (str): If truthy, passed to `core.DataFrame.rename_header`
            after the columns have been collapsed (default: None).
        filter_columns (str): If truthy, passed to `core.DataFrame.filter_columns`
            after any header replacement (default: None).
        add_header (str): Forwarded to `core.DataFrame.import_data` as `add_header`
            (default: None). Presumably the header to use when the input file has
            none -- confirm against `bifrost_bridge.core`.

    Returns:
        None. The result is written to `output_path`.
    """

    def concatenate_vector(values, sep=','):
        # Join with the requested separator (the previous version accepted `sep`
        # but always joined with a hard-coded ',').
        return sep.join(str(value) for value in values)

    df = core.DataFrame()
    df.import_data(input_path, file_type='tsv', add_header=add_header)

    # axis=0 applies the helper column by column, giving one joined string per
    # column; to_frame().T turns that Series into a one-row table whose columns
    # are the original column names.
    collapsed = df.df.apply(concatenate_vector, axis=0)
    df.df = collapsed.to_frame().T

    # Header replacement runs before filtering, so `filter_columns` is applied to
    # the table after any renaming has taken place.
    if replace_header:
        df.rename_header(replace_header)

    if filter_columns:
        df.filter_columns(filter_columns)

    df.export_data(output_path, file_type='tsv')

@call_parse
def process_amrfinderplus_data_from_cli(
    input_path:str,  # Path to the input amrfinderplus TSV file
    output_path:str = './output.tsv',  # Path to the output TSV file
    replace_header:str = None,  # Header to replace the existing header
    filter_columns:str = None,  # Columns to filter from the header
    add_header:str = None,  # Header to add if the input file has none
):
    """Command-line entry point: process an amrfinderplus TSV file into a single-row TSV file."""
    # The per-parameter comments above and this docstring are what `call_parse`
    # uses to build the `--help` output; the work itself is delegated to
    # `process_amrfinderplus_data`. Arguments are forwarded by keyword so the
    # wrapper stays correct even if the library function's parameter order changes.
    process_amrfinderplus_data(
        input_path=input_path,
        output_path=output_path,
        replace_header=replace_header,
        filter_columns=filter_columns,
        add_header=add_header,
    )

In [None]:
#|hide
# Example usage: run the processor on the `test_data/amrfinderplus.tsv` fixture and
# write the result to `test_data/amrfinderplus_testout.tsv`.
# Both paths are relative to the current working directory.
process_amrfinderplus_data(
   input_path='test_data/amrfinderplus.tsv', 
   output_path='test_data/amrfinderplus_testout.tsv',
   # Optional column filter, currently disabled:
   #filter_columns="Query / Template length"
)

In [None]:
#|hide
# Example usage: run the processor on the `test_data/amrfinderplus_long_example.tsv`
# fixture (presumably a larger, multi-row input, judging by the file name) and write
# the result to `test_data/amrfinderplus_long_example_testout.tsv`.
process_amrfinderplus_data(
   input_path='test_data/amrfinderplus_long_example.tsv', 
   output_path='test_data/amrfinderplus_long_example_testout.tsv',
   # Optional column filter, currently disabled:
   #filter_columns="Query / Template length"
)

In [62]:
#|hide
# Example usage: run the processor on the `test_data/amrfinderplus_empty.tsv` fixture
# (presumably an input without data rows, judging by the file name) and write the
# result to `test_data/amrfinderplus_empty_testout.tsv`.
process_amrfinderplus_data(
   input_path='test_data/amrfinderplus_empty.tsv', 
   output_path='test_data/amrfinderplus_empty_testout.tsv',
   # Optional column filter, currently disabled:
   #filter_columns="Query / Template length"
)

  Database Plasmid Identity Query / Template length Contig Position in contig  \
0                                                                               

  Note Accession number  
0                        


##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# Kept as the last cell so that running the notebook from top to bottom also
# transfers the exported code to the associated Python package.

nbdev.nbdev_export()