In [None]:
# |default_exp quast

In [None]:
# |hide
# See above? this hides these blocks, meaning these blocks aren't in the module and aren't in the documentation
import nbdev
from nbdev.showdoc import *  # ignore this Pylance warning in favor of following nbdev docs

In [None]:
# |export
# That export there, it makes sure this code goes into the module.

# standard libs
import os
import re

# Common to template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv  # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml  # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore  # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
from fastcore import (
    test,
)
from fastcore.script import (
    call_parse,
)  # for @call_parse, https://fastcore.fast.ai/script
import json  # for nicely printing json and yaml
from fastcore import test

#!export
from bifrost_bridge import core


In [None]:
# |hide
# This cell must never be exported. It changes the Python working directory to
# core.PROJECT_DIR so the following cells run from the project directory rather
# than the nbs directory (per the original note, this also supports initiating
# the package using pip).
os.chdir(core.PROJECT_DIR)

##################################################CODE_SEGMENT###########################################

In [None]:
# |export

def process_quast_data(
    input_path:str,
    output_path:str = './output.tsv',
    add_header:str = '',
    replace_header:str = None,
    filter_columns:str = None,
    transpose:bool = True):

    """
    Process QUAST data.

    This function processes QUAST data files by importing the data, optionally transposing it
    and assigning new column names, filtering columns, replacing the header, and exporting the
    processed data to an output file as TSV. Column filtering is applied before the header is
    replaced.

    Arguments:
        input_path (str): Path to the input file.
        output_path (str): Path to the output file (default: './output.tsv').
        add_header (str): Column names to assign (default: '').
            With transpose=True this is a comma-separated string (all spaces are removed before
            splitting) or a list of names; the number of names must equal the number of columns
            of the transposed table. With transpose=False the value is forwarded unchanged to
            `core.DataFrame.import_data`.
        replace_header (str): Header to replace the existing header (default: None).
        filter_columns (str): Columns to filter from the header (default: None).
        transpose (bool): Whether to transpose the data (default: True).

    Raises:
        FileNotFoundError: If input_path does not exist.
        ValueError: If, with transpose=True, add_header is neither a string nor a list, or the
            number of names does not match the number of columns.
    """

    # Fail fast on a bad path before doing any other work.
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    df = core.DataFrame()

    if transpose:
        # The file is read as two columns labelled 'column_names' and 'values'; after
        # transposing, the 'column_names' row supplies the column labels and is then dropped,
        # leaving only the 'values' row.
        df.import_data(input_path, file_type='tsv', add_header=['column_names', 'values'])
        transposed = df.df.T
        transposed = transposed.rename(columns=transposed.loc['column_names'])
        df.df = transposed.drop('column_names', axis=0)

        if add_header:
            # Normalise to a list of names first, so the type check really guards the
            # string-only `.replace` call and list input is accepted.
            if isinstance(add_header, str):
                new_names = add_header.replace(" ", "").split(',')
            elif isinstance(add_header, list):
                new_names = list(add_header)
            else:
                raise ValueError(f"Error: Invalid type for add_header ({type(add_header)}). Must be a string or list.")

            if len(new_names) != len(df.df.columns):
                raise ValueError(f"Error: Number of new column names ({len(new_names)}) must match the number of columns in the DataFrame ({len(df.df.columns)}).")
            df.df.columns = new_names
    else:
        # Already one-row-per-sample: header handling is delegated to import_data.
        df.import_data(input_path, file_type='tsv', add_header=add_header)

    if filter_columns:
        df.filter_columns(filter_columns)

    if replace_header:
        df.rename_header(replace_header)

    df.export_data(output_path, file_type='tsv')


@call_parse
def process_quast_data_from_cli(
    input_path:str,
    output_path:str = './output.tsv',
    add_header:str = '',
    replace_header:str = None,
    filter_columns:str = None,
    transpose:bool = True):
    """Command-line entry point that forwards every argument unchanged to `process_quast_data`."""
    process_quast_data(
        input_path=input_path,
        output_path=output_path,
        add_header=add_header,
        replace_header=replace_header,
        filter_columns=filter_columns,
        transpose=transpose,
    )

In [None]:
# |hide
# Example usage of the function
#process_quast_data(
#   input_path='test_data/quast.tsv', 
#   output_path='test_data/quast_test_out1.tsv',
#   #add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',
#   #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',
#   #filter_columns='Assembly,# contigs (>= 0 bp), N50',
#   transpose=True
#)

In [None]:
# |hide
# Example usage of the function
#process_quast_data(
#   input_path='test_data/quast_transposed.tsv', 
#   output_path='test_data/quast_test_out2.tsv',
#   #add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',
#   #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',
#   #filter_columns='Assembly,# contigs (>= 0 bp), N50',
#   #filter_columns = '1,2,3',
#   transpose=False
#)

##################################################CODE_SEGMENT###########################################

In [None]:
#| hide
# Kept as the last cell so that running the notebook from top to bottom also
# transfers the exported code to the associated Python package.

nbdev.nbdev_export()