## Run qcecolisummary
Functions that allow the user to run qcecolisummary.py

In [None]:
#|default_exp run_qcecolisummary
# This will create a module at bps_fbi_sp_ecoli/run_qcecolisummary.py

In [None]:
#|hide
# Nbdev requires these imports
import nbdev
from nbdev.showdoc import *

In [None]:
#|export
# Standard libs to be used in the notebook
import os
import re
import sys
import shutil
import subprocess
from pathlib import Path

# Common to nbdev template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
import fastcore.script
from fastcore.script import call_parse

# Project specific libraries
from bps_fbi_sp_ecoli import core, helpers


In [None]:
#|hide
# Show the public names exposed by the project modules used in this notebook, one list per line
print(core.__all__, helpers.__all__, sep="\n")

In [None]:
#|hide
# Development-only configuration: load the project's default config file so the
# cells below can be run interactively from the notebook
notebook_config = core.get_config(f"{core.PROJECT_DIR}/config/config.default.env")
#core.show_project_env_vars(notebook_config)

# Inputs
INPUT_DIR = notebook_config['run_qcecolisummary']['input']['dir']

# Outputs
OUTPUT_DIR = notebook_config['run_qcecolisummary']['output']['dir']
COMMAND_FILE = notebook_config["run_qcecolisummary"]["output"]["command_file"]

In [None]:
#|export
def qcecolisummary(input_dir: Path, output_dir:Path) -> str:
    """Build the shell command that runs qcecolisummary.py on one input folder.

    The command is only generated here, never executed.

    Args:
        input_dir: Folder passed to the script via `-i`; relative paths are made absolute.
        output_dir: Folder passed to the script via `-o`; relative paths are made absolute.

    Returns:
        The complete command line as a single string, with every path shell-quoted.
    """
    # Local import keeps this function self-contained
    import shlex

    # Get the full paths of relative paths
    input_dir = os.path.abspath(input_dir)
    output_dir = os.path.abspath(output_dir)

    script = f"{core.PACKAGE_DIR}/scripts/qcecolisummary.py"

    # Note the qcecolisummary command will create output folders if they don't exist. This includes parent directories.
    # Every path is quoted so folders containing spaces or shell metacharacters
    # do not split or alter the command; plain paths are left untouched by shlex.quote.
    command = " ".join([
        "python3",
        shlex.quote(script),
        "-i",
        shlex.quote(input_dir),
        "-o",
        shlex.quote(output_dir),
    ])

    # This is where one would normally run it with subprocess or such but am utilizing linux commands in Notebooks as the alternative.
    return command

In [None]:
#|export
def generate_qcecolisummary_commands(input_dir: Path, output_dir:Path):
    """Return the list of qcecolisummary commands for an input folder.

    A single command covering the whole `input_dir` is produced; both paths
    are resolved to absolute paths before the command is built.
    """
    absolute_input = os.path.abspath(input_dir)
    absolute_output = os.path.abspath(output_dir)

    # One command handles the complete input folder, so the list has exactly one entry
    return [qcecolisummary(absolute_input, absolute_output)]

In [None]:
# #|export
# commands = generate_qcecolisummary_commands(INPUT_DIR, OUTPUT_DIR)
# helpers.write_list_to_file(commands, COMMAND_FILE)
# for command in commands:
#     print(command)
# #     !{command}
# #     core.notification(NOTIFICATIONS)

## Turn the scripts into a command line tool

In [None]:
#|export
# These are the command line options for FBI_run_qcecolisummary
# Add 'FBI_run_qcecolisummary' into settings.ini
@call_parse
def cli(
    input:Path = None, # Path to input folder that contains the results of kma matching against the ecoligenes db (the folder)
    output:Path = None, # Path to the output directory
    command_file:str = None, # Path to file to write commands to
    execute:bool = True, # Run commands in command file
    to_stdout:bool = False, # If true, will write to stdout instead of file, mutually exclusive with command_file
    overwrite:bool = False, # If true, will overwrite command_file if it exists
    config_file:str = None # Config file to overwrite default settings, arg parse values will override config file values,
    ) -> None:
    """
    This program takes as input a folder containing .tsv files
    and allows you to run qcecolisummary.py on all Ecoli samples."""
    config = core.get_config(config_file) # Set env vars and get config variables

    # Command line arguments take precedence over the values read from the config file
    if input is not None:
        config["run_qcecolisummary"]["input"]["dir"] = input
    if output is not None:
        config['run_qcecolisummary']['output']['dir'] = output
    if command_file is not None:
        config['run_qcecolisummary']['output']['command_file'] = command_file
    if to_stdout:
        config['run_qcecolisummary']['output']['to_stdout'] = True
    if overwrite:
        config['run_qcecolisummary']['output']['overwrite'] = True

    commands = generate_qcecolisummary_commands(
        config["run_qcecolisummary"]["input"]["dir"],
        config["run_qcecolisummary"]["output"]["dir"]
    )
    helpers.write_list_to_file(
        commands,
        config["run_qcecolisummary"]["output"]["command_file"],
        overwrite=config["run_qcecolisummary"]["output"]["overwrite"],
        to_stdout=config["run_qcecolisummary"]["output"]["to_stdout"]
    )

    # The generated command only invokes `python3`, so that is the tool whose
    # presence gates execution (kma is not called by the command itself).
    # NOTE(review): with to_stdout set the commands may not have been written to
    # command_file before it is executed - confirm against helpers.write_list_to_file.
    if execute and helpers.tools_are_present(["python3"]):
        helpers.execute_commands_from_file(config["run_qcecolisummary"]["output"]["command_file"])

In [None]:
#| hide
# Smoke test: build the commands from the default config without executing them
cli(config_file=f"{core.PROJECT_DIR}/config/config.default.env", execute=False)

In [None]:
#| hide
# Kept as the last cell so that running the notebook from top to bottom also
# exports the code to the associated python package
nbdev.nbdev_export()