## Run postecolityping
Functions that allow the user to run postecolityping.py

In [None]:
#|default_exp run_postecolityping
# This will create a package named bps_fbi_sp_ecoli/run_postecolityping.py

In [None]:
#|hide
# Nbdev requires these imports
import nbdev
from nbdev.showdoc import *

In [None]:
#|export
# Standard libs to be used in the notebook
import os
import re
import sys
import shutil
import subprocess
from pathlib import Path

# Common to nbdev template
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
import fastcore.script
from fastcore.script import call_parse

# Project specific libraries
from bps_fbi_sp_ecoli import core, helpers


In [None]:
#|hide
# Show the public names exported by the project modules, one module per line.
print(core.__all__, helpers.__all__, sep="\n")

In [None]:
#|hide
# Notebook-only configuration, loaded from the default config file for development purposes.
notebook_config = core.get_config(f"{core.PROJECT_DIR}/config/config.default.env")
#core.show_project_env_vars(notebook_config)

# All values are read from the `run_postecolityping` section, which is the
# section the `cli` entry point of this notebook reads its settings from.
# Inputs
INPUT_DIR = notebook_config['run_postecolityping']['input']['dir']
SAMPLE_SHEET = notebook_config['run_postecolityping']['input']['sample_sheet']
# Outputs
OUTPUT_DIR = notebook_config['run_postecolityping']['output']['dir']
COMMAND_FILE = notebook_config["run_postecolityping"]["output"]["command_file"]
# Other options
STBIT = helpers.get_stbit('')

In [None]:
#|export
def postecolityping(sampleid:str, output_dir:Path, stbit:str) -> str:
    """Build the command line that runs postecolityping.py for a single sample.

    The command is only assembled and returned as a string; nothing is executed here.

    Args:
        sampleid: Value passed to the script through `-i`.
        output_dir: Value passed to the script through `-d`.
        stbit: Value passed to the script through `-stbit`.

    Returns:
        The command string with surrounding whitespace stripped.
    """
    script = f"{core.PACKAGE_DIR}/ecoli_fbi/postecolityping.py"
    # strip() keeps the result free of leading/trailing whitespace even when
    # the last interpolated value is empty or ends in whitespace.
    return f"python3 {script} -i {sampleid} -d {output_dir} -stbit {stbit}".strip()

In [None]:
#|export
def generate_postecolityping_commands(sample_sheet:str, input_dir:Path, output_dir:Path, stbit:str):
    """Generate one postecolityping command for every sample in the sample sheet.

    Args:
        sample_sheet: Sample sheet, handed to `helpers.process_sample_sheet`
            together with `input_dir`.
        input_dir: Input folder; converted to an absolute path before use.
        output_dir: Directory handed unchanged to every command (`-d`).
        stbit: Value handed to every command (`-stbit`). When it is empty or
            `None`, `helpers.get_stbit('')` is used instead.

    Returns:
        A list of command strings, in the order the samples are returned by
        `helpers.process_sample_sheet`. Entries without a 'SampleID' key are
        skipped after printing a warning.
    """
    input_dir = os.path.abspath(input_dir)  # Get the full paths of relative paths
    metadata = helpers.process_sample_sheet(input_dir, sample_sheet)

    # Honour the value supplied by the caller; only fall back to the helper
    # when nothing usable was passed in.
    if not stbit:
        stbit = helpers.get_stbit('')

    commands = []
    for sample_dict in metadata:
        try:
            sampleid = sample_dict['SampleID']
        except KeyError as err:
            # The sample id itself is what is missing, so report the whole
            # entry instead of an unbound or stale `sampleid`.
            print(f"Warning: skipping sample sheet entry {sample_dict!r}, it is missing {err}.")
            continue
        commands.append(postecolityping(sampleid, output_dir, stbit))
    return commands

In [None]:
# #|export
# commands = generate_postecolityping_commands(SAMPLE_SHEET, INPUT_DIR, OUTPUT_DIR, STBIT)
# helpers.write_list_to_file(commands, COMMAND_FILE)
# for command in commands:
#     print(command)
#     !{command}
#     core.notification(NOTIFICATIONS)

## Turn the ecoli_fbi into a command line tool

In [None]:
#|export
# These are the command line options for FBI_run_postecolityping
# Add 'FBI_run_postecolityping' into settings.ini
@call_parse
def cli(
    input:Path = None, # Path to input folder that contains the results of kma matching against the ecoligenes db (the folder)
    sample_sheet:str = None, # Name of the sample_sheet file
    output:Path = None, # Path to the output directory
    command_file:str = None, # Path to file to write commands to
    execute:bool = True, # Run commands in command file
    to_stdout:bool = False, # If true, will write to stdout instead of file, mutually exclusive with output_file
    overwrite:bool = False, # If true, will overwrite output_file if it exists
    config_file:str = None # Config file to overwrite default settings, arg parse values will override config file values,
    ) -> None:
    """
    This program takes as input a folder containing .tsv files 
    and allows you to run postecolityping.py on all Ecoli samples."""
    config = core.get_config(config_file) # Set env vars and get config variables

    # Values given on the command line take precedence over the config file.
    # Each row: (config group, option name, value to store, whether to apply it).
    cli_overrides = (
        ("input", "dir", input, input is not None),
        ("input", "sample_sheet", sample_sheet, sample_sheet is not None),
        ("output", "dir", output, output is not None),
        ("output", "command_file", command_file, command_file is not None),
        ("output", "to_stdout", True, to_stdout is True),
        ("output", "overwrite", True, overwrite is True),
    )
    for group, option, value, given in cli_overrides:
        if given:
            config["run_postecolityping"][group][option] = value

    # Build one command per sample from the effective settings.
    sheet_setting = config["run_postecolityping"]["input"]["sample_sheet"]
    input_setting = config["run_postecolityping"]["input"]["dir"]
    output_setting = config["run_postecolityping"]["output"]["dir"]
    commands = generate_postecolityping_commands(
        sheet_setting,
        input_setting,
        output_setting,
        stbit = helpers.get_stbit('')
    )

    # Hand the commands to the helper together with the output options.
    command_file_setting = config["run_postecolityping"]["output"]["command_file"]
    overwrite_setting = config["run_postecolityping"]["output"]["overwrite"]
    to_stdout_setting = config["run_postecolityping"]["output"]["to_stdout"]
    helpers.write_list_to_file(
        commands,
        command_file_setting,
        overwrite=overwrite_setting,
        to_stdout=to_stdout_setting
    )

    # Nothing more to do unless the commands should also be run.
    if not execute:
        return
    # NOTE(review): with to_stdout set, the command file may not have been
    # written by the call above - confirm how the helper handles that case.
    helpers.execute_commands_from_file(config["run_postecolityping"]["output"]["command_file"])
    #core.notification(config["demultiplex_runs"]["notification"] )

In [None]:
#| hide
#cli(config_file=f"{core.PROJECT_DIR}/config/config.default.env", execute=False)

In [None]:
#| hide
# Kept as the last cell so that running the notebook from top to bottom also
# transfers the notebook's code to the associated Python package.
nbdev.nbdev_export()