In [None]:
#| default_exp core

#| hide
Ensure that the Python environment (kernel) selected for this notebook is the virtual environment in `./.venv`.

# core

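> Find runs (directories) that are present in a source tree but missing from a destination tree, and write their paths to a file.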

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#|export
# standard libs
import os
import re

# add into settings.ini, package name is python-dotenv
from dotenv import dotenv_values # Used for loading configs

In [None]:
#|export

from dotenv import load_dotenv # for loading config from .env files

def set_env_variables(config_path) -> bool:
    """Load settings from dotenv files into the process environment.

    `config_path` is an optional dotenv file; when it is not None it is
    loaded first, followed by the bundled defaults file. `load_dotenv` is
    called without overrides, so the intended precedence is:
    existing environment variables > `config_path` file > default values.

    Always returns True.
    """
    default_env_file = "./config/config.default.env"

    # Earlier files win, so the user-supplied file goes ahead of the defaults.
    env_files = [] if config_path is None else [config_path]
    env_files.append(default_env_file)

    for env_file in env_files:
        load_dotenv(env_file)

    return True

In [None]:
#|export

import os
# Load the settings into os.environ as soon as this code runs: the `cli`
# defaults further down read the FRD_* variables when the function is defined.
# FRD_CONFIG_PATH optionally names an extra dotenv file (None when unset).
set_env_variables(os.environ.get("FRD_CONFIG_PATH"))

True

In [None]:
#|hide

# Show every environment variable whose name carries the FRD_ prefix
for name in os.environ:
    if not name.startswith("FRD_"):
        continue
    print(f"{name}={os.environ[name]}")

FRD_SOURCE_DIR=./input/
FRD_DEST_DIR=./output/
FRD_SOURCE_REGEX=^folder([0-9]+)$
FRD_DEST_REGEX=^folder_([0-9]+)$
FRD_OUTPUT_FILE=./output/output.txt


In [None]:
#|export

def find_runs(dir, regex):
    """Map run keys to paths for every matching directory below `dir`.

    The tree rooted at `dir` is walked with `os.walk`; each directory name
    (at any depth) is tested with `regex.match`, i.e. anchored at the start
    of the name.

    Parameters:
        dir: root directory to search. A non-existent directory yields an
            empty result because `os.walk` produces nothing for it.
        regex: a pattern string or an already compiled pattern.

    Returns:
        dict mapping key -> full path of the matching directory. The key is
        the concatenation of capture groups 1..lastindex of the match
        (groups that did not participate contribute an empty string). When
        no capture group participated, the matched text itself is the key.
        If several directories produce the same key, the one visited last
        wins.
    """
    # Settings read from the environment arrive as plain strings; re.compile
    # hands back an already compiled pattern unchanged, so both are accepted.
    pattern = re.compile(regex)

    runs = {}
    for root, subdirs, _files in os.walk(dir):
        for name in subdirs:
            match = pattern.match(name)
            if match is None:
                continue
            # lastindex is None when no capture group took part in the match
            last_group = match.lastindex or 0
            if last_group:
                key = "".join(match.group(i) or "" for i in range(1, last_group + 1))
            else:
                key = match.group(0)
            runs[key] = os.path.join(root, name)
    return runs

In [None]:
#|export

def find_runs_in_source_but_not_dest(source_dir, source_regex, destination_dir, destination_regex, output_file):
    """Write the paths of runs that exist in the source but not the destination.

    Runs are discovered on both sides with `find_runs`; a run counts as
    missing when its key appears among the source runs but not among the
    destination runs. `output_file` is overwritten with a `#paths` header
    line followed by one source path per missing run, ordered by sorted key.
    """
    in_source = find_runs(source_dir, source_regex)
    in_destination = find_runs(destination_dir, destination_regex)

    # Keys only the source side knows about, in sorted order
    missing_keys = sorted(key for key in in_source if key not in in_destination)

    lines = ['#paths\n']
    lines.extend(f"{in_source[key]}\n" for key in missing_keys)

    with open(output_file, 'w') as f:
        f.writelines(lines)

In [None]:
#|export

from fastcore.script import call_parse
import os

@call_parse
def cli(
    source_dir:str=os.environ.get("FRD_SOURCE_DIR"), # Path to source directory
    dest_dir:str=os.environ.get("FRD_DEST_DIR"), # Path to destination directory
    source_regex:str=os.environ.get("FRD_SOURCE_REGEX"), # Regex to match source directory
    dest_regex:str=os.environ.get("FRD_DEST_REGEX"), # Regex to match destination directory
    output_file:str=os.environ.get("FRD_OUTPUT_FILE"), # Path to output file
    ):
    "Write the paths of runs found in source_dir but not in dest_dir to output_file"
    # The defaults above are read from the environment once, when this
    # function is defined; a variable that was unset at that point leaves
    # None here. Report that clearly instead of failing deeper in the call.
    settings = {
        "source_dir": source_dir,
        "dest_dir": dest_dir,
        "source_regex": source_regex,
        "dest_regex": dest_regex,
        "output_file": output_file,
    }
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise ValueError(f"Missing required setting(s): {', '.join(missing)}")

    # Arguments arrive as plain strings; compile the patterns here so the
    # directory lookup receives regex objects.
    find_runs_in_source_but_not_dest(
        source_dir,
        re.compile(source_regex),
        dest_dir,
        re.compile(dest_regex),
        output_file,
    )
    # NOTE(review): placeholder message — consider reporting output_file instead
    print("Hello World!")

In [None]:
# Example run driven by the FRD_* settings; the two directory arguments fall
# back to literal paths when their variables are unset.
# NOTE(review): the dest_dir fallback is "./input/" (an input path) — confirm this is intended.
cli(
    os.environ.get("FRD_SOURCE_DIR", "./input"),
    os.environ.get("FRD_DEST_DIR", "./input/"),
    os.environ.get("FRD_SOURCE_REGEX"),
    os.environ.get("FRD_DEST_REGEX"),
    os.environ.get("FRD_OUTPUT_FILE"),
)

Hello World!


In [None]:
#| hide
# Export the cells marked with `#|export` to the module named by `default_exp`
from nbdev import nbdev_export
nbdev_export()