In [1]:
import os
import sys
from pathlib import Path

# Make the `resources/` directory importable before the `utils` import below.
# The path is built from the kernel's current working directory, not from the
# notebook's own location: it is `<parent of cwd>/resources/`. The notebook
# therefore has to be run from a directory that is a sibling of `resources/`
# (presumably the folder this notebook lives in -- confirm).
p = os.path.dirname(os.getcwd())+'/resources/'
sys.path.append(p)

from utils import *


In [2]:
# # save mn5 version of config
# save_mn5_config()

In [3]:
def get_branch_url():
    """
    Build the GitHub URL of the currently checked-out branch.

    The repository base URL is read from the ``'gh_url'`` entry of the mapping
    returned by ``load_resources()``. The branch name is whatever
    ``run_cmd('git branch --show-current')`` returns, with surrounding
    whitespace stripped.

    Returns
    -------
    str
        ``<gh_url>tree/<branch_name>/``. No separator is inserted between
        ``gh_url`` and ``tree``, so ``gh_url`` is expected to end with ``/``.

    Examples
    --------
    With ``gh_url = 'https://github.com/user/repo/'`` on branch ``main`` the
    result is ``https://github.com/user/repo/tree/main/``.
    """
    repo_url = load_resources()['gh_url']
    branch_name = run_cmd('git branch --show-current').strip()
    return f"{repo_url}tree/{branch_name}/"

def find_missing_subdirs(d):
    """
    List the immediate subdirectories of ``../<d>/`` that its README does not mention.

    A subdirectory counts as documented when any line of ``../<d>/README.md``
    contains either the label ``[<name>]`` or a link target ending in
    ``/<name>/)`` or ``/<name>)``. The link-target match keeps entries whose
    label differs from the directory name (for example labels that were
    written from a truncated name) from being reported again.

    Parameters
    ----------
    d : str
        Name of the parent directory to scan, e.g. 'processing' or 'analysis'.

    Returns
    -------
    list of pathlib.Path
        Undocumented subdirectories as ``../<d>/<name>`` paths, sorted so the
        result does not depend on filesystem enumeration order. Empty when
        everything is documented or when ``../<d>/`` does not exist.

    Raises
    ------
    FileNotFoundError
        If there is at least one candidate subdirectory but
        ``../<d>/README.md`` does not exist.

    Notes
    -----
    - Directories that never need an entry are skipped:
      'processing' -> ['rules', 'template_snakemake', '.ipynb_checkpoints'],
      'analysis' -> ['.ipynb_checkpoints'], anything else -> none.
    - The full directory name (``Path.name``) is used, not ``Path.stem``, so a
      directory such as ``v1.2`` is looked up as ``[v1.2]`` rather than ``[v1]``.
    - Only immediate subdirectories of `d` are checked, not nested ones.
    """
    # directories that are part of the template and never get a README bullet
    if d == 'processing':
        perm_dirs = ['rules', 'template_snakemake', '.ipynb_checkpoints']
    elif d == 'analysis':
        perm_dirs = ['.ipynb_checkpoints']
    else:
        perm_dirs = []

    parent = Path(f'../{d}/')
    readme = f'../{d}/README.md'

    # a missing parent directory simply has no subdirectories to report
    if not parent.is_dir():
        return []

    candidates = sorted(
        sub_d for sub_d in parent.iterdir()
        if sub_d.is_dir() and sub_d.name not in perm_dirs
    )
    if not candidates:
        return []

    # read the README once and close it, instead of re-opening it per subdirectory
    with open(readme) as infile:
        readme_lines = infile.readlines()

    to_update = []
    for sub_d in candidates:
        name = sub_d.name
        needles = (f'[{name}]', f'/{name}/)', f'/{name})')
        documented = any(
            needle in line for line in readme_lines for needle in needles
        )
        if not documented:
            to_update.append(sub_d)

    return to_update

def _subfolder_insert_index(lines, header):
    """Return the index in `lines` at which new subfolder bullets should be inserted."""
    start = next((i for i, line in enumerate(lines) if header in line), None)
    if start is None:
        # no such section: fall back to appending at the end of the file
        return len(lines)

    # the section runs until the next level-2 header (or the end of the file)
    end = next(
        (i for i in range(start + 1, len(lines)) if lines[i].startswith("## ")),
        len(lines),
    )

    bullet_rows = [
        i for i in range(start + 1, end) if lines[i].lstrip().startswith("* [")
    ]
    if bullet_rows:
        anchor = bullet_rows[-1]
        # keep indented continuation lines attached to the bullet they belong to
        while (anchor + 1 < end and lines[anchor + 1].strip()
               and lines[anchor + 1][0] in " \t"):
            anchor += 1
    else:
        # no bullets yet: go after the last non-blank line of the section
        # (the header line itself is always a candidate)
        anchor = max(i for i in range(start, end) if lines[i].strip())
    return anchor + 1


def add_missing_subdirs_to_readme(d, missing_dirs):
    """
    Insert README bullets for subdirectories that are not documented yet.

    Reads ``../<d>/README.md`` and adds one bullet per entry of `missing_dirs`
    whose ``[<name>]`` label does not already occur in the file. Bullets are
    placed directly after the last ``* [`` bullet of the
    '## Subfolder descriptions' section. If the section has no bullets they go
    after its last non-blank line, and if the README has no such section they
    are appended at the end of the file.

    Parameters
    ----------
    d : str
        Top-level directory name, e.g. 'processing' or 'analysis'. The same
        section header is used for every value of `d`.
    missing_dirs : list of pathlib.Path
        Subdirectory paths (typically ``../<d>/<name>``) to add as bullets.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If `missing_dirs` is non-empty and ``../<d>/README.md`` does not exist.

    Notes
    -----
    - When there is nothing to add, the README is not rewritten and nothing is
      printed, so repeated runs leave the file byte-identical.
    - Each bullet is formatted as
      ``* [<name>](<gh_url>/<path without '..'>/): # TODO!! `` where `gh_url`
      comes from ``load_resources()['gh_url']`` with trailing slashes removed.
      The full directory name (``Path.name``) is used as the label.
    - A blank line is added around the new bullets only where they would
      otherwise touch a non-list line (e.g. the next header).
    - All other README content is preserved.
    """
    header = "## Subfolder descriptions"
    readme = f'../{d}/README.md'

    # nothing requested: do not touch the README at all
    if not missing_dirs:
        return

    with open(readme, 'r') as infile:
        lines = infile.readlines()

    # drop entries that are already listed (and duplicates within this call)
    readme_text = ''.join(lines)
    to_add = []
    seen = set()
    for sub_d in missing_dirs:
        sub_d = Path(sub_d)
        if sub_d.name in seen or f'[{sub_d.name}]' in readme_text:
            continue
        seen.add(sub_d.name)
        to_add.append(sub_d)
    if not to_add:
        return

    # load resources to get GH URL; strip '/' so the join below yields exactly one
    base_url = load_resources()['gh_url'].rstrip('/')

    # NOTE(review): GitHub directory pages normally live under
    # 'tree/<branch>/' (see get_branch_url) -- confirm whether these links
    # should be branch-qualified. Only the malformed '//../' part is fixed here.
    new_block = []
    for sub_d in to_add:
        # paths arrive relative to this notebook ('../<d>/<name>'); the link
        # must be relative to the repository root, so drop the '..' parts
        rel_path = '/'.join(
            part for part in sub_d.parts if part not in ('..', sub_d.anchor)
        )
        new_block.append(f"* [{sub_d.name}]({base_url}/{rel_path}/): # TODO!! \n")

    insert_at = _subfolder_insert_index(lines, header)

    if insert_at > 0:
        before = lines[insert_at - 1]
        # an unterminated last line would otherwise swallow the first bullet
        if not before.endswith('\n'):
            lines[insert_at - 1] = before + '\n'
        follows_list = (before.lstrip().startswith("* [")
                        or before[:1] in (' ', '\t'))
        if before.strip() and not follows_list:
            new_block.insert(0, '\n')
    if insert_at < len(lines) and lines[insert_at].strip():
        new_block.append('\n')

    lines[insert_at:insert_at] = new_block

    # Write back updated README
    with open(readme, 'w') as outfile:
        outfile.writelines(lines)

    # write to user where README entries have been written
    print(f"Added README entries to {readme.split('../')[1]} for ")
    for sub_d in to_add:
        print(f'- {sub_d.name}')
    print()

In [4]:
# every undocumented subdirectory found below, kept so a note can be shown to
# the user later
all_missing_dirs = []

# same find-then-document pass for each top-level directory, in this order
for d in ('processing', 'analysis'):
    missing_dirs = find_missing_subdirs(d)
    add_missing_subdirs_to_readme(d, missing_dirs)
    all_missing_dirs.extend(missing_dirs)

In [103]:
# show the branch-specific URL followed by the bare repository URL
branch_url = get_branch_url()
print(branch_url)
gh_url = load_resources()['gh_url']
print(gh_url)


http://github.com/pclavell/project_template/tree/fairlie/
http://github.com/pclavell/project_template/


In [102]:
# # replace all README links using the GH url, if necessary

# for readme in Path(f'../').rglob('README.md'):
    
#     # loop through all files in repo and see if they're in any readme in their relative form
#     with open(readme, 'r') as infile:
#         lines = infile.readlines()

#     output_lines = []
#         output_lines.append(line)

../README.md
../analysis/README.md
../processing/README.md


In [None]:
# ping to check for broken links