In [100]:
from pyprojroot.here import here
import sys
import pandas as pd
import pathlib
from collections import defaultdict, Counter

sys.path.append(str(here()))

from resources.utils import *

In [101]:
def check_setup_usernames(usernames):
    """
    Validate that no username appears more than once.

    Parameters
    ----------
    usernames : list of usernames

    Raises
    ------
    ValueError
        If any username occurs two or more times. The message lists the
        repeated usernames in order of first appearance.
    """
    tally = Counter(usernames)
    # Counter keeps insertion order, so repeats are reported in input order
    repeated = [name for name in tally if tally[name] > 1]
    if repeated:
        raise ValueError(f'Found duplicated username {repeated}.')

In [102]:
def verify_proj_name(name):
    """
    Reject the placeholder project name.

    Parameters
    ----------
    name : str
        Project name to check.

    Raises
    ------
    ValueError
        If ``name`` is still the literal ``'project_template'``.
    """
    # anything other than the template placeholder is accepted as-is
    if name != 'project_template':
        return
    raise ValueError('Must provide a new project name in template_user/resources.yml!')

In [126]:
# def get_user_system_entry_path_map(m, user, system):
#     """
#     Construct a mapping of placeholder keys to canonical filesystem paths
#     for a given user and system within a project configuration.

#     Parameters
#     ----------
#     m : dict
#         Project configuration dictionary, typically loaded from YAML.
#         Must contain a ``setup_settings`` section with keys:
#         - ``project_name`` (str): the project name
#         - ``mn5_projects`` (str): base directory for MN5 projects
#     user : str
#         User identifier (e.g., ``"freese"``). Used to build user-specific paths.
#     system : str
#         System identifier (e.g., ``"mn5"``, ``"local"``).
#         Determines which base path prefix to use.

#     Returns
#     -------
#     dict of str to str
#         A mapping from placeholder keys to resolved filesystem paths:

#         - ``"{data_dir}"`` → project data directory
#         - ``"{ref_dir}"`` → project reference directory
#         - ``"{figures_dir}"`` → project figures directory
#         - ``"{metadata_dir}"`` → user-specific metadata directory

#     Notes
#     -----
#     - For the ``"mn5"`` system, the path prefix is inferred from
#       ``m['setup_settings']['mn5_projects']``.
#     - For all other systems, the path prefix is taken from the
#       resources.yml dictionary
#     - All returned paths are normalized using :class:`pathlib.Path`.
#     - Placeholder-style keys (e.g., ``"{data_dir}"``) are used to
#       facilitate string substitution elsewhere in the project.

#     Examples
#     --------
#     >>> config = {
#     ...     "setup_settings": {
#     ...         "project_name": "test_project",
#     ...         "mn5_projects": "/mnt/projects"
#     ...     }
#     ... }
#     >>> get_user_system_entry_path_map(config, "freese", "mn5")  # doctest: +ELLIPSIS
#     {
#         '{data_dir}': '/mnt/projects/test_project/data',
#         '{ref_dir}': '/mnt/projects/test_project/ref',
#         '{figures_dir}': '/mnt/projects/test_project/figures',
#         '{metadata_dir}': '/mnt/projects/test_project/freese/metadata'
#     }
#     """

#     d = {}

#     # mn5 paths
#     if system == 'mn5':
#         pref = f"{m['setup_settings']['mn5_projects']}/{m['setup_settings']['project_name']}"

#     # any other path
#     else:
#         pref = f"{m['setup_settings']['users'][user][system]['path']}/{m['setup_settings']['project_name']}/"

#     # print(user)
#     # print(system)
#     # print(pref)
#     # print()

#     data_dir = f'{pref}/data/'
#     ref_dir = f'{pref}/ref/'
#     figures_dir = f'{pref}/figures/'

#     # metadata dir is part of the github-stored stuff, so it's separate
#     metadata_dir = str(pathlib.Path(f'{pref}/{user}/metadata/'))

#     # add all paths to dict
#     d[r"\{data_dir\}"] = str(pathlib.Path(data_dir))
#     d[r"\{ref_dir\}"] = str(pathlib.Path(ref_dir))
#     d[r"\{figures_dir\}"] = str(pathlib.Path(figures_dir))
#     d[r"\{metadata_dir\}"] = str(pathlib.Path(metadata_dir))

#     return d

# def get_setup_settings_path_maps(m):
#     """
#     Create dictionaries for resources.yml entries for paths for
#     each user / system.

#     Parameters
#     ----------
#     m : dict from load_resources()

#     Returns
#     -------
#     path_map, quick_path_map
#         Dictionaries of paths dict['path_map'][system][username];
#         dict[username] respectively
#     """
#     # loop through usernames
#     path_map = defaultdict(dict)
#     quick_path_map = {}

#     for user, systems in m['setup_settings']['users'].items():
#         for system, system_dict in systems.items():

#             username = system_dict['username']
#             path_map[username] = get_user_system_entry_path_map(m, user, system)
#             # quick_path_map[username] = get_user_system_entry_path_map(m, user, system)

#     return path_map

In [121]:
def construct_templated_paths(path_map, path, user, proj_name, username=None):
    """
    Add the project-relative directories for one user to ``path_map``.

    Four entries are written: ``data_dir``, ``ref_dir``, ``figures_dir``
    (all directly under ``<path>/<proj_name>``) and ``metadata_dir``
    (under ``<path>/<proj_name>/<user>``). Every value is normalised
    through :class:`pathlib.Path` and stored as a string.

    Parameters
    ----------
    path_map : dict
        Mapping of username -> {directory name -> path}. Updated in place;
        a plain ``dict`` works as well as a ``defaultdict``.
    path : str
        Directory that contains the project directory.
    user : str
        Name of the user's directory inside the project; only used to build
        ``metadata_dir``.
    proj_name : str
        Name of the project directory.
    username : str, optional
        Key of ``path_map`` under which the entries are stored.
        Defaults to ``user`` when not given.

    Returns
    -------
    dict
        The same ``path_map`` object that was passed in.
    """
    # Everything this function needs comes in through its arguments; it must
    # not depend on notebook-level variables left over from other cells.
    if username is None:
        username = user

    pref = f"{path}/{proj_name}/"

    templated = {
        'data_dir': f'{pref}/data/',
        'ref_dir': f'{pref}/ref/',
        'figures_dir': f'{pref}/figures/',
        # metadata dir is part of the github-stored stuff, so it's separate
        'metadata_dir': f'{pref}/{user}/metadata/',
    }

    # add all paths to dict; pathlib collapses the doubled separators
    user_paths = path_map.setdefault(username, {})
    for dir_name, raw_path in templated.items():
        user_paths[dir_name] = str(pathlib.Path(raw_path))

    return path_map


In [127]:
def generate_path_map(m, proj_name):
    """
    Build the per-username directory map from the setup settings.

    For every ``user -> system -> entry`` under ``m['setup_settings']['users']``
    the result holds, keyed by ``entry['username']``:

    1. every key of the entry that ends in ``_dir``, copied verbatim;
    2. for the ``mn5`` system, ``projects_dir`` and ``scratch_dir`` taken
       from ``m['setup_settings']['mn5_locs']``;
    3. the templated project directories produced by
       ``construct_templated_paths``.

    Parameters
    ----------
    m : dict
        Configuration dictionary containing a ``setup_settings`` section.
    proj_name : str
        Project directory name, forwarded to ``construct_templated_paths``.

    Returns
    -------
    dict
        Plain ``dict`` mapping username -> {directory name -> path}.

    Raises
    ------
    KeyError
        If an entry has no ``username``, or a non-``mn5`` entry has no
        ``projects_dir``.
    """
    settings = m['setup_settings']

    # create the path map entries automatically
    path_map = defaultdict(dict)
    for user, systems in settings['users'].items():
        for system, sys_entry in systems.items():
            user_paths = path_map[sys_entry['username']]

            # carry over every explicitly configured *_dir entry
            for key, value in sys_entry.items():
                if key.endswith('_dir'):
                    user_paths[key] = value

            # for both of these, append /Projects/ so we don't have to keep track of both
            # /gpfs/projects/bsc83/ and /gpfs/projects/bsc83/Projects
            if system == 'mn5':
                # for mn5, we record the projects path already in the config
                mn5_locs = settings['mn5_locs']
                path = f"{mn5_locs['projects_dir']}/Projects/"
                # as well as the default projects and scratch dirs
                for mn5_dir in ['projects_dir', 'scratch_dir']:
                    user_paths[mn5_dir] = mn5_locs[mn5_dir]
            else:
                path = f"{sys_entry['projects_dir']}/Projects/"

            # add the templated directories -- ones we know where to find either
            # 1. relative to the project or
            # 2. based on abs. paths on mn5
            # The helper fills a scratch map and the result is merged here under
            # this iteration's username, so the entries cannot end up under a
            # key chosen outside of this loop.
            templated = construct_templated_paths(defaultdict(dict),
                                                  path,
                                                  user,
                                                  proj_name)
            for templated_dirs in templated.values():
                user_paths.update(templated_dirs)

    return dict(path_map)

In [128]:
def main(test=True, config_file=None,
         output_config='template_user/resources/resources.yml'):
    """
    Validate the setup configuration and append the generated path map to it.

    Steps, in order: load the resources, reject the template project name,
    reject duplicated usernames, (non-test only) drop ``.git`` and rename the
    project directory, build the path map, append the path map and the list
    of users to ``output_config``, and (non-test only) copy ``template_user/``
    once per user.

    Parameters
    ----------
    test : bool
        When True, none of the shell commands are run; only the append
        to ``output_config`` happens.
    config_file : str or None
        Passed straight to ``load_resources``.
    output_config : str
        File the YAML is appended to (opened in ``'a'`` mode).
    """
    m = load_resources(config_file)
    settings = m['setup_settings']

    proj_name = settings['project_name']
    verify_proj_name(proj_name)

    users = settings['users']
    check_setup_usernames([sys_entry['username']
                           for systems in users.values()
                           for sys_entry in systems.values()])

    # finally a way to test this
    if not test:
        # rename project; immediately remove all git things;
        for cmd in ('rm -rf .git',
                    f"mv ../project_template ../{proj_name}"):
            run_cmd(cmd)

    path_map = generate_path_map(m, proj_name)

    # also add a users list
    users_list = {'users': list(users.keys())}

    # write to resources.yml, just append the path_map and quick-access path maps
    with open(output_config, 'a') as out_file:
        for section in (path_map, users_list):
            yaml.dump(section, out_file, default_flow_style=False)

    # make a copy of template user for each user
    if not test:
        for user in users:
            run_cmd(f'cp -r template_user/ {user}')


In [129]:
# Dry run against the test config: with test=True main() runs no shell
# commands, but it still appends to 'test_yml_out.yml' on every execution.
# NOTE(review): main() has no return statement, so `m` is bound to None here.
m = main(test=True, 
         config_file='test_yml.yml',
        output_config='test_yml_out.yml')

In [119]:
# temp = [(i2['username'], k3, i3, k2) for k, i in m['setup_settings']['users'].items() \
#                for k2, i2 in i.items() \
#                for k3, i3 in i2.items()\
#                if '_dir' in k3]
# path_map = defaultdict(defaultdict)
# for i in temp:
#     path_map[i[0]][i[1]] = i[2]
#     path_map[i[0]] = dict(path_map[i[0]])
    
#     if i[3] == 'mn5':
        
# path_map = dict(path_map)


In [120]:
# Scratch value for interactive experiments; main() does not read it
# (it takes the project name from the loaded config instead).
proj_name = 'test_project'

In [123]:
# NOTE(review): no cell visible here assigns a notebook-level `path_map`;
# presumably it is left over from an earlier interactive cell -- confirm, or
# display the return value of generate_path_map() instead.
path_map

{'bscuser1': {'new_dir': '/new/mn5/dir/user1/',
  'projects_dir': '/gpfs/projects/bsc83/',
  'scratch_dir': '/gpfs/scratch/bsc83/',
  'data_dir': '/gpfs/projects/bsc83/Projects/my_test_project/data',
  'ref_dir': '/gpfs/projects/bsc83/Projects/my_test_project/ref',
  'figures_dir': '/gpfs/projects/bsc83/Projects/my_test_project/figures',
  'metadata_dir': '/gpfs/projects/bsc83/Projects/my_test_project/user1_alias/metadata'},
 'localuser1': {'scratch_dir': '/user1/sshfs/scratch/',
  'projects_dir': '/user1/sshfs/projects/',
  'new_dir': '/user1/sshfs/new_dir/',
  'data_dir': '/user1/sshfs/projects/Projects/my_test_project/data',
  'ref_dir': '/user1/sshfs/projects/Projects/my_test_project/ref',
  'figures_dir': '/user1/sshfs/projects/Projects/my_test_project/figures',
  'metadata_dir': '/user1/sshfs/projects/Projects/my_test_project/user1_alias/metadata'},
 'bscuser2': {'new_dir': '/new/mn5/dir/user1/',
  'projects_dir': '/gpfs/projects/bsc83/',
  'scratch_dir': '/gpfs/scratch/bsc83/',
