In [1]:
from pathlib import Path
from itertools import chain
from pandas import DataFrame, Series

from rolling_pin.conform_etl import ConformETL

In [37]:
# Starred unpacking: `a` collects every leading item as a list, `foo` takes the last one.
*a, foo = 1, 2, 3, 'bar'
foo

'bar'

In [2]:
# Pattern shared by every source rule's `exclude` field.
# NOTE(review): the leading '.' is unescaped, so if this is applied as a regex
# it matches any character before git/mypy/pytest, not only a literal dot.
# Left unchanged here to keep the rule's current behavior -- confirm intent.
exclude_re = '.(git|mypy|pytest)|__(pycache|mypy)__'
source_dir = '/home/ubuntu/rolling-pin'
target_dir = '/tmp/repo'

# One entry per directory to gather files from, each with its own
# include / exclude patterns.
source_rules = [
    dict(
        path=source_dir + '/python/rolling_pin',
        include=None,
        exclude=exclude_re,
    ),
    dict(
        path=source_dir + '/docker',
        # Raw string: '\.' inside a normal string literal is an invalid escape
        # sequence. The resulting value is unchanged.
        include=r'pyproject\.toml|pdm\.lock|pdm\.toml',
        exclude=exclude_re,
    ),
    dict(
        path=source_dir,
        include='README|LICENSE',
        exclude=exclude_re + '|docker',
    ),
]

# Pattern -> replacement pairs. The first rule reuses source_dir / target_dir
# so the paths are spelled out in exactly one place.
rename_rules = [
    dict(regex=source_dir, replace=target_dir),
    dict(regex='/docker', replace=''),
    dict(regex='/python', replace=''),
    dict(regex='/pdm.lock', replace='/.pdm.lock'),
]

# Named groups keyed by filename pattern.
group_rules = [
    dict(name='init', regex='__init__.py$'),
    dict(name='test', regex='_test.py$'),
    dict(name='resource', regex='/resources'),
]

# Line-level include / exclude patterns attached to the 'init' group.
line_rules = [
    dict(
        group='init',
        include=None,
        exclude='test',
    )
]

d = ConformETL(source_rules, rename_rules, group_rules, line_rules)
# NOTE(review): the return value of to_html() is discarded and only the final
# `d` expression is displayed by the cell -- confirm whether this call is needed.
d.to_html()
d

                                                         SOURCE                                             TARGET               GROUPS  LINE_RULE
                               /home/ubuntu/rolling-pin/LICENSE                                  /tmp/repo/LICENSE               [base]           
                             /home/ubuntu/rolling-pin/README.md                                /tmp/repo/README.md               [base]           
        /home/ubuntu/rolling-pin/python/rolling_pin/__init__.py                  /tmp/repo/rolling_pin/__init__.py               [init]          X
             /home/ubuntu/rolling-pin/python/rolling_pin/app.py                       /tmp/repo/rolling_pin/app.py               [base]           
        /home/ubuntu/rolling-pin/python/rolling_pin/blob_etl.py                  /tmp/repo/rolling_pin/blob_etl.py               [base]           
   /home/ubuntu/rolling-pin/python/rolling_pin/blob_etl_test.py             /tmp/repo/rolling_pin/blob_etl_test.py    

In [34]:
# Build the expected header by right-justifying each column title to a fixed
# width, then print it next to the first line of d's repr for a visual check.
# The trailing 'x' makes any difference in trailing whitespace visible.
e = 'SOURCE'.rjust(63) + 'TARGET'.rjust(51) + 'GROUPS'.rjust(21) + 'LINE_RULE'.rjust(11)
print(e, 'x')
print(repr(d).split('\n')[0], 'x')

                                                         SOURCE                                             TARGET               GROUPS  LINE_RULE x
                                                         SOURCE                                             TARGET               GROUPS  LINE_RULE x


In [15]:
def buffer_filepath(data):
    '''
    Render a Series of filepaths as column-aligned strings.

    Each path is split with ``Path(...).parts``. The first component (the root
    of an absolute path) and the last component (the file name) are dropped
    from the directory list, and a '/' token is placed before every remaining
    directory. A final '/' column and a file name column are appended. Rows
    with fewer directories are padded with empty strings, so the separator and
    file name columns line up once the table is rendered with
    ``DataFrame.to_string``.

    Args:
        data (Series): Filepaths as strings. The first path component is
            always discarded, so absolute paths are expected.

    Returns:
        Series: One rendered string per input path, carrying the same index
            as ``data``.
    '''
    # An empty frame renders as an "Empty DataFrame ..." message rather than
    # zero rows, so short-circuit instead of returning that text as data.
    if len(data) == 0:
        return Series([], index=data.index, dtype=object)

    def interleave(filepath):
        # ['/', 'home', '/', 'ubuntu', ...] for every directory between the
        # root and the file name.
        directories = Path(filepath).parts[1:-1]
        return [token for part in directories for token in ('/', part)]

    # Reuse the caller's index so rows stay matched to their input paths.
    output = DataFrame([interleave(item) for item in data], index=data.index)
    output['sep'] = '/'
    # Assign a plain list (positional) rather than a Series: Series assignment
    # aligns on index labels and would blank out file names whenever the two
    # indexes differ.
    output['filename'] = [Path(item).name for item in data]
    output = output.fillna('')
    output.columns = list(range(output.shape[1]))
    lines = output.to_string(index=False, header=False).split('\n')
    return Series(lines, index=data.index)

# Work on a copy of the ETL's underlying table so `d` itself is left untouched.
data = d._data.copy()

# Swap the raw paths for their column-aligned renderings.
data['source'] = buffer_filepath(data['source'])
data['target'] = buffer_filepath(data['target'])

# Constant arrow column placed between source and target.
data['==>'] = '==>'

# Pick the display columns in order and upper-case their headers.
data = data[['source', '==>', 'target', 'groups']].rename(columns=str.upper)

x = data.to_string(
    index=False,
    max_colwidth=150,
    col_space=[80, 10, 80, 20],
    justify='left',
)
print(x)

SOURCE                                                                           ==>        TARGET                                                                           GROUPS              
/ home / ubuntu / rolling-pin                        /           LICENSE         ==>        / tmp / repo               /           LICENSE                                   [base]              
/ home / ubuntu / rolling-pin                        /         README.md         ==>        / tmp / repo               /         README.md                                   [base]              
/ home / ubuntu / rolling-pin / python / rolling_pin /       __init__.py         ==>        / tmp / repo / rolling_pin /       __init__.py                                   [init]              
/ home / ubuntu / rolling-pin / python / rolling_pin /            app.py         ==>        / tmp / repo / rolling_pin /            app.py                                   [base]              
/ home / ubuntu / rolling-pin 

In [126]:
# Show the plain-text (str) rendering of the ConformETL instance.
print(d)

                                                       SOURCE                                             TARGET               GROUPS
                             /home/ubuntu/rolling-pin/LICENSE                                  /tmp/repo/LICENSE               [base]
                           /home/ubuntu/rolling-pin/README.md                                /tmp/repo/README.md               [base]
      /home/ubuntu/rolling-pin/python/rolling_pin/__init__.py                  /tmp/repo/rolling_pin/__init__.py               [init]
           /home/ubuntu/rolling-pin/python/rolling_pin/app.py                       /tmp/repo/rolling_pin/app.py               [base]
      /home/ubuntu/rolling-pin/python/rolling_pin/blob_etl.py                  /tmp/repo/rolling_pin/blob_etl.py               [base]
 /home/ubuntu/rolling-pin/python/rolling_pin/blob_etl_test.py             /tmp/repo/rolling_pin/blob_etl_test.py               [test]
     /home/ubuntu/rolling-pin/python/rolling_pin/radon_etl.py 

In [None]:
# YAML form of the source / rename / group / line rules.
# - The four rule lists are top-level keys; list items are indented two spaces
#   and their fields four.
# - Patterns containing a backslash use YAML single quotes, because '\.' is not
#   a valid escape inside a YAML double-quoted scalar.
# - `null` (not `None`) is YAML's spelling of a missing value.
config = '''
source_rules:
  - path: "/home/ubuntu/lunchbox"
    include: "lunchbox/python/lunchbox"
    exclude: ".(git|mypy|pytest)|__(pycache|mypy)__"
  - path: /home/ubuntu/lunchbox/docker
    include: 'pyproject\\.toml|pdm\\.lock|pdm\\.toml'
    exclude: ".(git|mypy|pytest)|__(pycache|mypy)__"
  - path: /home/ubuntu/lunchbox
    include: "README|LICENSE"
    exclude: ".(git|mypy|pytest)|__(pycache|mypy)__|docker"

rename_rules:
  - regex: "/home/ubuntu/lunchbox"
    replace: "/tmp/repo"
  - regex: "/docker"
    replace: ""
  - regex: "/python"
    replace: ""
  - regex: "/pdm.lock"
    replace: "/.pdm.lock"

group_rules:
  - name: init
    regex: "__init__.py$"
  - name: test
    regex: "_test.py$"
  - name: resource
    regex: "/resources"

line_rules:
  - group: init
    include: null
    exclude: "test"
'''