# PATHS

In [2]:
# default_exp paths

In [3]:
#export
from fastcore.basics import patch_to
from functools import wraps
from torch_snippets.logger import logger, Info, Debug, Warn, Excep

def input_to_str(func):
    '''Decorator that stringifies the first positional argument before calling `func`.

    A `P` instance becomes `str(input)`; a list becomes a list with `str`
    applied to every element. Any other value is passed through unchanged.
    Remaining positional and keyword arguments are forwarded untouched.
    '''
    @wraps(func)
    def inner(input, *args, **kwargs):
        if isinstance(input, P):
            input = str(input)
        elif isinstance(input, list):
            input = list(map(str, input))
        return func(input, *args, **kwargs)
    return inner

def output_to_path(func):
    '''Decorator that converts string results of `func` into `P` objects.

    A `str` result becomes `P(result)`. A non-empty list whose first element
    is a `str` has every element wrapped in `P`. Anything else (including an
    empty list) is returned as-is.
    '''
    @wraps(func)
    def inner(input, *args, **kwargs):
        result = func(input, *args, **kwargs)
        if isinstance(result, str):
            return P(result)
        # Only the first element is inspected to decide whether to convert.
        if isinstance(result, list) and result and isinstance(result[0], str):
            return [P(item) for item in result]
        return result
    return inner

In [19]:
#export
from torch_snippets.loader import choose as ts_choose
from torch_snippets.loader import Tqdm
from pathlib import Path
# `P` is the short alias for `pathlib.Path` used throughout this module.
P = Path
# `P.ls()` lists the immediate children of a directory.
P.ls = lambda self: [*self.iterdir()]
# Paths are displayed with a leading `» ` marker followed by their string form.
P.__repr__ = lambda self: "» " + str(self)

@patch_to(P)
def size(self):
    '''Return the file size as a string such as `'3 MB'` or `'12 KB'`.

    Sizes are truncated to whole units (bytes >> 20 for MB, bytes >> 10 for
    KB), so any file smaller than 1024 bytes reports `'0 KB'`.
    Raises `Exception` when the path is a directory.
    '''
    if self.is_dir():
        raise Exception(f"`{self}` is a directory")
    n_bytes = os.path.getsize(self)
    megabytes = n_bytes >> 20
    if megabytes > 0:
        return f'{megabytes} MB'
    return f'{n_bytes >> 10} KB'

@patch_to(P)
def extn(self, pattern='*'):
    '''Return the final suffix without its leading dot (`'a.txt'` -> `'txt'`).

    `pattern` is accepted but not used.
    '''
    # `suffix` is either empty or starts with its only dot, so dropping the
    # first character is the same as removing every dot.
    return self.suffix[1:]

@patch_to(P)
def Glob(self, pattern='*'):
    '''Return every path under `self` matching `pattern` as a list.'''
    return [*self.glob(pattern)]

@patch_to(P)
def sample(self, pattern='*'):
    '''Pick one entry from the paths matching `pattern` under `self`.

    Selection is delegated to `ts_choose` (`torch_snippets.loader.choose`);
    presumably a random choice - confirm against that helper.
    '''
    candidates = self.Glob(pattern)
    return ts_choose(candidates)

@patch_to(P)
def mv(self, to):
    '''Rename/move this path to `to` with `os.rename` and return `P(to)`.'''
    os.rename(self, to)
    return P(to)

import shutil
@patch_to(P)
def cp(self, to):
    '''Copy this file to `to` with `shutil.copy` and return the new location as a `P`.'''
    copied_to = shutil.copy(self, to)
    return P(copied_to)

@patch_to(P)
def rm(self, confirm_prompt=False, verbose=True):
    '''Delete this file with `os.remove`, optionally asking for confirmation first.

    confirm_prompt: when True, prompt on stdin; only an answer of `y`/`Y` deletes.
    verbose: when True, log whether the file was deleted or the delete was aborted.
    '''
    if confirm_prompt:
        answer = input(f'Are you sure you want to delete `{self}`? [y/N]')
    else:
        answer = 'y'
    confirmed = answer.lower() == 'y'
    if confirmed:
        os.remove(self)
    if verbose:
        logger.info(f'Deleted {self}' if confirmed else f'Aborting delete: {self}')

In [20]:
# Exercise the patched `P` helpers on a scratch file: touch -> size -> mv -> cp.
p = P('test.txt')
p.touch()
logger.info(p.size())
p = p.mv('test1.txt')
q = p.cp('test2.txt')

assert isinstance(q, P)
# An empty file is below 1 KB, so `size()` reports '0 KB'.
assert q.size() == '0 KB'
assert str(p) == 'test1.txt'
assert p.extn() == 'txt'
assert p.parent == P()

# Clean up both scratch files without prompting.
p.rm(confirm_prompt=False)
q.rm(confirm_prompt=False)

# Directory helpers: list contents, sample one `.py` file, report its size.
p = P('torch_snippets')
logger.info(p.Glob())
q = p.sample('*.py')
logger.info(f'Sample file: `{q}`')
logger.info(f'Sample file size: `{q.size()}`')
# `size()` raises for directories; log the error instead of failing the cell.
try:
    p.size()
except Exception as e:
    logger.warning(e)


2021-08-28 10:15:21.514 | INFO     | __main__:<module>:3 - 0 KB
2021-08-28 10:15:21.520 | INFO     | __main__:rm:44 - Deleted test1.txt
2021-08-28 10:15:21.521 | INFO     | __main__:rm:44 - Deleted test2.txt
2021-08-28 10:15:21.524 | INFO     | __main__:<module>:17 - [» torch_snippets/loader.py, » torch_snippets/__pycache__, » torch_snippets/registry.py, » torch_snippets/fastcores.py, » torch_snippets/charts.py, » torch_snippets/__init__.py, » torch_snippets/misc.py, » torch_snippets/.ipynb_checkpoints, » torch_snippets/logger.py, » torch_snippets/markup.py, » torch_snippets/thinc_parser, » torch_snippets/markups.py, » torch_snippets/paths.py, » torch_snippets/sklegos.py, » torch_snippets/torch_loader.py, » torch_snippets/_nbdev.py]
2021-08-28 10:15:21.527 | INFO     | __main__:<module>:19 - Sample file: `torch_snippets/fastcores.py`
2021-08-28 10:15:21.528 | INFO     | __main__:<module>:20 - Sample file size: `0 KB`


In [21]:
#export
import glob, os

def isdir(fpath):
    '''Return True when `fpath` is an existing directory, else False.'''
    return os.path.isdir(fpath)

@input_to_str
def makedir(x):
    '''Create directory `x` (and any missing parents); an existing directory is not an error.'''
    os.makedirs(x, exist_ok=True)

@input_to_str
def fname(fpath):
    '''Return the final component of `fpath` (file name including extension).

    Uses `os.path.basename` rather than splitting on a hard-coded `'/'`, so
    paths written with the platform's native separator are handled as well.
    A path ending in a separator yields an empty string.
    '''
    return os.path.basename(fpath)

@input_to_str
def fname2(fpath):
    '''Return the file name of `fpath` without its final extension.

    The last path component is taken with `os.path.basename` (instead of a
    hard-coded `'/'` split, which fails on native Windows separators) and
    its stem is returned.
    '''
    return stem(os.path.basename(fpath))

def stem(fpath):
    '''Return the final path component of `fpath` without its last suffix.'''
    path = P(fpath)
    return path.stem

@input_to_str
def stems(folder):
    '''Return the stems of the files in `folder`.

    `folder` may be a single path (its contents are listed with `Glob`) or a
    list of paths (used directly). Any other input falls through and the
    function returns None.
    '''
    if isinstance(folder, (str, P)):
        listed = Glob(folder)
        return [stem(str(path)) for path in listed]
    if isinstance(folder, list):
        return list(map(stem, folder))

@input_to_str
@output_to_path
def parent(fpath):
    '''Return the directory containing `fpath`.

    A bare file name (no directory part) yields `'./'`. The directory part is
    computed with `os.path.dirname`, so a file directly under the filesystem
    root (e.g. `'/file'`) correctly yields the root instead of `'./'`, and
    native path separators are understood.
    '''
    directory = os.path.dirname(fpath)
    return directory if directory != '' else './'

def extn(x):
    '''Return the extension of `x` without the leading dot, via `P.extn()`.'''
    path = P(x)
    return path.extn()

@input_to_str
@output_to_path
def Glob(x, extns=None, silent=False):
    '''List files matching `x`.

    x: a glob pattern if it contains `*`, otherwise a folder whose direct
       children (`x/*`) are listed.
    extns: optional extensions to keep, as a list or a comma-separated string;
       a file is kept when its name ends with any of them.
    silent: when False, log the number of files found.
    '''
    search = x if '*' in x else x + '/*'
    files = glob.glob(search)
    if extns:
        if isinstance(extns, str):
            extns = extns.split(',')
        suffixes = tuple(extns)
        files = [f for f in files if f.endswith(suffixes)]

    if not silent:
        logger.opt(depth=1).log('INFO', '{} files found at {}'.format(len(files), x))
    return files

def find(item, List, match_stem=False):
    '''Find an `item` in a `List`

    Every element whose string form contains `item` is a match. A single
    match is returned on its own; zero or several matches are returned as a
    list. With `match_stem=True`, several matches are narrowed down to the
    ones whose stem equals `item` exactly.

    >>> find('abc', ['ijk','asdfs','dfsabcdsf','lmnop'])
    'dfsabcdsf'
    >>> find('file1', ['/tmp/file0.jpg', '/tmp/file0.png', '/tmp/file1.jpg', '/tmp/file1.png', '/tmp/file2.jpg', '/tmp/file2.png'])
    ['/tmp/file1.jpg', '/tmp/file1.png']
    >>> find('zzz', ['ijk', 'asdfs'])
    []
    '''
    filtered = [i for i in List if item in str(i)]
    if match_stem and len(filtered) > 1:
        filtered = [f for f in filtered if stem(f) == item]
    return filtered[0] if len(filtered) == 1 else filtered

In [22]:
# Check the module-level helpers against a scratch file in the current directory.
p = P('test.txt')
p.touch()
assert isdir(p) == False
assert fname(p) == 'test.txt'
# A bare file name has no directory part, so its parent is the current directory.
assert parent(p) == P()
assert stem(p) == 'test'
assert extn(p) == 'txt'

find('logger', Glob('torch_snippets'))

# Remove the scratch file without prompting.
p.rm(confirm_prompt=False)

2021-08-28 10:15:22.652 | INFO     | __main__:inner:17 - 15 files found at torch_snippets
2021-08-28 10:15:22.655 | INFO     | __main__:rm:44 - Deleted test.txt


In [23]:
#export
import zipfile
import tarfile

def zip_files(list_of_files, dest):
    '''Archive `list_of_files` into `dest` and return `dest` as a `P`.

    The archive format is chosen from the (case-insensitive) extension of
    `dest`: `.zip` writes a deflate-compressed zip, `.tar.gz` writes a gzipped
    tarball. For any other extension nothing is written and a warning is
    logged, so the returned path is not mistaken for an existing archive.
    '''
    dest = str(dest)
    # Materialise once so generators and other unsized iterables can be counted.
    list_of_files = list(list_of_files)
    logger.info(f'Zipping {len(list_of_files)} files to {dest}...')
    lowered = dest.lower()
    if lowered.endswith('.zip'):
        with zipfile.ZipFile(dest, 'w') as zipMe:
            for file in Tqdm(list_of_files):
                zipMe.write(file, compress_type=zipfile.ZIP_DEFLATED)
    elif lowered.endswith('.tar.gz'):
        with tarfile.open(dest, "w:gz") as tarMe:
            for file in Tqdm(list_of_files):
                tarMe.add(file)
    else:
        logger.warning(f'Unsupported archive extension for `{dest}`; nothing was written')
    return P(dest)

def unzip_file(file, dest):
    '''Extract the archive `file` into the folder `dest` and return `dest` as a `P`.

    Supported extensions (matched case-insensitively): `.zip`, `.tar.xz` and
    `.tar.gz`. For any other extension nothing is extracted and a warning is
    logged.

    NOTE: `extractall` trusts the member paths stored in the archive; only use
    this on archives from a trusted source.
    '''
    file = str(file)
    lowered = file.lower()
    if lowered.endswith('.zip'):
        with zipfile.ZipFile(file, 'r') as zip_ref:
            zip_ref.extractall(dest)
    elif lowered.endswith(('.tar.xz', '.tar.gz')):
        # Mode 'r' lets tarfile detect the compression on its own.
        with tarfile.open(file, 'r') as f:
            f.extractall(dest)
    else:
        logger.warning(f'Unsupported archive extension for `{file}`; nothing was extracted')
    return P(dest)

In [27]:
# Round-trip two scratch files through a gzipped tarball, then clean everything up.
!touch test1.txt test2.txt
f = zip_files(P().Glob('*.txt'), 'test.tar.gz')
unzip_file(f, './')
[f.rm() for f in P().Glob('*.txt')]
P('test.tar.gz').rm()

# Same round trip using a zip archive.
!touch test1.txt test2.txt
f = zip_files(P().Glob('*.txt'), 'test.zip')
unzip_file(f, './')
[f.rm() for f in P().Glob('*.txt')]
P('test.zip').rm()

2021-08-28 10:16:25.801 | INFO     | __main__:zip_files:7 - Zipping 2 files to test.tar.gz...
100%|██████████| 2/2 [00:00<00:00, 837.35it/s]
2021-08-28 10:16:25.819 | INFO     | __main__:rm:44 - Deleted test2.txt
2021-08-28 10:16:25.822 | INFO     | __main__:rm:44 - Deleted test1.txt
2021-08-28 10:16:25.824 | INFO     | __main__:rm:44 - Deleted test.tar.gz
2021-08-28 10:16:25.958 | INFO     | __main__:zip_files:7 - Zipping 2 files to test.zip...
100%|██████████| 2/2 [00:00<00:00, 807.61it/s]
2021-08-28 10:16:25.979 | INFO     | __main__:rm:44 - Deleted test2.txt
2021-08-28 10:16:25.982 | INFO     | __main__:rm:44 - Deleted test1.txt
2021-08-28 10:16:25.985 | INFO     | __main__:rm:44 - Deleted test.zip


In [8]:
#export
import hashlib
def md5(fname):
    '''Return the hexadecimal MD5 digest of the file at `fname`.

    The file is read in 4096-byte chunks, so it is never loaded into memory
    all at once.
    '''
    digest = hashlib.md5()
    with open(fname, "rb") as fh:
        while True:
            block = fh.read(4096)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()

def remove_duplicates(files):
    '''Delete files whose content duplicates an earlier file in `files`.

    Files are compared by MD5 digest. The first file seen with a given digest
    is kept; every later file with the same digest is deleted from disk.
    A path that appears more than once in `files` is only considered once, so
    a file is never deleted for being a "duplicate" of itself.
    Returns None.
    '''
    seen_digests = set()
    seen_paths = set()
    for f in files:
        resolved = os.path.realpath(f)
        if resolved in seen_paths:
            continue
        seen_paths.add(resolved)
        digest = md5(f)
        if digest in seen_digests:
            # Delete directly instead of renaming every duplicate onto a
            # shared scratch name that is never cleaned up.
            os.remove(f)
        else:
            seen_digests.add(digest)
    return

In [18]:
# Example: digest of this notebook file (the value changes whenever the file changes).
md5('paths.ipynb')

'79f9c5aa4ebc52d7cabd97fdf1eae6dc'

In [9]:
#export
def readlines(fpath, silent=False, encoding=None):
    '''Read a text file and return its non-blank lines, each stripped of surrounding whitespace.

    fpath: file to read.
    silent: when False, log how many lines were loaded.
    encoding: passed straight to `open`.
    '''
    with open(fpath, 'r', encoding=encoding) as f:
        stripped = (raw.strip() for raw in f.read().split('\n'))
        lines = [text for text in stripped if text != '']
        if not silent:
            logger.opt(depth=1).log("INFO", f'loaded {len(lines)} lines')
        return lines

@patch_to(P)
def read_lines(self, silent=False, encoding=None):
    '''Method form of `readlines`: return the stripped, non-blank lines of this file.'''
    return readlines(self, silent=silent, encoding=encoding)

def writelines(lines, file, encoding=None):
    '''Write each element of `lines` to `file`, one per line.

    The parent folder of `file` is created if needed. Elements are formatted
    with `str` semantics (f-string) and followed by a newline. A line that
    fails to write (for example because it cannot be encoded) is skipped and
    collected instead of aborting the whole write.

    lines: sized iterable of items to write.
    file: destination path; overwritten if it exists.
    encoding: passed straight to `open` (mirrors `readlines`).

    Returns the list of lines that could not be written, or None when every
    line was written.
    '''
    makedir(parent(file))
    failed = []
    with open(file, 'w', encoding=encoding) as f:
        for line in lines:
            try:
                f.write(f'{line}\n')
            # `Exception`, not a bare except, so Ctrl-C / SystemExit still propagate.
            except Exception:
                failed.append(line)
    if failed:
        logger.opt(depth=1).log('INFO', f'Failed to write {len(failed)} lines out of {len(lines)}')
        return failed

@patch_to(P)
def write_lines(self, lines):
    '''Method form of `writelines`: write `lines` to this file, one per line.'''
    return writelines(lines, self)

In [10]:
# Round-trip 1000 integers through write_lines/read_lines; they come back as strings.
x = P('tmp.txt')
x.touch()
x.write_lines([i for i in range(1000)])
lines = x.read_lines()
assert lines == [f'{i}' for i in range(1000)]
logger.info(x.size())

# Remove the scratch file without prompting.
x.rm(confirm_prompt=False)

2021-08-06 16:54:27.782 | INFO     | __main__:read_lines:11 - loaded 1000 lines
2021-08-06 16:54:27.784 | INFO     | __main__:<module>:6 - 3 KB
2021-08-06 16:54:27.786 | INFO     | __main__:rm:43 - Deleted tmp.txt


In [11]:
#export
def rename_batch(folder, func, debug=False, one_file=False):
    '''Rename many files by applying `func` to each path.

    V.V.Imp: Use debug=True first to confirm file name changes are as expected

    folder: a folder path (its contents are listed with `Glob`) or an
        iterable of file paths.
    func: callable mapping a source path to its destination path.
    debug: when True, only log the planned renames; nothing is moved.
    one_file: when True, stop after the first file whose name changes.

    Files whose destination equals their source are skipped. Paths are
    compared by their string form so that a `P` source and a `str`
    destination naming the same path count as unchanged.
    '''
    if isinstance(folder, (str, P)):
        folder = Glob(folder)
    for source in folder:
        destin = func(source)
        if str(source) == str(destin):
            continue
        if debug:
            logger.debug(f'moving `{source}` --> `{destin}`')
        else:
            logger.info(f'moving `{source}` --> `{destin}`')
            os.rename(source, destin)
        if one_file:
            break

In [12]:
#export
import dill, time

# NOTE(review): self-assignment is a no-op at runtime; presumably kept so that
# `dill` shows up as a module-level name when the notebook is exported - confirm.
dill = dill
def dumpdill(obj, fpath, silent=False):
    '''Serialise `obj` to `fpath` with dill.

    The destination folder is created when missing, including any missing
    intermediate folders. Unless `silent` is True, the resulting file size
    and the elapsed time are logged.
    '''
    start = time.time()
    fpath = P(fpath)
    # parents=True so a nested destination such as `a/b/c/obj.dill` works even
    # when `a/b` does not exist yet.
    fpath.parent.mkdir(parents=True, exist_ok=True)
    with open(fpath, 'wb') as f:
        dill.dump(obj, f)
    if not silent:
        fsize = fpath.size()
        logger.opt(depth=1).log('INFO', f'Dumped object of size ≈{fsize} @ "{fpath}" in {time.time()-start:.2e} seconds')

def loaddill(fpath):
    '''Load and return the object stored at `fpath` by `dumpdill`.

    NOTE: dill deserialisation can execute arbitrary code; only load files
    from a trusted source.
    '''
    with open(str(fpath), 'rb') as fh:
        return dill.load(fh)

In [13]:
# Round-trip a small list through dumpdill/loaddill and remove the scratch file.
p = P('test.tmp')
dumpdill([1,2,3], p)
y = loaddill(p)
p.rm(confirm_prompt=False)
assert y == [1,2,3]

2021-08-06 16:54:27.829 | INFO     | __main__:<module>:2 - Dumped object of size ≈0 KB @ "test.tmp" in 2.42e-03 seconds
2021-08-06 16:54:27.832 | INFO     | __main__:rm:43 - Deleted test.tmp
