# epsman: repo package tests
18/12/19

Few basic tests for file packaging for repo (Zenodo) upload.

# shutil

Handy file functions... make_archive for simple creation for a full dir.

In [35]:
# https://docs.python.org/2/library/shutil.html#archiving-operations
import shutil

# shutil.make_archive('../repo/shutiltest', 'gztar', '../repo/')  # Add all files from repo/
fileOut = shutil.make_archive('/home/femtolab/temp/shutiltest', 'gztar', '/home/femtolab/python/epsman/repo/')

In [15]:
type(fileOut)

str

#  zipfile

In [31]:
# https://docs.python.org/2/library/zipfile.html
from zipfile import ZipFile

with ZipFile('../repo/ziptest.zip', 'w') as myzip:
    myzip.write('../repo/Zenodo_API_tests_Dec2019.ipynb')
    
# NOTE - relative path used in zip as dir tree.

In [None]:
# For large files, may need force_zip64=True
# Although should be default.

In [33]:
# Add files
with ZipFile('../repo/ziptest.zip', 'a') as myzip:
    myzip.write('/home/femtolab/python/epsman/tests/Notebook_mod_testing_2_161219.ipynb')
    
# NOTE - full path used in zip as dir tree

In [34]:
fileOut = shutil.make_archive('/home/femtolab/temp/shutiltest', 'zip', '/home/femtolab/python/epsman/repo/')

In [37]:
# Info
with ZipFile('../repo/ziptest.zip', 'r') as myzip:
    print(myzip.infolist())
    print(myzip.namelist())

[<ZipInfo filename='../repo/Zenodo_API_tests_Dec2019.ipynb' filemode='-rw-r--r--' file_size=18151>, <ZipInfo filename='home/femtolab/python/epsman/tests/Notebook_mod_testing_2_161219.ipynb' filemode='-rw-r--r--' file_size=4250>]
['../repo/Zenodo_API_tests_Dec2019.ipynb', 'home/femtolab/python/epsman/tests/Notebook_mod_testing_2_161219.ipynb']


In [53]:
# To keep file paths relative to packaged dir, can chdir first.
import sys, os
# dir(os)
os.chdir('/home/femtolab/python/epsman/')
!pwd

# For ZipFile, path as supplied is included in zip
with ZipFile('/home/femtolab/temp/zippathtest.zip', 'w') as myzip:
    myzip.write('./repo/Zenodo_API_tests_Dec2019.ipynb')
    
# With shutil paths are set relative to zipped dir
fileOut = shutil.make_archive('/home/femtolab/temp/shutiltest2', 'zip', 'repo')

/home/femtolab/python/epsman


In [55]:
# Info
with ZipFile('/home/femtolab/temp/zippathtest.zip', 'r') as myzip:
    print(myzip.infolist())
    print(myzip.namelist())

[<ZipInfo filename='repo/Zenodo_API_tests_Dec2019.ipynb' filemode='-rw-r--r--' file_size=18151>]
['repo/Zenodo_API_tests_Dec2019.ipynb']


# tarfile

In [29]:
# https://docs.python.org/2/library/tarfile.html#tar-examples

# fileIn = r'/home/femtolab/python/epsman/repo/shutiltest.tar.gz'
# fileIn = fileOut
fileIn = "sample.tar"

import tarfile

# Read archive
tar = tarfile.open(fileIn, "r")  # r:gz for .gz case
for tarinfo in tar:
    print(tarinfo.name, "is", tarinfo.size, "bytes in size and is", end="")
    if tarinfo.isreg():
        print("a regular file.")
    elif tarinfo.isdir():
        print("a directory.")
    else:
        print("something else.")
tar.close()

home/femtolab/python/epsman/repo/figshare_test.py is 3864 bytes in size and isa regular file.
home/femtolab/python/epsman/repo/zenodo_test.py is 197 bytes in size and isa regular file.
home/femtolab/python/epsman/tests/Notebook_mod_testing_2_161219.ipynb is 4250 bytes in size and isa regular file.


In [28]:
# Add an item
# For archives created by shutil routine this just overwrites everything... seems to only allow append for non-compressed archives.
tar = tarfile.open(fileIn, "a")
tar.add("/home/femtolab/python/epsman/tests/Notebook_mod_testing_2_161219.ipynb")
tar.close()

In [26]:
# Make a tar archive from a list of files
items = ["/home/femtolab/python/epsman/repo/figshare_test.py", "/home/femtolab/python/epsman/repo/zenodo_test.py"]

with tarfile.open("sample.tar", "w") as tar:
    for name in items:
        tar.add(name)

# Job packaging

In [1]:
import sys, os
from pathlib import Path

# Load dev scripts
modPath = r'/home/femtolab/python/'
sys.path.append(modPath)
import epsman as em 

In [2]:
# job = em.epsJob(host = 'localhost')  # Localhost OK, but defined in code so skips some settings.
# job.initConnection()
job = em.epsJob(host = 'bemo', user = 'femtolab', IP = '127.0.0.1')  # Set as unknown (new) host
job.initConnection()

Connecting to machine: bemo at 127.0.0.1
Password for machine? ········
Testing connection...
bemo
Connected OK
Command exited with status 0.
=== stdout ===
bemo

(no stderr)


Setting host dir tree.
Set remote wrkdir: /home/femtolab/ePS


In [3]:
print(job.mol)

None


In [4]:
job.hostDefn['bemo']

{'host': 'bemo',
 'IP': '127.0.0.1',
 'home': PosixPath('/home/femtolab'),
 'wrkdir': PosixPath('/home/femtolab/ePS'),
 'scpdir': PosixPath('/home/femtolab/ePS/scripts2019'),
 'jobPath': PosixPath('/home/femtolab/ePS/jobs'),
 'jobComplete': PosixPath('/home/femtolab/ePS/jobs/completed')}

In [5]:
# Set test params for local code tests
job.mol = 'aniline'

job.hostDefn['bemo']['wrkdir'] = Path('/home/femtolab/temp2/')
job.hostDefn['bemo']['jobDir'] = Path(job.hostDefn['bemo']['wrkdir'], job.mol)
job.hostDefn['bemo']['systemDir'] = job.hostDefn['bemo']['jobDir']
job.hostDefn['bemo']['nbProcDir'] = job.hostDefn['bemo']['jobDir']
job.getNotebookJobList()


***File List (from bemo):
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_A1.inp.out
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot.inp.out
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.inp.out
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb51_A2.inp.out
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb53_B1.inp.out


In [35]:
# Code from job.tidyNotebooks()
# That code should be workable with some minor changes, but currently only sets files when renaming
# Also need more consistency re: using Path or str.
job.nbFileList = []
for item in job.jobList:
    newFile = Path(f"{Path(job.hostDefn[job.host]['nbProcDir'], Path(Path(item).stem).stem)}.ipynb")
    job.nbFileList.append(newFile)
    
job.nbFileList

[PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_A1.ipynb'),
 PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot.ipynb'),
 PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.ipynb'),
 PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb51_A2.ipynb'),
 PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb53_B1.ipynb')]

In [6]:
# Alternative version using new pkg function
job.getNotebookList()
job.nbFileList


***File List (from bemo):
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_A1.ipynb
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot.ipynb
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.ipynb
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb51_A2.ipynb
/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb53_B1.ipynb


['/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_A1.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb51_A2.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb53_B1.ipynb']

In [36]:
Path(job.jobList[0]).parent

PosixPath('/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV')

In [54]:
# Test settings for job dir tree - might be a better way to do this than relying on the format?
job.nbFileList[2].stem.rsplit(sep='_', maxsplit=2)
# job.nbFileList[0].stem.rsplit(sep='eV', maxsplit=2)

['aniline_wf_0.1-1.1eV_orb26', 'B1tot', 'v2']

In [147]:
from zipfile import ZipFile
import zipfile

# make pkg dir
job.hostDefn['bemo']['pkgDir'] = Path(job.hostDefn['bemo']['wrkdir'], 'pkg')
job.c.run('mkdir -p ' + job.hostDefn[job.host]['pkgDir'].as_posix())

# Loop over Notebooks and package corresponding files
zipList = []
failList = []
for item in job.nbFileList[0:2]:
#     print(item.stem)
    # Job keys
    jRoot = item.stem.rsplit(sep='_', maxsplit=2)
    
    # Generate filelist - based on code in getNotebookJobList()
    # Glob for files matching job, inc. subdirs, skip any zip files found.
#     Result = job.c.run(f"shopt -s globstar; ls -d -1 '{job.hostDefn[job.host]['nbProcDir'].as_posix()}/'**/* | grep {jRoot[1]}_{jRoot[2]}", warn = True, hide = True)
#     Result = job.c.run(f"shopt -s globstar; ls -d -1 '{job.hostDefn[job.host]['nbProcDir'].as_posix()}/'**/* | grep --include=${jRoot[1]}_{jRoot[2]} --exclude=pkg", warn = True, hide = True)
    Result = job.c.run(f"shopt -s globstar; ls -d -1 '{job.hostDefn[job.host]['nbProcDir'].as_posix()}/'**/*[!zip] | grep {jRoot[1]}_{jRoot[2]}", warn = True, hide = True)
#     fileOut = shutil.make_archive('/home/femtolab/temp/shutiltest', 'zip', '/home/femtolab/python/epsman/repo/')
                       
    # Write archive with files
    archName = Path(job.hostDefn['bemo']['pkgDir'], item.stem + '.zip')
#     print(archName)
                       
    with ZipFile(archName, 'w', compression=zipfile.ZIP_LZMA) as myzip:
#         [myzip.write(fileIn) for fileIn in Result.stdout.splitlines()]  # Not sure what the issue is here, something recursive - wrote multiple Gbs
                                                                        # AH - problem was recursively adding .zip file, oops. Should check/filter for that.
        for fileIn in Result.stdout.splitlines():
            # Write file, set also arcname to fix relative paths
            myzip.write(fileIn, arcname = Path(fileIn).relative_to(job.hostDefn[job.host]['nbProcDir']))
#             print(fileIn)
#     [print(fileIn) for fileIn in Result.stdout.splitlines()]    
    
        # Check file is OK
        if myzip.testzip() is None:
            zipList.append(archName)
            print(f'Written {archName} OK')
        else:
            failList.append(archName)
            print(f'*** Archive {archName} failed')
                       

# print(Result)        
# print(jRoot)

# Test notes
# B1tot: 27Mb uncompressed (ZIP_STORED)
#        5.3Mb (ZIP_DEFLATED) (==8)
#        4.6Mb (ZIP_BZIP2) (==12)
#        2.1Mb (ZIP_LZMA) (==14)
                
Result

Written /home/femtolab/temp2/pkg/aniline_wf_0.1-1.1eV_orb26_A1.zip OK
Written /home/femtolab/temp2/pkg/aniline_wf_0.1-1.1eV_orb26_B1tot.zip OK


<Result cmd="shopt -s globstar; ls -d -1 '/home/femtolab/temp2/aniline/'**/*[!zip] | grep orb26_B1tot" exited=0>

In [111]:
print(zipfile.ZIP_STORED)
print(zipfile.ZIP_DEFLATED)
print(zipfile.ZIP_BZIP2)
print(zipfile.ZIP_LZMA)

0
8
12
14


In [148]:
fileCheck = r'/home/femtolab/temp2/pkg/aniline_wf_0.1-1.1eV_orb26_B1tot.zip'

# Info
with ZipFile(fileCheck, 'r') as myzip:
#     print(myzip.infolist())
#     print(myzip.namelist())
    infoList = myzip.infolist()
    nameList = myzip.namelist()
    
# print(*infoList, sep='\n')
nameList

['aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot.inp.err',
 'aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot.inp.out',
 'aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.inp.err',
 'aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.inp.out',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_idy/',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_idy/anilineSB1CA1.idy',
 'aniline_wf_0.1-1.1eV_orb26_B1tot.ipynb',
 'aniline_wf_0.1-1.1eV_orb26_B1tot_v2.ipynb',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_1.1eV_Awave.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_1.1eV_DPot.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_1.1eV_Orb.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_1.1eV_OrbGeom.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_1.1eV_Swave.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1CA1_.1eV_Awave.dat',
 'aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn/anilineSB1C

In [92]:
job.nbFileList[0].relative_to(job.hostDefn[job.host]['nbProcDir'])

PosixPath('aniline_wf_0.1-1.1eV_orb26_A1.ipynb')

In [143]:
Result = job.c.run(f"shopt -s globstar; ls -d -1 '{job.hostDefn[job.host]['nbProcDir'].as_posix()}/'**/*[!dat] | grep {jRoot[1]}_{jRoot[2]}", warn = True, hide = True)
(Result.stdout.split())

['/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot.inp',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot.inp.err',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.inp',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.inp.err',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/orb26_B1tot_idy',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/orb26_B1tot_idy/anilineSB1CA1.idy',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV_orb26_B1tot_v2.ipynb',
 '/home/femtolab/temp2/aniline/aniline_wf_0.1-1.1eV/orb26_B1tot_waveFn']

In [29]:
import inspect
Path(inspect.getfile(em))

PosixPath('/home/femtolab/python/epsman/__init__.py')

In [None]:
# Dirs...

# (1) If running as epsman jobs, can get from existing object
if 'job' in locals():
    if job.mol is not None:
        # Package as single job
        pass
    
    elif hasattr(job, 'nbFileList'):
        # Package as job per notebook - this var doesn't exist until created.
        pass

else:
    