Skip to content

Commit

Permalink
Merge 42f0e80 into 8db442f
Browse files Browse the repository at this point in the history
  • Loading branch information
tschaume committed Feb 5, 2019
2 parents 8db442f + 42f0e80 commit 706b316
Show file tree
Hide file tree
Showing 16 changed files with 1,766 additions and 4 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Expand Up @@ -105,4 +105,8 @@ ENV/
.DS_Store

# PyCharm
.idea
.idea

# GDrive
emmet/scripts/credentials.json
emmet/scripts/token.json
Empty file added emmet/scripts/__init__.py
Empty file.
1,496 changes: 1,496 additions & 0 deletions emmet/scripts/emmet.py

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions emmet/scripts/garden_to_hpss.sh
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Archive every block_* directory found directly under <dir> to HPSS.
#
# For each block directory this script
#   1. makes all entries group readable/writable,
#   2. compresses every regular file that is not already named *.gz,
#   3. stores the directory as HTAR archive garden/<block>.tar,
#   4. deletes the local directory once htar has succeeded.
#
# Usage: garden_to_hpss.sh <dir>
# Exit status: non-zero as soon as any step fails.

# Print a message on stderr and stop with a failure status. A bare `exit`
# right after `echo` would report echo's status, i.e. success.
die() {
  echo "$*" >&2
  exit 1
}

[[ -n "$1" ]] || die 'usage: garden_to_hpss.sh <dir containing block_* directories>'

# Everything below (including the final rm -rf) uses paths relative to <dir>,
# so never continue when the cd did not work.
cd "$1" || die "cannot cd to $1"
pwd

# Glob relative to the new working directory so a relative <dir> works too.
for block_dir in block_*; do
  # Real directories only; this also skips the literal pattern when nothing
  # matches and mirrors `find -type d`, which ignores symlinks.
  [[ -d "$block_dir" && ! -L "$block_dir" ]] || continue
  echo "$block_dir"

  parallel -0m 'chmod -v g+rw {}' :::: <(find "$block_dir" -not -perm -660 -print0) \
    || die 'error in chmod'

  # `-exec ... +` makes find exit non-zero when pigz fails; with `\;` the
  # status of pigz is dropped and the check below could never trigger.
  find "$block_dir" -type f -not -name "*.gz" -exec pigz -9v {} + \
    || die 'error in pigz'

  block=$(basename "$block_dir")
  htar -M 5000000 -cvf "garden/${block}.tar" "$block" || die 'error with htar'

  # Only reached after a successful htar.
  rm -rfv -- "$block_dir"
done
63 changes: 63 additions & 0 deletions emmet/scripts/hpss_to_mpdrive.sh
@@ -0,0 +1,63 @@
#!/bin/bash
#
# Copy launch directories that are stored in HTAR archives but not yet present
# on the rclone remote "mp-drive".
#
# Usage: hpss_to_mpdrive.sh <workdir> <launcher_paths_file>
#   <workdir>              directory to work in; tarballs are staged in
#                          <workdir>/rclone_to_mp_drive
#   <launcher_paths_file>  text file with one "<block>/<launch dir path>" per
#                          line (paths must not contain whitespace)
#
# Per block (first path component) the script
#   1. compares the requested launch dirs with `rclone lsf` of
#      mp-drive:calculations/garden/<block>,
#   2. removes local leftovers of launch dirs that are already on the remote,
#   3. extracts the missing launch dirs from HTAR archive garden/<block>.tar,
#   4. packs each of them into <stage dir>/<launch dir>.tar.gz,
#   5. uploads the staged tarballs with `rclone copy`.
#
# Exit status: non-zero on usage errors, a failed cd, or a failed tar.

# Print a message on stderr and stop with a failure status (a bare `exit`
# after `echo` would report success).
die() {
  echo "$*" >&2
  exit 1
}

[[ -n "$1" ]] || die 'usage: hpss_to_mpdrive.sh <workdir> <launcher_paths_file>'
input=$2
[[ -e "$input" ]] || die "$input missing"
# The list is read again after the cd below, so make a relative path absolute.
[[ "$input" == /* ]] || input="$PWD/$input"
dirs=$(awk -F/ '{print $1}' "$input" | sort -u)

# All file operations below (including rm -r) are relative to <workdir>.
cd "$1" || die "cannot cd to $1"
pwd
stage_dir=rclone_to_mp_drive
mkdir -pv "$stage_dir" || die "cannot create $stage_dir"

# Unquoted on purpose: one block name per whitespace-separated word.
for dir in $dirs; do
  echo "$dir"

  # Requested launch dirs of this block. Match the first path component
  # exactly; a "^$dir" prefix match would also pick up blocks whose name
  # merely starts with $dir.
  awk -F/ -v block="$dir" '$1 == block' "$input" | sort -u > "${dir}.files"
  wc -l "${dir}.files"

  # Launch dirs already on the remote: prefix with the block name and strip
  # only a real trailing ".tar.gz".
  rclone lsf -R --files-only "mp-drive:calculations/garden/$dir" \
    | sed -e "s:^:$dir/:" -e 's:\.tar\.gz$::' \
    | sort -u > "${dir}.rclone_lsf"
  wc -l "${dir}.rclone_lsf"

  missing_paths=${dir}.paths
  rm -fv -- "$missing_paths"

  # Launch dirs missing in mp-drive: queue them for extraction unless a
  # non-empty staged tarball already exists (then only drop the extracted dir).
  # The list is read on fd 3 so commands in the loop keep the script's stdin.
  while IFS= read -r f <&3; do
    launch_dir_tar="${stage_dir}/${f}.tar.gz"
    if [[ ! -f "$launch_dir_tar" || ! -s "$launch_dir_tar" ]]; then
      echo "$f" >> "$missing_paths"
    elif [[ -d "$f" ]]; then
      rm -rv -- "$f"
    fi
  done 3< <(comm --check-order -23 "${dir}.files" "${dir}.rclone_lsf")

  # Already cloned launch dirs -> cleanup of extracted dirs and staged tarballs.
  while IFS= read -r f <&3; do
    launch_dir_tar="${stage_dir}/${f}.tar.gz"
    [[ -d "$f" ]] && rm -rv -- "$f"
    [[ -e "$launch_dir_tar" ]] && rm -v -- "$launch_dir_tar"
  done 3< <(comm --check-order -12 "${dir}.files" "${dir}.rclone_lsf")
  rm -v -- "${dir}.files" "${dir}.rclone_lsf"

  if [[ ! -e "$missing_paths" ]]; then
    echo 'nothing missing on GDrive!?'
    continue
  fi
  wc -l "$missing_paths"

  mapfile -t to_restore < "$missing_paths"
  # The htar status is deliberately not checked: success is judged by what
  # actually appeared on disk (block dir here, each launch dir below).
  htar -xvf "garden/${dir}.tar" "${to_restore[@]}"
  if ! ls -ltrhd "$dir"; then
    echo 'missing paths not found in HPSS!?'
    continue
  fi

  for f in "${to_restore[@]}"; do
    if [[ ! -e "$f" ]]; then
      echo "$f not found in HPSS!?"
      continue
    fi
    launch_dir_tar="${stage_dir}/${f}.tar.gz"
    echo "$launch_dir_tar ..."
    mkdir -p "$(dirname "$launch_dir_tar")"
    if tar --use-compress-program="pigz -9rv" -cf "$launch_dir_tar" \
      -C "$(dirname "$f")" "$(basename "$f")"; then
      ls -ltrh "$launch_dir_tar"
    else
      # Do not leave a partial tarball behind: a non-empty staged file is
      # treated as complete on the next run.
      echo 'problem with launch dir tar!' >&2
      rm -v -- "$launch_dir_tar"
      exit 1
    fi
    [[ -d "$f" ]] && rm -rv -- "$f"
  done
  rm -v -- "$missing_paths"

  rclone -v copy "$stage_dir/$dir" "mp-drive:calculations/garden/$dir"
  find "$dir" -type d -empty -print -delete
done
14 changes: 14 additions & 0 deletions emmet/scripts/sbatch/submit_garden_to_hpss.txt
@@ -0,0 +1,14 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=48:00:00
#SBATCH --job-name=garden_to_hpss
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=garden_to_hpss-%j.out
#SBATCH --error=garden_to_hpss-%j.error
#SBATCH --mem=10GB

# Batch wrapper: run garden_to_hpss.sh on the raw garden directory.
raw_dir=/project/projectdirs/matgen/garden/hpss_to_mpdrive/raw/
archiver="$HOME/mp_prod/codes/emmet/emmet/scripts/garden_to_hpss.sh"
"$archiver" "$raw_dir"
16 changes: 16 additions & 0 deletions emmet/scripts/sbatch/submit_hpss_MatProj.script
@@ -0,0 +1,16 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=48:00:00
#SBATCH --job-name=hpss_MatProj
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=hpss_MatProj-%j.out
#SBATCH --error=hpss_MatProj-%j.error
#SBATCH --mem=10GB

# For every archive path listed in hpss_MatProj_2014.txt (one per line, read
# from the submission directory): run `htar -Xvf <archive>` when
# `hsi ls <archive>.idx` fails, i.e. when no index file is found.
# NOTE(review): -X is presumably HTAR's "rebuild index" action - confirm
# against the HTAR manual.
archive_list=hpss_MatProj_2014.txt
if [[ ! -r "$archive_list" ]]; then
  echo "$archive_list missing" >&2
  exit 1
fi

# The list is read on fd 3 so that hsi/htar cannot consume it via stdin;
# `read -r` keeps backslashes literal. The `|| [[ -n ... ]]` part also handles
# a last line without trailing newline.
while read -r line <&3 || [[ -n "$line" ]]; do
  # A blank line would otherwise end up as `htar -Xvf` without an archive.
  [[ -n "$line" ]] || continue
  echo "$line"
  if ! hsi -q -l matcomp ls -1 "${line}.idx"; then
    htar -Xvf "$line"
  fi
done 3< "$archive_list"
13 changes: 13 additions & 0 deletions emmet/scripts/sbatch/submit_hpss_to_mpdrive.script
@@ -0,0 +1,13 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=19:00:00
#SBATCH --job-name=hpss_to_mpdrive
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=hpss_to_mpdrive-%j.out
#SBATCH --error=hpss_to_mpdrive-%j.error

# Batch wrapper: run hpss_to_mpdrive.sh with the working directory and the
# list of launcher paths to process.
sync_script=~/mp_prod/codes/emmet/emmet/scripts/hpss_to_mpdrive.sh
work_dir=/project/projectdirs/matgen/garden/hpss_to_mpdrive
launcher_list=/global/homes/h/huck/mp_prod/workdir/emmet_gdrive/launcher_paths_block_2019.txt
"$sync_script" "$work_dir" "$launcher_list"
16 changes: 16 additions & 0 deletions emmet/scripts/sbatch/submit_restore_MatProj.txt
@@ -0,0 +1,16 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=48:00:00
#SBATCH --job-name=restore_matproj
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=restore_matproj-%j.out
#SBATCH --error=restore_matproj-%j.error

# Extract one HTAR archive into the raw garden directory.
# Prints DONE (and exits 0) only when the extraction succeeded.
outdir=/project/projectdirs/matgen/garden/hpss_to_mpdrive/raw/
archive=/home/projects/MatProj/GARDEN/2012-Jul-Aug.tar

# htar extracts into the current directory, so a failed cd must stop the job
# instead of unpacking the archive wherever the job happened to start.
if ! cd "$outdir"; then
  echo "cannot cd to $outdir" >&2
  exit 1
fi
pwd

if ! htar -xvf "$archive"; then
  echo 'error in htar -x' >&2
  exit 1
fi
echo DONE
13 changes: 13 additions & 0 deletions emmet/scripts/sbatch/submit_rsync.script
@@ -0,0 +1,13 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=48:00:00
#SBATCH --job-name=rsync
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=rsync-%j.out
#SBATCH --error=rsync-%j.error

# Move all block_* entries from the control_blocks directory into the raw
# garden directory; rsync deletes each source file after transferring it.
source_dir=/project/projectdirs/matgen/garden/control_blocks
target_dir=/project/projectdirs/matgen/garden/hpss_to_mpdrive/raw/
# The glob stays outside the quotes so the shell still expands it.
rsync -av --remove-source-files "$source_dir"/block_* "$target_dir"
15 changes: 15 additions & 0 deletions emmet/scripts/sbatch/submit_targz_to_htar.script
@@ -0,0 +1,15 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=48:00:00
#SBATCH --job-name=targz_to_htar
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=targz_to_htar-%j.out
#SBATCH --error=targz_to_htar-%j.error
#SBATCH --mem=10GB

# Batch wrapper: run targz_to_htar.sh for the blocks of one year.
block_year=2019
raw_dir=/project/projectdirs/matgen/garden/hpss_to_mpdrive/raw/
converter="$HOME/mp_prod/codes/emmet/emmet/scripts/targz_to_htar.sh"
"$converter" "$raw_dir" "$block_year"
11 changes: 11 additions & 0 deletions emmet/scripts/sbatch/submit_update_hpss_archive.script
@@ -0,0 +1,11 @@
#!/bin/bash -l
#SBATCH --qos=xfer
#SBATCH --time=06:30:00
#SBATCH --job-name=update_hpss_archive
#SBATCH --licenses=SCRATCH
#SBATCH --mail-user=phuck@lbl.gov
#SBATCH --mail-type=ALL
#SBATCH --output=update_hpss_archive-%j.out
#SBATCH --error=update_hpss_archive-%j.error

# Batch wrapper: run update_hpss_archive.sh (it takes no arguments).
updater=~/mp_prod/codes/emmet/emmet/scripts/update_hpss_archive.sh
"$updater"
35 changes: 35 additions & 0 deletions emmet/scripts/targz_to_htar.sh
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Convert HPSS archives garden/block_<year>*.tar.gz into HTAR archives
# garden/<block>.tar.
#
# Usage: targz_to_htar.sh <indir> <year>
#   <indir>  local scratch directory to download/extract into
#   <year>   prefix used to select archives (garden/block_<year>*.tar.gz)
#
# Per archive: fetch it with hsi, extract it, move the block directory to the
# top level if it was packed below one of the known garden_* parents, fix
# group permissions, gzip all files, create the HTAR archive, remove the old
# .tar.gz from HPSS and finally clean up locally.
# Exit status: non-zero as soon as a checked step fails.
#
# NOTE make sure matcomp is first entry in ~/.netrc!

# Print a message on stderr and stop with a failure status (a bare `exit`
# after `echo` would report success).
die() {
  echo "$*" >&2
  exit 1
}

indir=$1
year=$2
[[ -n "$indir" && -n "$year" ]] || die 'usage: targz_to_htar.sh <indir> <year>'
garden=garden_${year}.txt

# Files are written and removed relative to <indir>; never continue elsewhere.
cd "$indir" || die "cannot cd to $indir"
pwd
# Pattern quoted so that hsi, not the local shell, expands it.
hsi -P -l matcomp ls -1 "garden/block_${year}*.tar.gz" > "$garden"

# Parent directories under which a block may appear after extraction.
parents=(
  garden_pauling_files
  garden_cori
  garden_JulAug2018
  garden_Jul2018
  garden_Aug14-16_2018
  garden_Aug2018
)

# The list is read on fd 3: with `done < file` the interactive `mv -i` prompt
# (and possibly hsi) would read the remaining block names from stdin.
while read -r block_tar_gz <&3; do
  [[ -n "$block_tar_gz" ]] || continue
  block=$(basename "${block_tar_gz%.tar.gz}")
  echo "$block"

  hsi -q -l matcomp cget "garden/${block}.tar.gz" || die 'error in hsi cget'
  tar -I pigz --skip-old-files -xvf "${block}.tar.gz" || die 'error in tar -x'

  for parent in "${parents[@]}"; do
    [[ -d "$parent/$block" ]] && mv -vi "$parent/$block" .
  done

  parallel -0m 'chmod -v g+rw {}' :::: <(find "$block" -not -perm -660 -print0) \
    || die 'error in chmod'

  # `-exec ... +` makes find exit non-zero when pigz fails; with `\;` the
  # status of pigz is dropped and this check could never trigger.
  find "$block" -type f -not -name "*.gz" -exec pigz -9v {} + \
    || die 'error in pigz'

  htar -M 5000000 -cvf "garden/${block}.tar" "$block" || die 'error in htar -c'

  # The old archive is only removed after the new one was written.
  hsi -q -l matcomp rm "garden/${block}.tar.gz" || die 'error in hsi rm'

  rm -rv -- "${block:?}"
  rm -v -- "${block}.tar.gz"
done 3< "$garden"


44 changes: 44 additions & 0 deletions emmet/scripts/update_hpss_archive.sh
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Merge local block_201*.tar.gz tarballs into their HTAR archives
# garden/<block>.tar.
#
# Per tarball: extract it, drop empty directories, fix group permissions and
# gzip all files, then compare the local file list with the listing of the
# HTAR archive. If local files are missing from the archive, the archive is
# extracted on top (merging both), the old archive is renamed to *.bkp, a new
# archive is written and the backup removed. Local data is deleted at the end.
#
# Usage: update_hpss_archive.sh   (no arguments; works in $indir below)
# Exit status: non-zero as soon as a checked step fails.

# Print a message on stderr and stop with a failure status (a bare `exit`
# after `echo` would report success).
die() {
  echo "$*" >&2
  exit 1
}

indir=/project/projectdirs/matgen/garden/hpss_to_mpdrive/raw
# Files are extracted and removed relative to $indir; never continue elsewhere.
cd "$indir" || die "cannot cd to $indir"
pwd

#for block in $(find . -maxdepth 1 -type d -name "block_2011*" -exec basename {} \;); do
#for block in $(cat hpss_update_2013.txt); do
for block_targz in block_201*.tar.gz; do
  # Without a match the pattern stays literal; skip it.
  [[ -e "$block_targz" ]] || continue

  tar -I pigz --skip-old-files -xvf "$block_targz" || die 'error in tar -x'
  block=${block_targz%.tar.gz}
  echo "$block"
  [[ -d "$block" ]] || die "$block does not exist"
  find "$block" -type d -empty -print -delete
  [[ -d "$block" ]] || die "$block only contained empty directories"

  parallel -0m 'chmod -v g+rw {}' :::: <(find "$block" -not -perm -660 -print0) \
    || die 'error in chmod'
  # `-exec ... +` makes find exit non-zero when pigz fails; with `\;` the
  # status of pigz is dropped and this check could never trigger.
  find "$block" -type f -not -name "*.gz" -exec pigz -9v {} + \
    || die 'error in pigz'

  # Paths stored in the archive (7th column of the verbose listing).
  htar -vtf "garden/${block}.tar" | awk '{ print $7 }' | sort -u > "${block}.tar.idx"
  # $? would only hold sort's status; inspect every stage so that a failed
  # listing is not mistaken for an empty archive.
  listing_status=("${PIPESTATUS[@]}")
  [[ "${listing_status[*]}" == "0 0 0" ]] || die 'error in htar -t'
  find "$block" -type f | sort -u > "${block}.idx"

  # Lines only in the local index = files not yet in the archive.
  comm -13 "${block}.tar.idx" "${block}.idx" > "${block}.missing"
  if [[ -s "${block}.missing" ]]; then
    nfiles=$(awk 'END { print NR }' "${block}.missing")
    echo "need syncing of $nfiles files"
    htar -xvf "garden/${block}.tar" || die 'error in htar -x'
    hsi -q -l matcomp mv "garden/${block}.tar" "garden/${block}.tar.bkp"
    hsi -q -l matcomp mv "garden/${block}.tar.idx" "garden/${block}.tar.idx.bkp"
    htar -M 5000000 -cvf "garden/${block}.tar" "$block" || die 'error in htar -c'
    # Pattern quoted so that hsi, not the local shell, expands it.
    hsi -q -l matcomp rm "garden/${block}.tar*.bkp" || die 'error in hsi rm'
  else
    echo 'all files already in HTAR archive'
  fi

  rm -rv -- "${block:?}"
  rm -v -- "${block}.tar.idx" "${block}.idx" "${block}.missing"
  rm -v -- "$block_targz"
done
3 changes: 3 additions & 0 deletions emmet/vasp/materials.py
Expand Up @@ -309,6 +309,9 @@ def ensure_indexes(self):
self.materials.ensure_index("task_ids")
self.materials.ensure_index(self.materials.lu_field)

def get_sg(struc):
    """Helper function to get spacegroup with a loose tolerance.

    Calls ``struc.get_space_group_info(symprec=0.1)`` and returns the element
    at index 1 of the result.
    """
    # NOTE(review): index 1 is presumably the space group number - confirm
    # against the structure class in use.
    sg_info = struc.get_space_group_info(symprec=0.1)
    return sg_info[1]

def structure_metadata(structure):
"""
Expand Down
10 changes: 7 additions & 3 deletions setup.py
Expand Up @@ -19,12 +19,12 @@
license='modified BSD',
packages=find_packages(),
include_package_data=True,
package_data={},
zip_safe=False,
install_requires=[
'atomate', 'pymatgen>=2018.4.20','maggma','monty',
'six', 'pydash', 'tqdm', 'matminer',
'prettyplotlib', 'pybtex', 'networkx', 'sumo',
'robocrys'
'six', 'pydash', 'tqdm', 'matminer', 'log4mongo', 'prettytable',
'prettyplotlib', 'pybtex', 'Click', 'networkx', 'sumo', 'robocrys'
],
classifiers=["Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
Expand All @@ -37,5 +37,9 @@
'Topic :: Scientific/Engineering'],
test_suite='nose.collector',
tests_require=['nose'],
entry_points='''
[console_scripts]
emmet=emmet.scripts.emmet:cli
''',
python_requires='>=3.6',
)

0 comments on commit 706b316

Please sign in to comment.