# Create a Dataset from scratch and run it

We want to create a project that contains all the jobs needed to calculate the band structure of silicon with PBE. To set up the inputs we will use the `pbe_electronic_structure` scheme of the `CalculationSchemes` class.

In [1]:
# import packages
from pynter.data.datasets import Dataset
from pynter.tools.materials_project import MPDatabase
from pynter.vasp.calculation_schemes import CalculationSchemes

In [2]:
# Define the project path - customize with your own path
project_path = '/home/lorenzo/tests/project-test/tutorials/Si-BS-dataset'

# Initialize the dataset in the project folder
ds = Dataset(path=project_path)

# Get the structure of FCC Si (mp-149) from the Materials Project
structure = MPDatabase(mp_id='mp-149').get_structure()

# Adapt the job settings for a smaller job (one node, short time limit)
job_settings = {'nodes':1,'timelimit':'00:30:00'}

# Build the PBE electronic-structure scheme for this structure
cs = CalculationSchemes(structure,job_settings=job_settings,name='Si-BS')
scheme = cs.pbe_electronic_structure()

# Create one VaspJob per step of the scheme, using the step name as group.
# The per-step settings get their own name so that the user-defined
# `job_settings` above is not overwritten inside the loop.
for step in scheme.steps:
    inputs = scheme.get_vaspinput(step)
    step_job_settings = scheme.get_job_settings(step)
    print(f'create job for step {step}')
    ds.create_job('VaspJob',group=step,inputs=inputs,job_settings=step_job_settings)
    

create job for step 1-PBE-SCF
create job for step 2-PBE-DOS
create job for step 3-PBE-BS




In [3]:
# Display the table of the jobs in the dataset. The jobs have only been
# created at this point, so `is_converged` is still empty.
ds.jobs_table()

Unnamed: 0_level_0,formula,group,nodes,is_converged
job_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Si-BS_PBE-el-str_1,Si2,1-PBE-SCF,,
Si-BS_PBE-el-str_2,Si2,2-PBE-DOS,,
Si-BS_PBE-el-str_3,Si2,3-PBE-BS,,


In [4]:
# Pick the job of the first step by filtering the dataset on its group name;
# the selection is indexable, and the first match is the job we want.
# The same job could also be reached by position with `ds.jobs[0]`.
first_step_jobs = ds.select_jobs(groups=['1-PBE-SCF'])
job_first_step = first_step_jobs[0]
print(job_first_step)

Job "Si-BS_PBE-el-str_1" of group "1-PBE-SCF"


In [5]:
# Inspect the inputs generated for the first-step job; the displayed dict
# has the keys INCAR, KPOINTS, POSCAR and POTCAR.
job_first_step.inputs

{'INCAR': {'IBRION': 2,
  'NSW': 0,
  'ISIF': 2,
  'EDIFFG': -0.05,
  'ISPIN': 1,
  'LWAVE': '.TRUE.',
  'LCHARG': '.TRUE.',
  'LORBIT': 10,
  'ENCUT': 500,
  'EDIFF': 1e-06,
  'ISMEAR': 0,
  'SIGMA': 0.05,
  'ALGO': 'Normal',
  'AMIX': 0.2,
  'LREAL': '.FALSE.',
  '#### Default PBE: system': 'Si',
  'ISYM': 2},
 'KPOINTS': pymatgen 4.7.6+ generated KPOINTS with grid density = 1000 / atom
 0
 Gamma
 8 8 8,
 'POSCAR': Si2
 1.0
 3.325489 0.000000 1.919972
 1.108496 3.135301 1.919972
 0.000000 0.000000 3.839943
 Si
 2
 direct
 0.875000 0.875000 0.875000 Si
 0.125000 0.125000 0.125000 Si,
 'POTCAR': [<pymatgen.io.vasp.inputs.PotcarSingle at 0x7f318c53ce10>]}

In [6]:
# Write the inputs of the jobs in the dataset to files
ds.write_jobs_input()

In [7]:
# Sync the Dataset folder to the HPC to transfer all the files before starting the calculations.
# All files need to be present on the HPC for the automations to work.
ds.sync_dataset_to_hpc()

/home/lorenzo/tests/project-test/tutorials/Si-BS-dataset /work/scratch/lv51dypu/tutorials/Si-BS-dataset
rsync -r -uavzh -e ssh  /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/* lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset 
sending incremental file list
1-PBE-SCF/
1-PBE-SCF/INCAR
1-PBE-SCF/KPOINTS
1-PBE-SCF/POSCAR
1-PBE-SCF/POTCAR
1-PBE-SCF/job.sh

sent 72.56K bytes  received 115 bytes  29.07K bytes/sec
total size is 196.69K  speedup is 2.71

sending incremental file list
2-PBE-DOS/
2-PBE-DOS/INCAR
2-PBE-DOS/KPOINTS
2-PBE-DOS/POSCAR
2-PBE-DOS/POTCAR
2-PBE-DOS/job.sh

sent 72.52K bytes  received 115 bytes  29.05K bytes/sec
total size is 196.67K  speedup is 2.71

sending incremental file list
3-PBE-BS/
3-PBE-BS/INCAR
3-PBE-BS/KPOINTS
3-PBE-BS/POSCAR
3-PBE-BS/POTCAR
3-PBE-BS/job.sh

sent 72.63K bytes  received 115 bytes  29.10K bytes/sec
total size is 197.07K  speedup is 2.71



Now that we have created the jobs and synced the files, we can start the calculations. 
In this case we only need to start the calculation of the first step; the other steps are automated.

In [8]:
# Submit the first-step job. `sync=True` syncs the job files before running;
# in this case it would not be necessary, since the whole dataset was just synced.
job_first_step.run_job(sync=True)

rsync -r -uavzh -e ssh  /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/1-PBE-SCF/* lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset/1-PBE-SCF 
sending incremental file list
POSCAR

sent 104 bytes  received 41 bytes  58.00 bytes/sec
total size is 161  speedup is 1.11

sending incremental file list
POTCAR

sent 106 bytes  received 1.72K bytes  728.40 bytes/sec
total size is 195.67K  speedup is 107.45

sending incremental file list
INCAR

sent 103 bytes  received 41 bytes  57.60 bytes/sec
total size is 226  speedup is 1.57

sending incremental file list
KPOINTS

sent 105 bytes  received 41 bytes  58.40 bytes/sec
total size is 80  speedup is 0.55

sending incremental file list
job.sh

sent 104 bytes  received 41 bytes  58.00 bytes/sec
total size is 545  speedup is 3.76

Submitted batch job 14746180
[1;37m[I][0m Selected project: project01136
[1;37m[I][0m Possible CPU types    = Haswell([1;32mavx2[0m)
[1;37m[I][0m Possible specials     

('Submitted batch job 14746180\n\x1b[1;37m[I]\x1b[0m Selected project: project01136\n\x1b[1;37m[I]\x1b[0m Possible CPU types    = Haswell(\x1b[1;32mavx2\x1b[0m)\n\x1b[1;37m[I]\x1b[0m Possible specials     = short\n',
 '')

In [9]:
# Check the status of the first-step job (displayed as a string, e.g. 'PENDING')
job_first_step.status()

'PENDING'

In [10]:
# Check the job queue on the HPC (the output shows the `squeue` command that is run)
ds.queue()

lv51dypu@lcluster7.hrz.tu-darmstadt.de: squeue -o "%.10i %.9P %.40j %.8u %.2t %.10M %.5D %R" 

     JOBID PARTITION                                     NAME     USER ST       TIME NODES NODELIST(REASON)
  14746180 deflt_sho                       Si-BS_PBE-el-str_1 lv51dypu PD       0:00     1 (Priority)



In [11]:
# Check the queue again after some time: the first job has left the queue
# and the job of the second step is now running.
ds.queue()

lv51dypu@lcluster7.hrz.tu-darmstadt.de: squeue -o "%.10i %.9P %.40j %.8u %.2t %.10M %.5D %R" 

     JOBID PARTITION                                     NAME     USER ST       TIME NODES NODELIST(REASON)
  14746185 deflt_sho                       Si-BS_PBE-el-str_2 lv51dypu  R       0:59     1 hpb0436



In [15]:
# The first step should be finished by now, so let's retrieve its files from the HPC.
# With the sync-hpc.py script running in the background, the files
# are synced automatically every 15 minutes.

job_first_step.sync_from_hpc()


rsync -r -uavzh -e ssh lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset/1-PBE-SCF/* /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/1-PBE-SCF 
receiving incremental file list
CHG
CHGCAR
CONTCAR
DOSCAR
EIGENVAL
IBZKPT
OSZICAR
OUTCAR
PCDAT
PROCAR
WAVECAR
XDATCAR
err.14746180
exit_status.txt
out.14746180
vasprun.xml

sent 328 bytes  received 6.14M bytes  945.03K bytes/sec
total size is 10.09M  speedup is 1.64



In [19]:
# Now let's collect the outputs of the first-step job and analyse them.
job_first_step.get_outputs()
# The last expression is displayed: whether the calculation is converged.
job_first_step.is_converged

True

In [21]:
# Check the queue once more: it is empty once all the calculations are done.
ds.queue()
# The calculations are finished, so we can sync the files of all jobs from the HPC
# and then collect the outputs.
ds.sync_jobs()

lv51dypu@lcluster7.hrz.tu-darmstadt.de: squeue -o "%.10i %.9P %.40j %.8u %.2t %.10M %.5D %R" 

     JOBID PARTITION                                     NAME     USER ST       TIME NODES NODELIST(REASON)


rsync -r -uavzh -e ssh lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset/1-PBE-SCF/* /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/1-PBE-SCF 
receiving incremental file list

sent 20 bytes  received 500 bytes  208.00 bytes/sec
total size is 10.09M  speedup is 19,399.47


rsync -r -uavzh -e ssh lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset/2-PBE-DOS/* /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/2-PBE-DOS 
receiving incremental file list
CHG
CHGCAR
CONTCAR
DOSCAR
EIGENVAL
IBZKPT
OSZICAR
OUTCAR
PCDAT
POSCAR
PROCAR
WAVECAR
XDATCAR
err.14746185
exit_status.txt
out.14746185
vasprun.xml

sent 353 bytes  received 77.99M bytes  5.03M bytes/sec
total size is 94.73M  speedup is 1.21


rsync -r -ua

In [22]:
# Collect the outputs of the jobs in the dataset
ds.get_jobs_outputs()

Now that we have collected the outputs of the calculations, we can check the results.

In [23]:
# Display the job table again: `is_converged` is now filled in for every job.
ds.jobs_table()

Unnamed: 0_level_0,formula,group,nodes,is_converged
job_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Si-BS_PBE-el-str_1,Si2,1-PBE-SCF,,True
Si-BS_PBE-el-str_2,Si2,2-PBE-DOS,,True
Si-BS_PBE-el-str_3,Si2,3-PBE-BS,,True


In [24]:
# We can also display additional properties in the table, e.g. the final (total) energies.
ds.jobs_table(properties_to_display=['final_energy'])

Unnamed: 0_level_0,formula,group,nodes,is_converged,final_energy
job_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Si-BS_PBE-el-str_1,Si2,1-PBE-SCF,,True,-10.840906
Si-BS_PBE-el-str_2,Si2,2-PBE-DOS,,True,-10.844797
Si-BS_PBE-el-str_3,Si2,3-PBE-BS,,True,-11.002882
