# Create a Dataset from scratch and run it

We want to create a project that contains all the jobs needed to calculate the band structure of silicon with the PBE functional. To set up the inputs we will use a predefined scheme from the `CalculationSchemes` class.

In [1]:
# import packages
from pynter.data.datasets import Dataset
from pynter.tools.materials_project import MPDatabase
from pynter.vasp.calculation_schemes import CalculationSchemes

In [2]:
# Define the project path - customize with your own path
project_path = '/home/lorenzo/tests/project-test/tutorials/Si-BS-dataset-test'

# Initialize the dataset at the project path
ds = Dataset(path=project_path)

# Get the structure of FCC Si from the Materials Project
structure = MPDatabase(mp_id='mp-149').get_structure()

# Base job settings, adapted for a smaller job
job_settings = {'nodes':1,'timelimit':'00:30:00'}

# Build the PBE electronic-structure calculation scheme for this structure
cs = CalculationSchemes(structure,job_settings=job_settings,name='Si-BS')
scheme = cs.pbe_electronic_structure()

# Create one VaspJob per step of the scheme, using the step as the job group
for step in scheme.steps:
    inputs = scheme.get_vaspinput(step)
    # Use a distinct name so the base `job_settings` defined above is not
    # overwritten by the settings of the last step
    step_job_settings = scheme.get_job_settings(step)
    print(f'create job for step {step}')
    ds.create_job('VaspJob',group=step,inputs=inputs,job_settings=step_job_settings)
    

create job for step 1-PBE-SCF
create job for step 2-PBE-DOS
create job for step 3-PBE-BS




In [3]:
# Visualize the job table: an overview of the jobs in the dataset.
# In the output below it is indexed by job name and shows the
# formula, group, nodes and is_converged columns.
ds.jobs_table()

Unnamed: 0_level_0,formula,group,nodes,is_converged
job_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Si-BS_PBE-el-str_1,Si2,1-PBE-SCF,,
Si-BS_PBE-el-str_2,Si2,2-PBE-DOS,,
Si-BS_PBE-el-str_3,Si2,3-PBE-BS,,


In [4]:
# Get the job of the first step by selecting on its group name.
# NOTE(review): the printed output shows a single Job here, so select_jobs
# appears to return the job itself when only one matches - confirm.
job_first_step = ds.select_jobs(groups=['1-PBE-SCF'])
# Alternatively, take the first entry of the dataset's job list:
#job_first_step = ds.jobs[0]
print(job_first_step)

Job "Si-BS_PBE-el-str_1" of group "1-PBE-SCF"


In [5]:
# Check the inputs of the first-step job before writing them to disk.
# The output below is a dict with the INCAR, KPOINTS, POSCAR and POTCAR entries.
job_first_step.inputs

{'INCAR': {'IBRION': 2,
  'NSW': 0,
  'ISIF': 2,
  'EDIFFG': -0.05,
  'ISPIN': 1,
  'LWAVE': '.TRUE.',
  'LCHARG': '.TRUE.',
  'LORBIT': 10,
  'ENCUT': 500,
  'EDIFF': 1e-06,
  'ISMEAR': 0,
  'SIGMA': 0.05,
  'ALGO': 'Normal',
  'AMIX': 0.2,
  'LREAL': '.FALSE.',
  '#### Default PBE: system': 'Si',
  'ISYM': 2},
 'KPOINTS': pymatgen 4.7.6+ generated KPOINTS with grid density = 1000 / atom
 0
 Gamma
 8 8 8,
 'POSCAR': Si2
 1.0
 3.325489 0.000000 1.919972
 1.108496 3.135301 1.919972
 0.000000 0.000000 3.839943
 Si
 2
 direct
 0.875000 0.875000 0.875000 Si
 0.125000 0.125000 0.125000 Si,
 'POTCAR': [<pymatgen.io.vasp.inputs.PotcarSingle at 0x7fbd25999710>]}

In [6]:
# Write the inputs of the jobs in the dataset to files.
# NOTE(review): presumably written under the project path, one folder per
# job - confirm against the Dataset implementation.
ds.write_jobs_input()

In [7]:
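# Sync the Dataset folder to the HPC to transfer all the files before starting the calculations.
# For the automations to work, all files need to be present on the HPC.
# The call is left commented out so that re-running the notebook does not trigger a transfer; uncomment it to sync.
#ds.sync_dataset_to_hpc()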

Now that we have created the jobs and synced the files, we can start the calculations.
In this case we only need to start the calculation of the first step; the other steps are automated.

In [9]:
# Sync the dataset folder from the HPC back to the local project path.
# The output below shows the rsync command that is run for the transfer.
ds.sync_dataset_from_hpc()


rsync -r -uavzh -e ssh lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset-test/ /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset-test/ 
receiving incremental file list

sent 23 bytes  received 444 bytes  133.43 bytes/sec
total size is 590.43K  speedup is 1,264.30



In [14]:
# Sync a single job instead of the whole dataset: take the first job in the
# dataset and transfer its folder to the HPC. In the output below rsync sends
# INCAR, KPOINTS, POSCAR, POTCAR and job.sh from the 1-PBE-SCF folder.
j = ds.jobs[0]
j.sync_to_hpc()

rsync -r -uavzh -e ssh  /home/lorenzo/tests/project-test/tutorials/Si-BS-dataset-test/1-PBE-SCF/ lv51dypu@lcluster7.hrz.tu-darmstadt.de:/work/scratch/lv51dypu/tutorials/Si-BS-dataset-test/1-PBE-SCF/ 
sending incremental file list
./
INCAR
KPOINTS
POSCAR
POTCAR
job.sh

sent 72.54K bytes  received 114 bytes  20.76K bytes/sec
total size is 196.69K  speedup is 2.71

