In [1]:
#Importing the modules
import numpy as np
import dpdata
import os
import matplotlib.pyplot as plt

In [95]:
print("Note:\n\
---------------------------------------------------------------------------------------------\n\
> No. of atoms should be same in both NPT and NVT data\n\
> Combine the NPT and NVT AIMD runs separately for a given system, use combine_AIMD.sh\n\
> To execute: python JDFTX_2_DEEPMD_v4.py \n\
> Output: type.raw type_map.raw force.raw, energy.raw, coord.raw, box.raw which are shuffled\n\
> Use /deepmd-kit/data/raw/raw_2_set.sh to create multiple sets of data\n\
> This script assumes the following: \n\
>> NPT and NVT runs have same no. of atoms and same atom types only \n\
>> jdftx ionic position are dumped alphabetically for all ionic steps not random \n\
---------------------------------------------------------------------------------------------\n\
\n\
")

Note:
---------------------------------------------------------------------------------------------
> No. of atoms should be same in both NPT and NVT data
> Combine the NPT and NVT AIMD runs separately for a given system, use combine_AIMD.sh
> To execute: python JDFTX_2_DEEPMD_v4.py 
> Output: type.raw type_map.raw force.raw, energy.raw, coord.raw, box.raw which are shuffled
> Use /deepmd-kit/data/raw/raw_2_set.sh to create multiple sets of data
> This script assumes the following: 
>> NPT and NVT runs have same no. of atoms and same atom types only 
>> jdftx ionic position are dumped alphabetically for all ionic steps not random 
---------------------------------------------------------------------------------------------




In [101]:
os.system('ls')

CoSi_300k_AIMD - Copy.jdftxout
CoSi_300k_AIMD.jdftxout
CoSi_300k_NPT.jdftxout
CoSi_AIMD
CoSi_vacuum_300k.jdftxout
Data_collection_script
ipynb_version
JDFTX_2_DEEPMD.ipynb
JDFTX_2_DEEPMD_v1.py
JDFTX_2_DEEPMD_v2.py
JDFTX_2_DEEPMD_v2.txt
JDFTX_2_DEEPMD_v3.py
JDFTX_2_DEEPMD_v4.ipynb
JDFTX_2_DEEPMD_v4.py
md.o1065264
Test
training_data


0

In [102]:
# Asking user for cetrain inputs:

# Ask the user for a folder name
# folder = input("Enter the folder to put all data eg. CoSi_AIMD: ")
# frame_sep = input("Enter the frame separation in AIMD data eg. 10 :")
# file_AIMD = input("Enter the combined NPT filename eg. CoSi_AIMD.jdftxout: ")

folder = "Test"
frame_sep = 10
file_AIMD = "CoSi_300k_AIMD - Copy.jdftxout"

# Check if directory exists. If not, create it.
if not os.path.exists(folder):
    os.makedirs(folder)
    print(f"Folder '{folder}' has been created.")
else:
    print(f"Folder '{folder}' already exists.")

Folder 'Test' already exists.


In [24]:
atom_types = ['Co', 'Si', 'O']  # CHANGE HERE, Add the types of elements that you use 
                                # in your project even if certain datas has only few elements

# Conversion from jdftx to deepmd or lammps
ene_conv = 27.2114              # hartree to eV conversion
len_conv = 0.529177             # bohr to Angstrom conversion
force_conv = ene_conv/len_conv  # E = fd, so f = E/d 

# Single atom energies:
E_Co = -148.9280175 * ene_conv # hartree to eV
E_Si = -3.600491552 * ene_conv # hartree to eV
E_O = -15.72003172 * ene_conv # hartree to eV

In [104]:
# Function definitions:

def get_atoms_info(line):
    natoms = int(line.strip().split()[4])
    nspecies = int(line.strip().split()[1])
    return natoms, nspecies

def get_energy(line):
    return float(line.strip().split()[2])

def get_lattice_vectors(lines, i):
    x = list(map(float, lines[i].strip().replace('[', '').replace(']', '').split()))
    y = list(map(float, lines[i+1].strip().replace('[', '').replace(']', '').split()))
    z = list(map(float, lines[i+2].strip().replace('[', '').replace(']', '').split()))
    return x + y + z

def get_volume(line):
    return float(line.strip().split()[4])

def get_stress_tensor(lines, i, volume):
    x = list(map(float, lines[i].strip().replace('[', '').replace(']', '').split()))
    y = list(map(float, lines[i+1].strip().replace('[', '').replace(']', '').split()))
    z = list(map(float, lines[i+2].strip().replace('[', '').replace(']', '').split()))
    xx = [i*volume for i in x]
    yy = [i*volume for i in y]
    zz = [i*volume for i in z]
    return xx + yy + zz

def get_coordinates(lines, natoms, i):
    clines = lines[i:i+natoms]
    coord = np.array([list(map(float, line.strip().split()[2:5])) for line in clines]).reshape(-1)
    allatoms = [line.strip().split()[1] for line in clines]
    return coord, allatoms

def get_forces(lines, natoms, i):
    flines = lines[i:i+natoms]
    force = np.array([list(map(float, line.strip().split()[2:5])) for line in flines]).reshape(-1)
    return force

def automapping(allatoms):
    """
    Use the atom_types in the input and create type.raw
    and type_map.raw files
    """
    # Identifying unique atom types in the simulation:
    #atom_types = list(set(allatoms)); 

    # Assigning integer numbers to unique elements
    mapping = {atom: i for i, atom in enumerate(atom_types)}; 

    # Replacing elements with their mapping
    atm_mapped = [mapping[atom] for atom in allatoms]

    # Writing type_map.raw:
    with open("type.raw", 'w') as f:
        for value in atm_mapped:
            f.write(f"{value}\n")

    # Writing type.raw:
    with open("type_map.raw", 'w') as f:
        for i in atom_types:
            f.write(f"{i}\n")
    return atm_mapped

def all_convert(filename, frame_sep):
    """
    Collects the Force, enegy, box, virial datas from JDFTXoutput file
    """
    with open(filename) as file:
        lines = file.readlines()

        Etot, Force, Stress, Lattice, Coord = [], [], [], [], []

        for i, line in enumerate(lines):
            if "Initialized" in line:
                natoms, nspecies = get_atoms_info(line)
            if "Etot =    " in line:
                Etot.append(get_energy(line))
            if "# Lattice vectors:" in line:
                Lattice.append(get_lattice_vectors(lines, i+2))
            if "unit cell volume =" in line:
                volume = get_volume(line)
#             if "# Stress tensor in Cartesian coordinates" in line:
#                 Stress.append(get_stress_tensor(lines, i+1, volume))
            if "Ionic positions in cartesian coordinates" in line:
                coord, allatoms = get_coordinates(lines, natoms, i+1)
                Coord.append(coord)
            if "Forces in Cartesian coordinates" in line:
                Force.append(get_forces(lines, natoms, i+1))
    
    # Converting the list to array
    Etot = np.array(Etot)
    Lattice = np.array(Lattice)
    Coord = np.array(Coord)
    Force = np.array(Force)
    
    # Sampling the AIMD data:
    AIMD_frame_no = len(Etot) // frame_sep
    Frame_index = np.arange(AIMD_frame_no)*frame_sep

    Etot = Etot[Frame_index]
    Lattice = Lattice[Frame_index]
    Coord = Coord[Frame_index]
    Force = Force[Frame_index]

    # Shuffling the data:
    print(">Force, Energy, Coordinate, Lattice converted, shuffled, and saved")
    permuted_indices = np.random.permutation(len(Etot))
    Etot = np.array(Etot)[permuted_indices]*ene_conv
    Lattice = np.array(Lattice)[permuted_indices]*len_conv
    Coord = np.array(Coord)[permuted_indices]*len_conv
    Force = np.array(Force)[permuted_indices]*force_conv
#     Stress = np.array(Stress)[permuted_indices]*ene_conv
    
    # Energy correction:
    Etot = Etot - Energy_atom(allatoms)

    # Saving the data:
    np.savetxt("energy.raw", Etot)
    np.savetxt("box.raw", Lattice)
    np.savetxt("coord.raw", Coord)
    np.savetxt("force.raw", Force)
#     np.savetxt("virial.raw", Stress)
    
    return Etot, Force, Coord, Lattice, allatoms, natoms, nspecies

def Energy_atom(allatoms):
    """ Calculate the energy of atoms to be subtracted from bulk """
    # Assigning integer numbers to unique elements
    mapping = {atom: i for i, atom in enumerate(atom_types)}; 
    # Replacing elements with their mapping
    atm_mapped = [mapping[atom] for atom in allatoms]
    nCo = atm_mapped.count(0)
    nSi = atm_mapped.count(1)
    nO = atm_mapped.count(2)
    print('Co:', nCo, 'Si:', nSi,'O:', nO)
    E_atom = nCo*E_Co + nSi*E_Si + nO*E_O
    return E_atom

In [105]:
# AIMD Conversion:
print("!!!Converting the AIMD data!!!")
print('-'*50)

# Force, energy, virial, coord:
Etot_AIMD, Force_AIMD, Coord_AIMD, Lattice_AIMD, allatoms_AIMD, natoms, nspecies = all_convert(file_AIMD, frame_sep)

# Energy correction:
Etot_AIMD = Etot_AIMD - Energy_atom(allatoms_AIMD)

# type, type_map
atom_map_AIMD = automapping(allatoms_AIMD)

#---------------------------------------------------------------------------------------------

# Pringin the summary information:
print("# Summary of jdftx to DeepMD\n")
print('-'*50)
print("No. of Elements   :", nspecies)
print("Elements involved :", list(set(allatoms_AIMD)))
print("All Elements      :", atom_types)
# print("Element to int map:", mapping)
print("No. of atoms      :", natoms)
print("No. of frames     :", Force_AIMD.shape[0])
print("force.raw         :", Force_AIMD.shape)
print("coord.raw         :", Coord_AIMD.shape)
print("energy.raw        :", Etot_AIMD.shape)
print("box.raw           :", Lattice_AIMD.shape)
# print("virial.raw        :", Stress_AIMD.shape)
print('-'*50)
    
with open("conversionInfo.dat", 'w') as f:
    f.write("# Summary of jdftx to DeepMD\n")
    f.write('-'*50 + '\n')
    f.write("No. of Elements   :" + f'{nspecies}'+ '\n')
    f.write("Elements involved :" + f'{list(set(allatoms_AIMD))}'+ '\n')
    f.write("All Elements      :" + f'{atom_types}'+ '\n')
    f.write("No. of atoms      :" + f'{natoms}'+ '\n')
    f.write("No. of frames     :" + f'{Force_AIMD.shape[0]}'+ '\n')
    f.write("force.raw         :" + f'{Force_AIMD.shape}'+ '\n')
    f.write("coord.raw         :" + f'{Coord_AIMD.shape}'+ '\n')
    f.write("energy.raw        :" + f'{Etot_AIMD.shape}'+ '\n')
    f.write("box.raw           :" + f'{Lattice_AIMD.shape}'+ '\n')
#     f.write("virial.raw        :" + f'{Stress_AIMD.shape}'+ '\n')
    f.write('-'*50)
    
# Cleaning up 
os.system(f'mv -f *.raw conversionInfo.dat {folder}')

!!!Converting the AIMD data!!!
--------------------------------------------------
>Force, Energy, Coordinate, Lattice converted, shuffled, and saved
Co: 32 Si: 32 O: 0
Co: 32 Si: 32 O: 0
# Summary of jdftx to DeepMD

--------------------------------------------------
No. of Elements   : 2
Elements involved : ['Co', 'Si']
All Elements      : ['Co', 'Si', 'O']
No. of atoms      : 64
No. of frames     : 411
force.raw         : (411, 192)
coord.raw         : (411, 192)
energy.raw        : (411,)
box.raw           : (411, 9)
--------------------------------------------------


0

In [106]:
# Check if directory exists. If not, create it.
if os.path.exists(f'{folder}/training_data'):
    os.system(f'rm -r {folder}/training_data')
    
if os.path.exists(f'{folder}/validation_data'):
    os.system(f'rm -r {folder}/validation_data')

if not os.path.exists(f'{folder}/training_data'):
    os.makedirs(f'{folder}/training_data')
    print(f"'training_data' has been created.")

if not os.path.exists(f'{folder}/validation_data'):
    os.makedirs(f'{folder}/validation_data')
    print(f"'validation_data' has been created.")

# Copying the type.raw and type_map.raw to training_data and validation_data
os.system(f'cp {folder}/type* {folder}/training_data')
os.system(f'cp {folder}/type* {folder}/validation_data')


n_sets = int(Etot_AIMD.shape[0] // 4.5)

os.system(f'cd {folder}; /root/DeepMD_tutorial/deepmd-kit/data/raw/raw_to_set.sh {n_sets}')

os.system(f'mv {folder}/set.000 {folder}/validation_data')
os.system(f'mv {folder}/set.* {folder}/training_data')

'training_data' has been created.
'validation_data' has been created.
nframe is 411
nline per set is 91
will make 5 sets
making set 0 ...
making set 1 ...
making set 2 ...
making set 3 ...
making set 4 ...


0

In [93]:
filename = 'md.o1065264'

dF_cutoff = -1e-05
with open(filename) as file:
    lines = file.readlines()
    
    # Extracting the section of electronic minimization:
    Emin_ini = []
    Emin_fin = []
    for i, line in enumerate(lines):
        if "Electronic minimization" in line:
            Emin_ini.append(i)
        if "Setting wave functions to eigenvectors of Hamiltonian" in line:
            Emin_fin.append(i)
    
    print(len(Emin_ini), len(Emin_fin))
    # Checking convergence with given dF:
    if len(Emin_ini) > 0 and len(Emin_fin) > 0:
        
        conv_Emin_ini = []
        conv_Emin_fin = []
        
        # Looping over all electronic minimization:
        for j in range(len(Emin_ini)):
            print('j', j)
            
            # Looping within the given range to append dF values:
            dF_values = []

            
            for k in range(Emin_ini[j], Emin_fin[j]):
#                 print(Emin_ini[j], Emin_fin[j], k)
                if "dF:" in lines[k]:
#                     print(lines[k])
#                     Extract the dF value from the line
                    dF = float(lines[k].split("dF: ")[1].split()[0])
#                     print(dF)
                    dF_values.append(dF)

            new_dF_values = dF_values
#             print("last dF values:", new_dF_values[-1])
            
            if new_dF_values:                                     
                last_dF = new_dF_values[-1]
                print("last dF values:", new_dF_values[-1])
                if float(last_dF) < -1e-05:
                    print("AIMD converged")
                    conv_Emin_ini.append(Emin_ini[j])
                    conv_Emin_fin.append(Emin_fin[j])
                else:
                    print("AIMD not converged")
#                 plt.plot(j, last_dF, '*r')
#                 plt.show()
print(conv_Emin_ini, conv_Emin_fin)

319 318
j 0
last dF values: -1.23e-06
AIMD not converged
j 1
last dF values: -0.0006407
AIMD converged
j 2
last dF values: 0.0007364
AIMD not converged
j 3
last dF values: 0.0001145
AIMD not converged
j 4
last dF values: -0.0005756
AIMD converged
j 5
last dF values: 0.0004978
AIMD not converged
j 6
last dF values: 0.001536
AIMD not converged
j 7
last dF values: -0.00255
AIMD converged
j 8
last dF values: 0.001342
AIMD not converged
j 9
last dF values: 9.027e-05
AIMD not converged
j 10
last dF values: -0.000262
AIMD converged
j 11
last dF values: -0.002171
AIMD converged
j 12
last dF values: -5.948e-05
AIMD converged
j 13
last dF values: 0.0005374
AIMD not converged
j 14
last dF values: -0.001061
AIMD converged
j 15
last dF values: 0.0002094
AIMD not converged
j 16
last dF values: 0.004561
AIMD not converged
j 17
last dF values: -0.002356
AIMD converged
j 18
last dF values: 0.000218
AIMD not converged
j 19
last dF values: -0.0008187
AIMD converged
j 20
last dF values: -0.001045
AIMD con

j 293
last dF values: 0.008228
AIMD not converged
j 294
last dF values: -0.1316
AIMD converged
j 295
last dF values: 0.4109
AIMD not converged
j 296
last dF values: -0.3204
AIMD converged
j 297
last dF values: 3.416
AIMD not converged
j 298
last dF values: 0.01256
AIMD not converged
j 299
last dF values: -0.2408
AIMD converged
j 300
last dF values: -0.02583
AIMD converged
j 301
last dF values: 0.07536
AIMD not converged
j 302
last dF values: 0.3856
AIMD not converged
j 303
last dF values: -0.1934
AIMD converged
j 304
last dF values: -0.01789
AIMD converged
j 305
last dF values: 0.01216
AIMD not converged
j 306
last dF values: 0.1482
AIMD not converged
j 307
last dF values: -0.402
AIMD converged
j 308
last dF values: -0.5329
AIMD converged
j 309
last dF values: 0.1115
AIMD not converged
j 310
last dF values: 0.07712
AIMD not converged
j 311
last dF values: -0.2319
AIMD converged
j 312
last dF values: 0.337
AIMD not converged
j 313
last dF values: -0.459
AIMD converged
j 314
last dF valu

IndexError: list index out of range

In [None]:
                if new_dF_values:                                     
                    min_dF = min(new_dF_values)
                    print("Smallest dF value:", min_dF)
                    if min_dF > 1e-05:
                        print("The smallest dF value is larger than 1e-05.")
                    else:
                        print("The smallest dF value is not larger than 1e-05.")

In [None]:
                if "dF: " in lines[i]:
                    # Extract the dF value from the line
                    dF = float(lines[i].split("dF: ")[1].split()[0])
                    dF_values.append(dF)
                
                new_dF_values = np.abs(dF_values)
                print("dF values:", new_dF_values)

In [85]:
if -2e-2 < -1e-1:
    print("true")
else:
    print("false")

false


In [27]:
np.linspace(2, 10, 10-2)

array([ 2.        ,  3.14285714,  4.28571429,  5.42857143,  6.57142857,
        7.71428571,  8.85714286, 10.        ])

In [82]:
float(-2e-6)

-2e-06

In [80]:
float(-1e-6)

-0.1

In [45]:
x= [ 16.6195, -0.873218, -1.23946 ]
y= [ -0.861317, 19.2448, -2.15625 ]
z= [ -0.702846, -1.04136, 39.4966 ]

box = np.zeros((3,3))
box[0, :] = np.array(x)
box[1, :] = np.array(y)
box[2, :] = np.array(z)
box_corr = box.T
x_corr = (box_corr[0, :]*len_conv).tolist()
y_corr = (box_corr[1, :]*len_conv).tolist()
z_corr = (box_corr[2, :]*len_conv).tolist()

print(box_corr)
print(box)
print(x_corr, '\n', y_corr, '\n', z_corr)
x_corr + y_corr + z_corr

[[16.6195   -0.861317 -0.702846]
 [-0.873218 19.2448   -1.04136 ]
 [-1.23946  -2.15625  39.4966  ]]
[[16.6195   -0.873218 -1.23946 ]
 [-0.861317 19.2448   -2.15625 ]
 [-0.702846 -1.04136  39.4966  ]]
[8.7946571515, -0.455789146109, -0.371929937742] 
 [-0.46208688158600003, 10.1839055296, -0.5510637607200001] 
 [-0.65589372442, -1.14103790625, 20.9006922982]


[8.7946571515,
 -0.455789146109,
 -0.371929937742,
 -0.46208688158600003,
 10.1839055296,
 -0.5510637607200001,
 -0.65589372442,
 -1.14103790625,
 20.9006922982]

[8.7946571515, -0.46208688158600003, -0.65589372442] 
 [-0.455789146109, 10.1839055296, -1.14103790625] 
 [-0.371929937742, -0.5510637607200001, 20.9006922982]


[8.7946571515, -0.46208688158600003, -0.65589372442]