In [55]:
#Importing the modules
import glob
import os
import shutil

import dpdata
import numpy as np

In [56]:
# Usage notes shown once, before the user is asked for any input.
print(
    "Note:\n"
    "---------------------------------------------------------------------------------------------\n"
    "> No. of atoms should be same in both NPT and NVT data\n"
    "> Combine the NPT and NVT AIMD runs separately for a given system, use combine_AIMD.sh\n"
    "> Make sure that for NVT runs for all AIMD the box size is same eg. 300K and 400K has same box\n"
    "> First argument should be NPT_file.jdftxout and second argument NVT_file.jdftxout\n"
    "> To execute: python JDFTX_2_DEEPMD_v4.py NPT_file.jdftxout NVT_file.jdftxout\n"
    "> Output: type.raw type_map.raw force.raw, energy.raw, coord.raw, box.raw which are shuffled\n"
    "> Use /deepmd-kit/data/raw/raw_2_set.sh to create multiple sets of data\n"
    "---------------------------------------------------------------------------------------------\n"
    "\n"
)

Note:
---------------------------------------------------------------------------------------------
> No. of atoms should be same in both NPT and NVT data
> Combine the NPT and NVT AIMD runs separately for a given system, use combine_AIMD.sh
> Make sure that for NVT runs for all AIMD the box size is same eg. 300K and 400K has same box
> First argument should be NPT_file.jdftxout and second argument NVT_file.jdftxout
> To execute: python JDFTX_2_DEEPMD_v4.py NPT_file.jdftxout NVT_file.jdftxout
> Output: type.raw type_map.raw force.raw, energy.raw, coord.raw, box.raw which are shuffled
> Use /deepmd-kit/data/raw/raw_2_set.sh to create multiple sets of data
---------------------------------------------------------------------------------------------




In [57]:
# Collect the output location and the two combined AIMD logs from the user.

# Destination folder for every file produced by this notebook
folder = input("Enter the folder to put all data eg. CoSi_combined: ")

file_NPT = input("Enter the combined NPT filename eg. CoSi_NPT.jdftxout: ")
file_NVT = input("Enter the combined NVT filename eg. CoSi_NVT.jdftxout: ")

# Create the destination folder on first use; otherwise just report it is there.
if os.path.exists(folder):
    print(f"Folder '{folder}' already exists.")
else:
    os.makedirs(folder)
    print(f"Folder '{folder}' has been created.")

Enter the folder to put all data eg. CoSi_combined: new_combined
Enter the combined NPT filename eg. CoSi_NPT.jdftxout: CoSi_300k_NPT.jdftxout
Enter the combined NVT filename eg. CoSi_NVT.jdftxout: CoSi_300k_NVT.jdftxout
Folder 'new_combined' has been created.


In [58]:
# Every element symbol used anywhere in the project; list them all even when a
# particular data set contains only some of them. The position of a symbol in
# this list is the integer that automapping() writes to type.raw.
atom_types = ['Co', 'Si', 'O']

# Unit conversion factors applied to the values parsed from the JDFTx output
# (jdftx -> deepmd / lammps).
ene_conv = 27.2114              # hartree to eV conversion
len_conv = 0.529177             # bohr to Angstrom conversion
force_conv = ene_conv/len_conv  # E = f*d, so f = E/d

In [59]:
# Function definitions:

def get_atoms_info(line):
    """
    Read the atom and species counts from one text line.

    Returns (natoms, nspecies): the 5th and the 2nd whitespace-separated
    fields of `line`, each converted to int.
    """
    fields = line.split()
    total_atoms = int(fields[4])
    species_count = int(fields[1])
    return total_atoms, species_count

def get_energy(line):
    """
    Return the 3rd whitespace-separated field of `line` as a float.
    """
    fields = line.split()
    return float(fields[2])

def get_lattice_vectors(lines, i):
    """
    Parse the three bracketed rows lines[i], lines[i+1], lines[i+2] into one
    flat list of floats (first row, then second, then third).

    NOTE(review): values are returned in printed row order; if the source
    prints lattice vectors as matrix columns, a transpose may be needed
    downstream for non-orthogonal cells - confirm.
    """
    flat = []
    for offset in range(3):
        row = lines[i + offset]
        # Drop the enclosing brackets so only the numeric fields remain.
        for bracket in '[]':
            row = row.replace(bracket, '')
        flat.extend(map(float, row.split()))
    return flat

def get_volume(line):
    """
    Return the 5th whitespace-separated field of `line` as a float.
    """
    fields = line.split()
    return float(fields[4])

def get_stress_tensor(lines, i, volume):
    """
    Parse the three bracketed rows starting at lines[i] and return their
    entries, each multiplied by `volume`, as one flat list (row after row).
    """
    # First pass: text -> floats for all three rows.
    parsed_rows = []
    for offset in range(3):
        text = lines[i + offset].replace('[', '').replace(']', '')
        parsed_rows.append([float(token) for token in text.split()])

    # Second pass: scale every entry by the volume.
    scaled = []
    for row in parsed_rows:
        scaled.extend(entry * volume for entry in row)
    return scaled

def get_coordinates(lines, natoms, i):
    """
    Read `natoms` records starting at lines[i].

    Returns (coord, allatoms):
      coord    - 1-D numpy array with fields 3-5 of every record, record after record
      allatoms - list holding field 2 of every record, in file order
    """
    records = [entry.split() for entry in lines[i:i+natoms]]
    xyz_rows = [[float(value) for value in record[2:5]] for record in records]
    coord = np.array(xyz_rows).reshape(-1)
    allatoms = [record[1] for record in records]
    return coord, allatoms

def get_forces(lines, natoms, i):
    """
    Read `natoms` records starting at lines[i] and return fields 3-5 of
    every record as one flat 1-D numpy array (record after record).
    """
    components = []
    for record in lines[i:i+natoms]:
        components.append([float(value) for value in record.split()[2:5]])
    return np.array(components).reshape(-1)

def automapping(allatoms):
    """
    Translate element symbols into integer type indices and write the
    type.raw and type_map.raw files in the working directory.

    The index of a symbol is its position in the module-level `atom_types`
    list. Returns the list of indices, one per entry of `allatoms`.
    """
    # Symbol -> position in atom_types
    index_of = dict(zip(atom_types, range(len(atom_types))))

    # One integer per atom, in the order the atoms were given
    atm_mapped = [index_of[symbol] for symbol in allatoms]

    # type.raw: one type index per line
    with open("type.raw", 'w') as type_file:
        type_file.writelines(f"{index}\n" for index in atm_mapped)

    # type_map.raw: one element symbol per line, in atom_types order
    with open("type_map.raw", 'w') as map_file:
        map_file.writelines(f"{symbol}\n" for symbol in atom_types)

    return atm_mapped

def all_convert(filename, seed=None):
    """
    Collect the energy, coordinates and forces of every frame in a JDFTx
    output file, shuffle the frames, convert the units and write
    energy.raw, coord.raw and force.raw in the working directory.

    Parameters
    ----------
    filename : str
        Path of the (combined) JDFTx output file.
    seed : int, optional
        Seed for the frame shuffle. With the default (None) the shuffle
        draws from NumPy's global random state.

    Returns
    -------
    permuted_indices : numpy array with the frame order that was applied
    Etot  : shuffled energies multiplied by ene_conv
    Force : shuffled forces multiplied by force_conv, one row per frame
    Coord : shuffled coordinates multiplied by len_conv, one row per frame
    allatoms : element symbols read from the last coordinate block

    Raises
    ------
    ValueError
        If a coordinate/force block appears before the atom count is known,
        if no frame is found, or if the numbers of energies, coordinate
        blocks and force blocks differ (the frames could not be paired).
    """
    with open(filename) as file:
        lines = file.readlines()

    natoms = None
    allatoms = None
    Etot, Force, Coord = [], [], []

    for i, line in enumerate(lines):
        if "Initialized" in line:
            natoms, _ = get_atoms_info(line)
        if "Etot =    " in line:
            Etot.append(get_energy(line))

        # Stress/virial extraction is intentionally not performed here.
        has_positions = "Ionic positions in cartesian coordinates" in line
        has_forces = "Forces in Cartesian coordinates" in line
        if (has_positions or has_forces) and natoms is None:
            raise ValueError(
                f"'{filename}': found an atom block on line {i + 1} before any "
                "'Initialized' line giving the number of atoms."
            )
        if has_positions:
            coord, allatoms = get_coordinates(lines, natoms, i+1)
            Coord.append(coord)
        if has_forces:
            Force.append(get_forces(lines, natoms, i+1))

    if not Etot or allatoms is None:
        raise ValueError(f"'{filename}': no complete frame (energy + positions) found.")
    if not (len(Etot) == len(Coord) == len(Force)):
        # A single permutation is applied to all three lists, so unequal
        # lengths would pair energies with the wrong geometry.
        raise ValueError(
            f"'{filename}': frame counts differ (energies={len(Etot)}, "
            f"coordinates={len(Coord)}, forces={len(Force)})."
        )

    # Shuffling the data:
    if seed is None:
        permuted_indices = np.random.permutation(len(Etot))
    else:
        permuted_indices = np.random.default_rng(seed).permutation(len(Etot))
    Etot = np.array(Etot)[permuted_indices]*ene_conv
    Coord = np.array(Coord)[permuted_indices]*len_conv
    Force = np.array(Force)[permuted_indices]*force_conv

    # Saving the data:
    np.savetxt("energy.raw", Etot)
    np.savetxt("coord.raw", Coord)
    np.savetxt("force.raw", Force)
    print(">Force, Energy, Coordinate converted, shuffled, and saved")

    return permuted_indices, Etot, Force, Coord, allatoms

def lattice_vector_NPT(permuted_indices, filename):
    """
    Collect the cell printed after every "# Lattice vectors:" marker in
    `filename`, reorder the frames with `permuted_indices`, multiply by
    len_conv and write the result to box.raw.

    Returns the array that was written (one row of flattened cell entries
    per frame).
    """
    with open(filename) as handle:
        content = handle.readlines()

    # The matrix rows start two lines below each marker line.
    cells = [
        get_lattice_vectors(content, idx + 2)
        for idx, text in enumerate(content)
        if "# Lattice vectors:" in text
    ]

    # Apply the same frame order that was used for the energies.
    print(">NPT Lattice converted, shuffled and saved\n")
    Lattice = np.array(cells)[permuted_indices]*len_conv

    np.savetxt("box.raw", Lattice)
    return Lattice

def lattice_vector_NVT(permuted_indices, filename):
    """
    Write box.raw for a fixed-cell (NVT) run: the single cell found in
    `filename` is repeated once per frame.

    Parameters
    ----------
    permuted_indices : sequence
        Frame permutation of the NVT data. Only its length (the number of
        frames) is used: every row of the result is identical, so
        reordering the rows would change nothing.
    filename : str
        Path of the (combined) NVT JDFTx output file.

    Returns
    -------
    numpy array with len(permuted_indices) identical rows holding the
    flattened cell multiplied by len_conv.

    Raises
    ------
    ValueError
        If the marker line that precedes the cell is not present, instead of
        silently writing an empty box.raw.
    """
    marker = "---------- Initializing tighter grid for wavefunction operations ----------"

    with open(filename) as file:
        lines = file.readlines()

    Lattice = None
    for i, line in enumerate(lines):
        if marker in line:
            # The matrix rows start two lines below the marker; when the
            # marker occurs several times the last occurrence is kept.
            Lattice = get_lattice_vectors(lines, i+2)

    if Lattice is None:
        raise ValueError(f"'{filename}': no lattice block found (marker line missing).")

    print(">NVT Lattice converted, shuffled and saved\n")
    Lattice = np.array(Lattice)*len_conv
    Lattice_repeat = np.tile(Lattice, (len(permuted_indices), 1))

    # Saving the data:
    np.savetxt("box.raw", Lattice_repeat)

    return Lattice_repeat

In [60]:
def _stash_raw_files(dest_dir):
    """Move every *.raw file of the working directory into dest_dir (created if missing)."""
    os.makedirs(dest_dir, exist_ok=True)
    for raw_name in glob.glob("*.raw"):
        # Naming the destination file explicitly lets a re-run replace files
        # left over from a previous conversion.
        shutil.move(raw_name, os.path.join(dest_dir, raw_name))


# NPT Conversion:
print("!!!Convering the NPT AIMD data!!!")
print('-'*50)

# Force, energy, coord:
random_NPT, Etot_NPT, Force_NPT, Coord_NPT, allatoms_NPT = all_convert(file_NPT)
# box
Lattice_NPT = lattice_vector_NPT(random_NPT, file_NPT)
# type, type_map
atom_map_NPT = automapping(allatoms_NPT)

# The NPT-only raw files end up in <folder>/NPT_raw_data
_stash_raw_files(os.path.join(folder, "NPT_raw_data"))

#---------------------------------------------------------------------------------------------

# NVT Conversion:
print("!!!Convering the NVT AIMD data!!!")
print('-'*50)

# Force, energy, coord:
random_NVT, Etot_NVT, Force_NVT, Coord_NVT, allatoms_NVT = all_convert(file_NVT)
# box
Lattice_NVT = lattice_vector_NVT(random_NVT, file_NVT)
# type, type_map
atom_map_NVT = automapping(allatoms_NVT)

# The NVT-only raw files end up in <folder>/NVT_raw_data
_stash_raw_files(os.path.join(folder, "NVT_raw_data"))

#---------------------------------------------------------------------------------------------

# Combining NPT and NVT and saving as .raw file:
print("!!!Combining and converting NPT and NVT AIMD data!!!")
print('-'*50)

# The combined set shares a single type.raw, so both runs must list the same
# atoms in the same order; otherwise the NPT frames would be mislabelled.
if allatoms_NPT != allatoms_NVT:
    raise ValueError(
        "NPT and NVT data list different atoms or a different atom order; "
        "they cannot be combined into one data set."
    )

Etot_all = np.hstack((Etot_NPT, Etot_NVT))
Force_all = np.vstack((Force_NPT, Force_NVT))
Coord_all = np.vstack((Coord_NPT, Coord_NVT))
Lattice_all = np.vstack((Lattice_NPT, Lattice_NVT))

# Shuffling the data: one permutation applied to every array keeps frames paired
random_all = np.random.permutation(len(Etot_all))

Etot_all = Etot_all[random_all]
Force_all = Force_all[random_all]
Coord_all = Coord_all[random_all]
Lattice_all = Lattice_all[random_all]

# type, type_map
atom_map_NVT = automapping(allatoms_NVT)
# Saving the data:
np.savetxt("energy.raw", Etot_all)
np.savetxt("coord.raw", Coord_all)
np.savetxt("force.raw", Force_all)
np.savetxt("box.raw", Lattice_all)

print(">Force, Energy, Coordinate converted, shuffled, and saved")

# Cleaning up: the combined raw files go to the top level of <folder>
_stash_raw_files(folder)


!!!Convering the NPT AIMD data!!!
--------------------------------------------------
>Force, Energy, Coordinate converted, shuffled, and saved
>NPT Lattice converted, shuffled and saved

!!!Convering the NVT AIMD data!!!
--------------------------------------------------
>Force, Energy, Coordinate converted, shuffled, and saved
>NVT Lattice converted, shuffled and saved

!!!Combining and converting NPT and NVT AIMD data!!!
--------------------------------------------------
>Force, Energy, Coordinate converted, shuffled, and saved


In [None]:
# Summary of the combined (NPT + NVT) data set, printed and also written to
# conversionInfo.dat. Only names defined at notebook level are used here
# (the *_all arrays and allatoms_NVT from the conversion cell, atom_types from
# the configuration cell), so the cell runs on a fresh kernel.

# Elements actually present in the data, in order of first appearance
elements_present = list(dict.fromkeys(allatoms_NVT))

summary_rows = [
    ("No. of Elements   :", len(elements_present)),
    ("Elements involved :", elements_present),
    ("All Elements      :", atom_types),
    ("No. of atoms      :", len(allatoms_NVT)),
    ("No. of frames     :", Force_all.shape[0]),
    ("force.raw         :", Force_all.shape),
    ("coord.raw         :", Coord_all.shape),
    ("energy.raw        :", Etot_all.shape),
    ("box.raw           :", Lattice_all.shape),
]

print("# Summary of jdftx to DeepMD\n")
print('-'*50)
for label, value in summary_rows:
    print(label, value)
print('-'*50)

with open("conversionInfo.dat", 'w') as f:
    f.write("# Summary of jdftx to DeepMD\n")
    f.write('-'*50 + '\n')
    for label, value in summary_rows:
        f.write(label + f'{value}' + '\n')
    f.write('-'*50)