In [1]:
import ovito
from ovito.io import import_file
from ovito.modifiers import *
from ovito.data import *
import glob as glob
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
import tarfile

from clusterOVITO.basic.io import *
from clusterOVITO.basic.xyz_to_FEFF import *
from clusterOVITO.ovito_tools import *  
from ovito.modifiers import ReplicateModifier


from clusterOVITO.basic.xyz_to_FEFF import *

rmeshPrime = np.arange(0.01, 6, 0.01)

In [2]:
def write_feff_dir(feff_inp, directory):
    os.makedirs(directory, exist_ok=True)
    with open(directory + "feff.inp", "w") as f:
        f.write(feff_inp)
    

def write_feff_dir_from_xyz(
    xyz, directory, absorber=0, edge="K", title="test", xmu_path=None, feff_inp_path=None, feff_template=feff_template
):
    potentials, atoms = make_potential_atoms_from_xyz(xyz, absorber=absorber)
    feff_inp = feff_template.format(
        title=title, edge=edge, potentials=potentials, atoms=atoms
    )
    write_feff_dir(feff_inp, directory)

    if xmu_path is not None:
        os.makedirs(os.path.dirname(xmu_path), exist_ok=True)
        shutil.copy(directory + "xmu.dat", xmu_path)
        
    if feff_inp_path is not None:
        os.makedirs(os.path.dirname(feff_inp_path), exist_ok=True)
        shutil.copy(directory + "feff.inp", feff_inp_path)

def create_tar_gz_of_directory(directory_path, output_filename, root_dir_name):
    """
    Create a tar.gz archive of the specified directory.

    :param directory_path: Path of the directory to be archived.
    :param output_filename: Name of the output tar.gz file.
    :param root_dir_name: Name of the root directory in the archive.
    """
    with tarfile.open(output_filename, "w:gz") as tar:
        for dirpath, dirnames, filenames in os.walk(directory_path):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                arcname = os.path.join(root_dir_name, os.path.relpath(filepath, directory_path))
                tar.add(filepath, arcname=arcname)

def create_slurm_scripts(dir_list, script_prefix, file_path):
    """
    Create SLURM batch scripts from a list of directories.

    :param dir_list: List of directories to process
    :param script_prefix: Prefix for the output SLURM script files
    """
    for block_num, i in enumerate(range(0, len(dir_list), 48), start=1):
        block = dir_list[i:i + 48]
        script_name = f"{script_prefix}_block{block_num}.sh"

        with open(file_path+script_name, 'w') as file:
            file.write("#!/bin/bash\n")
            file.write("#SBATCH --account=cfn310033\n")
            file.write("#SBATCH --nodes=1\n")
            file.write("#SBATCH --partition=cfn\n")
            file.write("#SBATCH --job-name=feff\n")
            file.write("#SBATCH --ntasks=48\n")  # Request 48 tasks
            file.write("#SBATCH --cpus-per-task=1\n")  # 1 CPU per task
            file.write("#SBATCH --time=01:00:00\n")  # 24 hour time limit
            file.write("#SBATCH --mail-user=nmarcella@bnl.gov\n")
            file.write("#SBATCH --mail-type=ALL\n")
            file.write("\n")

            for dir in block:
                dir_modified = dir.replace('/mnt/sdcc/', '/')
                file.write(f"srun --exclusive -n 1 --cpus-per-task=1 --mem=4G --time=00:10:00 --chdir={dir_modified} ~/feff_code/feff.sh &\n")
            file.write("wait\n")

    print(f"Created {block_num} SLURM script(s) with prefix '{script_prefix}'.")



def move_file(source_path, destination_path):
    """
    Move a file from source_path to destination_path.

    :param source_path: The path of the file to be moved
    :param destination_path: The path where the file will be moved
    """
    shutil.move(source_path, destination_path)


# make a command to run the scripts. sbatch run_block1.sh; sbatch run_bl;ock2.sh; etc
def make_command_to_run_scripts(script_prefix, start_script, end_script):
    """
    Create a command to run a series of SLURM scripts within a specified range.

    :param script_prefix: Prefix for the SLURM script files
    :param start_script: The starting script number in the range
    :param end_script: The ending script number in the range (inclusive)
    :return: A string containing the command to run the specified range of SLURM scripts
    """
    return "; ".join([f"sbatch {script_prefix}_block{i}.sh" for i in range(start_script, end_script + 1)])



def generate_sbatch_command(script_dir, output_file):
    """
    Generate a single sbatch command with semicolons to submit all script files in a specified directory.

    :param script_dir: Directory containing the SLURM script files
    :param output_file: Output file to write the sbatch command
    """
    script_files = [f for f in os.listdir(script_dir) if f.endswith('.sh')]
    with open(output_file, 'w') as file:
        sbatch_commands = '; '.join(f"sbatch {os.path.join(script_dir, script)}" for script in script_files)
        file.write(sbatch_commands + '\n')

    print(f"Generated sbatch command in '{output_file}'.")
    
    
    
def get_variance(rdf, rmeshPrime=rmeshPrime, rrange=[1,3]):
    bins,counts = np.array([i for i in np.array([rmeshPrime, rdf]).T if rrange[0]<i[0]<rrange[1]]).T
    weighted_avg = np.average(bins, weights=counts)
    variance = np.average((bins - weighted_avg)**2, weights=counts)
    return variance



def feff_to_xyz(input_file_path, output_file_path):
    # Reading the input file
    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # Initialize flags for recording POTENTIALS and ATOMS
    record_potentials = False
    record_atoms = False

    potentials = {}  # To store potential mappings
    atoms = []  # To store atom details

    for line in lines:
        # Extracting potential mappings
        if line.strip() == 'POTENTIALS':
            record_potentials = True
            continue

        if record_potentials:
            if line.strip() == 'ATOMS':
                record_potentials = False
                record_atoms = True
                continue

            parts = line.split()
            if len(parts) >= 2:
                pot = int(parts[0])
                atom_symbol = parts[2]
                potentials[pot] = atom_symbol

        # Extracting atom coordinates and types
        if record_atoms:
            if line.strip() == 'END':
                break

            parts = line.split()
            if len(parts) >= 4:
                x, y, z, pot = parts[:4]
                atom_symbol = potentials[int(pot)]
                atoms.append((atom_symbol, x, y, z))
    
    # Writing to the output XYZ file
    with open(output_file_path, 'w') as file:
        file.write(f"{len(atoms)}\n\n")  # Number of atoms and a blank comment line
        for atom in atoms:
            file.write(f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n")  # Atom type and coordinates
                
                
def feff_to_rdf(input_file_path, partial_1, partial_2, return_distances = False):
    # Reading the input file
    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # Initialize flags for recording POTENTIALS and ATOMS
    record_potentials = False
    record_atoms = False

    potentials = {}  # To store potential mappings
    atoms = []  # To store atom details
    xyz = [] # To store xyz coordinates

    for line in lines:
        # Extracting potential mappings
        if line.strip() == 'POTENTIALS':
            record_potentials = True
            continue

        if record_potentials:
            if line.strip() == 'ATOMS':
                record_potentials = False
                record_atoms = True
                continue

            parts = line.split()
            if len(parts) >= 2:
                pot = int(parts[0])
                atom_symbol = parts[2]
                potentials[pot] = atom_symbol

        # Extracting atom coordinates and types
        if record_atoms:
            if line.strip() == 'END':
                break

            parts = line.split()
            if len(parts) >= 4:
                x, y, z, pot = parts[:4]
                atom_symbol = potentials[int(pot)]
                atoms.append((atom_symbol, x, y, z))
                xyz.append((x, y, z))

In [3]:
traj_dir = '/mnt/a/MD_Trajectories/PtCO/NPs/'
test_path = traj_dir+"Pt309_cuboct_148K.all.bin"
pipeline = import_file(test_path)

In [None]:
file_params = { "edge": "K",
                "feff_template": feff_template_exafs_ptCO,
                "proj_name": "Pt309_cuboct_298K",
                "working_dir": working_dir,
                "cluster_dir": cluster_dir
                }

In [4]:
class MD_EXAFS_Binary_Pipeline:
    def __init__(self, pipeline, framerange, file_params):
        self.pipeline = pipeline
        self.framerange = framerange
        self.file_params = file_params
        #self.proj_name = file_params["proj_name"]
        #self.working_dir = file_params["working_dir"]
        #self.cluster_dir = file_params["cluster_dir"]
        #self.proj_dir = self.working_dir + self.proj_name + '/'
        self.frame = 0
        self.interesting_c = 0
        self.interesting_pt = 0
        self.all_pt = 0
        self.interesting_o = 0
        self.b_line_table = []

    def run(self):
        for frame in self.framerange:
            self.frame = frame
            data = self.pipeline.compute(frame)
            finder = CutoffNeighborFinder(3, data)

            ptatom_index, catom_index, oatom_index = self.get_atom_indices(data)
            all_positions = data.particles.positions[...]

            interesting_c, interesting_pt, interesting_o = self.find_interesting_atoms(
                catom_index, ptatom_index, oatom_index, finder
            )

            self.interesting_c = all_positions[interesting_c]
            self.interesting_pt = all_positions[interesting_pt]
            self.interesting_o = all_positions[interesting_o]
            self.all_pt = all_positions[ptatom_index]

            self.add_sep_line_start()
            self.make_files()


    def get_atom_indices(self, data):
        ptatom_index = np.where(data.particles['Particle Type'].array == 1)[0]
        catom_index = np.where(data.particles['Particle Type'].array == 2)[0]
        oatom_index = np.where(data.particles['Particle Type'].array == 3)[0]
        return ptatom_index, catom_index, oatom_index

    def find_interesting_atoms(self, catom_index, ptatom_index, oatom_index, finder):
        interesting_c, interesting_pt, interesting_o = [], [], []
        for i in catom_index:
            neigh = [neigh.index for neigh in finder.find(i)]
            if len(neigh) > 0:
                n_count = 0
                for l in neigh:
                    if l in ptatom_index:
                        n_count += 1
                        interesting_pt.append(l)
                if n_count > 0:
                    interesting_c.append(i)

        interesting_c = np.unique(np.array(interesting_c))
        interesting_pt = np.unique(np.array(interesting_pt))

        for i in interesting_c:
            neigh = [neigh.index for neigh in finder.find(i)]
            if len(neigh) > 0:
                for l in neigh:
                    if l in oatom_index:
                        interesting_o.append(l)

        interesting_o = np.unique(np.array(interesting_o))

        return interesting_c, interesting_pt, interesting_o
    
    def make_files(self):
        n_c = self.interesting_c.shape[0]
        n_pt = self.all_pt.shape[0]
        n_o = self.interesting_o.shape[0]

        n_pt_pot = 1
        n_c_pot = 2
        n_o_pot = 3

        n_pt_pots = np.full(n_pt, 1).reshape(-1, 1)
        pt_lattice = np.concatenate((self.all_pt, n_pt_pots), axis=1)
        
        n_c_pots = np.full(n_c, 2).reshape(-1, 1)
        c_lattice = np.concatenate((self.interesting_c, n_c_pots), axis=1)

        n_o_pots = np.full(n_o, 3).reshape(-1, 1)
        o_lattice = np.concatenate((self.interesting_o, n_o_pots), axis=1)
        

        b_line = np.concatenate((pt_lattice, c_lattice, o_lattice), axis=0)
        b_line = np.around(b_line, decimals=4)

        
        n_absorbers = len(self.all_pt)
        b_lines = []
        for i in range(n_absorbers):
            # change index 3 in b_line[i] to 0
            b_line0 = np.copy(b_line)
            b_line0[i,3] = 0
            b_lines.append(b_line0)
        
        b_lines = np.array(b_lines)
        b_line = b_lines.reshape(-1)
        self.b_line = b_line
        self.b_line_table.append(b_line)

    def add_sep_line_start(self):
        self.b_line_table.append(np.array([0,0,0,0]))

    def add_sep_line_end(self):
        self.b_line_table.append(np.array([1,1,1,1]))


        



In [5]:
test_pl = MD_EXAFS_Binary_Pipeline(pipeline, range(15999,18000), file_params={})

In [6]:
test_pl.run()

In [7]:
test_pl.b_line_table

[array([0, 0, 0, 0]),
 array([10.0295, 12.0281, 16.023 , ..., 10.7262, 15.0737,  3.    ]),
 array([0, 0, 0, 0]),
 array([10.1346, 12.0471, 15.9808, ..., 10.9569, 15.3571,  3.    ]),
 array([0, 0, 0, 0]),
 array([10.1971, 11.9319, 15.8849, ..., 11.0483, 15.758 ,  3.    ]),
 array([0, 0, 0, 0]),
 array([ 9.979 , 11.9967, 15.9793, ..., 10.9592, 15.9692,  3.    ]),
 array([0, 0, 0, 0]),
 array([ 9.8568, 12.2263, 16.1895, ..., 11.1057, 15.9164,  3.    ]),
 array([0, 0, 0, 0]),
 array([ 9.8245, 12.216 , 16.2132, ..., 11.258 , 15.7941,  3.    ]),
 array([0, 0, 0, 0]),
 array([ 9.9479, 12.145 , 16.1825, ..., 11.5621, 15.6778,  3.    ]),
 array([0, 0, 0, 0]),
 array([10.0865, 12.1186, 16.1017, ..., 11.6532, 15.488 ,  3.    ]),
 array([0, 0, 0, 0]),
 array([10.1255, 11.9945, 15.9939, ..., 11.304 , 15.1943,  3.    ]),
 array([0, 0, 0, 0]),
 array([10.0026, 12.0575, 15.859 , ..., 10.91  , 14.8762,  3.    ]),
 array([0, 0, 0, 0]),
 array([ 9.8885, 12.1252, 15.7921, ..., 10.3506, 14.7429,  3.    ]),

In [8]:
data = test_pl.b_line_table



In [24]:
import struct
import numpy as np
import os

dir_name = "/mnt/a/MD_Trajectories/PtCO/NPs/Inputs/Pt309_cuboct_148K/"
file_name = "Pt309_cuboct_148K.bin"
with open(dir_name + file_name, "wb") as file:
    # Iterate over each line in the data array
    for line in data:
        # Process each atom in the line (every 4 indices)
        for i in range(0, len(line), 4):
            # Extract x, y, z, and p for the atom
            # Ensure that 'p' is cast to an integer if it's not already
            x, y, z, p = line[i], line[i+1], line[i+2], int(line[i+3])

            # Pack the data into a binary format and write to file
            binary_data = struct.pack("fffI", x, y, z, p)
            file.write(binary_data)

# run tar -zcf Pt309_cuboct_148K.tar.gz Pt309_cuboct_148K.bin in the cli
os.system("cd " + dir_name)
os.system("tar -zcf "+file_name+".tar.gz "+ file_name)
os.system("rm "+ file_name+".bin")

tar: Pt309_cuboct_148K.bin.bin: Cannot stat: No such file or directory
tar: Exiting with failure status due to previous errors
rm: cannot remove 'Pt309_cuboct_148K.bin.bin': No such file or directory


256

In [25]:
os.system("cd " + dir_name)
os.system("tar -zcf "+file_name+".tar.gz "+ file_name)
os.system("rm "+ file_name+".bin")

tar: Pt309_cuboct_148K.bin: Cannot stat: No such file or directory
tar: Exiting with failure status due to previous errors
rm: cannot remove 'Pt309_cuboct_148K.bin.bin': No such file or directory


256

In [28]:
os.system("cd " + dir_name)
os.system("pwd")

/home/nickj/Projects/clusterOVITO/code


0

# Export to TAR

In [9]:
import struct
import io
import tarfile

dir_name = "/mnt/a/MD_Trajectories/PtCO/NPs/Inputs/Pt309_cuboct_148K/"
file_name = "Pt309_cuboct_148K_sep.bin"
tar_name = file_name + ".tar.gz"

# Create a BytesIO object to accumulate the binary data
binary_buffer = io.BytesIO()

# Iterate over each line in the data array
for line in data:
    for i in range(0, len(line), 4):
        # Extract x, y, z, and p for the atom
        # Ensure that 'p' is cast to an integer if it's not already
        x, y, z, p = line[i], line[i+1], line[i+2], int(line[i+3])

        # Pack the data into a binary format
        binary_data = struct.pack("fffI", x, y, z, p)

        # Write to the BytesIO object
        binary_buffer.write(binary_data)

# Rewind the buffer to the beginning
binary_buffer.seek(0)

# Create a .tar.gz file and add the BytesIO object
tar_file_name = dir_name + tar_name
with tarfile.open(tar_file_name, "w:gz") as tar:
    # Create a TarInfo object for the BytesIO object
    info = tarfile.TarInfo(name=file_name)
    info.size = len(binary_buffer.getbuffer())
    
    # Add the BytesIO object to the tar archive
    tar.addfile(tarinfo=info, fileobj=binary_buffer)


In [1]:
import tarfile
import struct
import io
import numpy as np

tar_file_name = '/mnt/a/MD_Trajectories/PtCO/NPs/Inputs/Pt309_cuboct_148K/Pt309_cuboct_148K_sep.tar.gz'
file_name = "Pt309_cuboct_148K_sep.bin"

# Open the .tar.gz file
with tarfile.open(tar_file_name, 'r:gz') as tar:
    # Extract the specific file
    extracted_file = tar.extractfile(file_name)

    if extracted_file is not None:
        # Read the binary data from the extracted file
        binary_buffer = extracted_file.read()

# Calculate the number of records (assuming you know the size of each record)
record_size = struct.calcsize("fffI")  # Size of each record
num_records = len(binary_buffer) // record_size

# Preallocate a NumPy array
data_array = np.empty((num_records, 4), dtype=np.float32)  # Adjust dtype as needed

# Read the data into the NumPy array
bytes_buffer = io.BytesIO(binary_buffer)
for i in range(num_records):
    record = bytes_buffer.read(record_size)
    data_array[i] = struct.unpack("fffI", record)

# Now data_array contains all your data


In [4]:
data_array.shape

(248936727, 4)

In [6]:
import numpy as np

def compute_separator_indices(data_array):
    separator = np.array([0, 0, 0, 0])
    return np.where(np.all(data_array == separator, axis=1))[0]

# Usage
data_index_array = compute_separator_indices(data_array)


In [14]:
def get_data(data_array, data_index_array, frame_number):
    if frame_number == 0:
        start_idx = 0
    else:
        if frame_number - 1 < len(data_index_array):
            start_idx = data_index_array[frame_number - 1] + 1
        else:
            return None  # Frame number out of range

    if frame_number < len(data_index_array):
        end_idx = data_index_array[frame_number]
        return data_array[start_idx:end_idx]
    else:
        return data_array[start_idx:]  # For the last frame

# Usage
frame_data = get_data(data_array, data_index_array, 3)  # Get data for frame 0


In [15]:
frame_data

array([[10.1971, 11.9319, 15.8849,  0.    ],
       [12.1255, 10.0489, 15.857 ,  1.    ],
       [12.157 , 11.8739, 14.0017,  1.    ],
       ...,
       [ 7.3573, 17.8753, 27.1071,  3.    ],
       [ 8.0414, 17.1239,  8.4473,  3.    ],
       [ 7.3576, 11.0483, 15.758 ,  3.    ]], dtype=float32)

# New pipeline

In [37]:
class MD_EXAFS_Binary_Pipeline:
    def __init__(self, pipeline, framerange, file_params):
        self.pipeline = pipeline
        self.framerange = framerange
        self.file_params = file_params
        #self.proj_name = file_params["proj_name"]
        #self.working_dir = file_params["working_dir"]
        #self.cluster_dir = file_params["cluster_dir"]
        #self.proj_dir = self.working_dir + self.proj_name + '/'
        self.frame = 0
        self.interesting_c = 0
        self.interesting_pt = 0
        self.all_pt = 0
        self.interesting_o = 0
        self.b_line_table = []

    def run(self):
        for frame in self.framerange:
            self.frame = frame
            data = self.pipeline.compute(frame)
            finder = CutoffNeighborFinder(3, data)

            ptatom_index, catom_index, oatom_index = self.get_atom_indices(data)
            all_positions = data.particles.positions[...]

            interesting_c, interesting_pt, interesting_o = self.find_interesting_atoms(
                catom_index, ptatom_index, oatom_index, finder
            )

            self.interesting_c = all_positions[interesting_c]
            self.interesting_pt = all_positions[interesting_pt]
            self.interesting_o = all_positions[interesting_o]
            self.all_pt = all_positions[ptatom_index]

            self.add_sep_line_start()
            self.make_files()
        self.finish()


    def get_atom_indices(self, data):
        ptatom_index = np.where(data.particles['Particle Type'].array == 1)[0]
        catom_index = np.where(data.particles['Particle Type'].array == 2)[0]
        oatom_index = np.where(data.particles['Particle Type'].array == 3)[0]
        return ptatom_index, catom_index, oatom_index

    def find_interesting_atoms(self, catom_index, ptatom_index, oatom_index, finder):
        interesting_c, interesting_pt, interesting_o = [], [], []
        for i in catom_index:
            neigh = [neigh.index for neigh in finder.find(i)]
            if len(neigh) > 0:
                n_count = 0
                for l in neigh:
                    if l in ptatom_index:
                        n_count += 1
                        interesting_pt.append(l)
                if n_count > 0:
                    interesting_c.append(i)

        interesting_c = np.unique(np.array(interesting_c))
        interesting_pt = np.unique(np.array(interesting_pt))

        for i in interesting_c:
            neigh = [neigh.index for neigh in finder.find(i)]
            if len(neigh) > 0:
                for l in neigh:
                    if l in oatom_index:
                        interesting_o.append(l)

        interesting_o = np.unique(np.array(interesting_o))

        return interesting_c, interesting_pt, interesting_o
    
    def make_files(self):
        n_c = self.interesting_c.shape[0]
        n_pt = self.all_pt.shape[0]
        n_o = self.interesting_o.shape[0]

        n_pt_pot = 1
        n_c_pot = 2
        n_o_pot = 3

        n_pt_pots = np.full(n_pt, 1).reshape(-1, 1)
        pt_lattice = np.concatenate((self.all_pt, n_pt_pots), axis=1)
        
        n_c_pots = np.full(n_c, 2).reshape(-1, 1)
        c_lattice = np.concatenate((self.interesting_c, n_c_pots), axis=1)

        n_o_pots = np.full(n_o, 3).reshape(-1, 1)
        o_lattice = np.concatenate((self.interesting_o, n_o_pots), axis=1)
        

        b_line = np.concatenate((pt_lattice, c_lattice, o_lattice), axis=0)
        b_line = np.around(b_line, decimals=4)
        self.b_line_table.append(b_line)


    def add_sep_line_start(self):
        self.b_line_table.append(np.array([[0,0,0,0]]))

    def add_sep_line_end(self):
        self.b_line_table.append(np.array([1,1,1,1]))

    def finish(self):
        self.data = np.concatenate(self.b_line_table, axis=0)

        

        



In [38]:
test_pl = MD_EXAFS_Binary_Pipeline(pipeline, range(15999,18000), file_params={})

In [39]:
test_pl.run()

In [44]:
test_pl.data.shape

(807615, 4)

In [42]:
###
# Make input table and export
###

import struct
import io
import tarfile

###########################################

traj_dir = '/mnt/a/MD_Trajectories/PtCO/NPs/'
traj_name = "Pt309_cuboct_148K.all.bin"

p_name = "Pt309_cuboct_148K"
#input_dir = "/mnt/a/MD_Trajectories/PtCO/NPs/Inputs/"
input_dir = "/mnt/sdcc/sdcc+u/nmarcella/MD_EXAFS_inputs/231228/"


###########################################
traj_path = traj_dir + traj_name

dir_name = input_dir + p_name + "/"

# if it doesn't exist, make it
os.makedirs(dir_name, exist_ok=True)

file_name = p_name + ".bin"
tar_name = file_name + ".tar.gz"


pipeline = import_file(traj_path)

test_pl = MD_EXAFS_Binary_Pipeline(pipeline, range(15999,18000), file_params={})
test_pl.run()

data = test_pl.data

###########################################

# Create a BytesIO object to accumulate the binary data
binary_buffer = io.BytesIO()

# Assuming 'data' is a 2D array where each row is [x, y, z, p]
for line in data:
    # Extract x, y, z, and p from the line
    x, y, z, p = line[0], line[1], line[2], int(line[3])

    # Pack the data into a binary format and write to the BytesIO object
    binary_data = struct.pack("fffI", x, y, z, p)
    binary_buffer.write(binary_data)

# Rewind the buffer to the beginning
binary_buffer.seek(0)

# Create a .tar.gz file and add the BytesIO object
tar_file_name = dir_name + tar_name
with tarfile.open(tar_file_name, "w:gz") as tar:
    # Create a TarInfo object for the BytesIO object
    info = tarfile.TarInfo(name=file_name)
    info.size = len(binary_buffer.getbuffer())
    
    # Add the BytesIO object to the tar archive
    tar.addfile(tarinfo=info, fileobj=binary_buffer)


In [23]:
###
# Import
###

import tarfile
import struct
import io
import numpy as np

dir_name = "/mnt/a/MD_Trajectories/PtCO/NPs/Inputs/Pt309_cuboct_148K/"
tar_name = "Pt309_cuboct_148K_sep.bin.tar.gz"
file_name = "Pt309_cuboct_148K_sep.bin"

# Open the .tar.gz file
with tarfile.open(dir_name + tar_name, 'r:gz') as tar:
    # Extract the specific binary file
    extracted_file = tar.extractfile(file_name)

    if extracted_file is not None:
        # Read the binary data from the extracted file
        binary_buffer = extracted_file.read()

# Convert the binary data back to a NumPy array
record_size = struct.calcsize("fffI")  # Size of each record (3 floats and 1 integer)
num_records = len(binary_buffer) // record_size  # Calculate the number of records

# Create an empty NumPy array to hold the data
data_array = np.empty((num_records, 4), dtype=np.float32)  # Adjust dtype if necessary

# Unpack the data and fill the array
bytes_buffer = io.BytesIO(binary_buffer)
for i in range(num_records):
    record = bytes_buffer.read(record_size)
    x, y, z, p = struct.unpack("fffI", record)
    data_array[i] = [x, y, z, p]

# data_array now contains your unpacked data


In [34]:
import numpy as np

def index_and_separate_data(data_array):
    # Define the separator
    separator = np.array([0, 0, 0, 0], dtype=np.float32)
    
    # Dictionary to hold the separated data
    separated_data = {}
    start_idx = 0
    index = 0

    # Iterate over the array to find separators and split the data
    for i, row in enumerate(data_array):
        if np.array_equal(row, separator):
            # Add the chunk of data to the dictionary
            if i != start_idx:  # Ignore empty chunks
                separated_data[index] = data_array[start_idx:i]
                index += 1
            start_idx = i + 1
    
    # Handle the last chunk of data
    if start_idx < len(data_array):
        separated_data[index] = data_array[start_idx:]

    return separated_data

# Usage
separated_dict = index_and_separate_data(data_array)

# Access data by index, for example:
print(separated_dict[0])  # First chunk of data


[[10.0295 12.0281 16.023   1.    ]
 [12.2988  9.9736 15.9369  1.    ]
 [11.9282 11.8842 13.9179  1.    ]
 ...
 [ 7.5571 17.8051 27.5405  3.    ]
 [ 8.9038 17.9175  7.5048  3.    ]
 [ 7.472  10.7262 15.0737  3.    ]]


In [35]:
len(separated_dict)

2001

In [36]:
separated_dict[0]

array([[10.0295, 12.0281, 16.023 ,  1.    ],
       [12.2988,  9.9736, 15.9369,  1.    ],
       [11.9282, 11.8842, 13.9179,  1.    ],
       ...,
       [ 7.5571, 17.8051, 27.5405,  3.    ],
       [ 8.9038, 17.9175,  7.5048,  3.    ],
       [ 7.472 , 10.7262, 15.0737,  3.    ]], dtype=float32)

In [43]:
feff_template_exafs

'TITLE\t{title}\nEDGE\t{edge}\nS02\t1\nEXAFS 20.0\nRPATH 6\nNLEGS 8\nCRITERIA 0.0 0.0\nEXCHANGE\t0\t0\t0\nSCF\t6.0\t0\t30\t0.1\t1\nPOTENTIALS\n{potentials}\nATOMS\n{atoms}\nEND'