In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the atom-coordinate table from a Parquet file; the relative path is
# resolved against the notebook's current working directory.
df_coords = pd.read_parquet('atom_coordinates-001.parquet')

In [3]:
# Preview the first five rows (the default for DataFrame.head).
df_coords.head()

Unnamed: 0_level_0,atomic numbers,atom count,heavy atom count,geometry_x,geometry_y,geometry_z
cid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,"[6, 6, 8, 8, 6, 6, 6, 8, 8, 6, 7, 6, 6, 6, 1, ...",31,14,"[9.0222826408, 8.6952767673, 8.5944177792, 8.5...","[4.8372353216, 3.4064427818, 2.982379015, 2.64...","[-3.0906412175, -3.4441728955, -4.5846751688, ..."
2,"[6, 6, 8, 8, 6, 6, 6, 8, 8, 6, 7, 6, 6, 6, 1, ...",32,14,"[0.8534969865, 2.0554171317, 2.1532149537, 3.0...","[3.6892102868, 2.874638067, 2.1476716139, 3.05...","[-0.1689540401, 0.215227915, 1.1811928829, -0...."
3,"[6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 1, 1, 1, 1, ...",19,11,"[1.1846999742, 1.4523086164, 0.3783964324, -1....","[0.8792580075, -0.4392446067, -1.4636017705, -...","[0.1084402265, 0.1134616892, -0.14140948, 0.12..."
4,"[6, 6, 6, 7, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1]",14,5,"[0.8709266695, 2.392262514, 2.8675340407, 4.32...","[-0.0592588282, -0.0939471141, -0.1495982066, ...","[-0.0932100126, 0.0365717632, 1.4905144329, 1...."
5,"[6, 6, 8, 6, 8, 15, 8, 8, 8, 7, 1, 1, 1, 1, 1,...",18,10,"[1.7220056617, 1.407568102, 2.3037718629, -0.0...","[-0.7257250542, -1.3783715531, -1.6157526, -1....","[0.4170901951, -0.9230877039, -1.7183004719, -..."


In [4]:
# Keep only the first five rows (head() default) for the steps below.
df = df_coords.head()

In [5]:
# Turn the index (cid) into a regular column.
# The frame is rebuilt from df_coords rather than mutated in place so that
# re-running this cell always yields the same result; a repeated in-place
# reset_index would insert a spurious "index" column.
df = df_coords.head().reset_index()

In [7]:
# Lookup table: atomic number (Z) -> element symbol, for Z = 1..118.
# The table is complete so that elements present in the data (for example
# phosphorus, Z = 15, in CID 5) are not written with the placeholder 'X'.
# Symbols are listed in order of atomic number, ten per line.
atomic_symbol_table = dict(enumerate("""
    H  He Li Be B  C  N  O  F  Ne
    Na Mg Al Si P  S  Cl Ar K  Ca
    Sc Ti V  Cr Mn Fe Co Ni Cu Zn
    Ga Ge As Se Br Kr Rb Sr Y  Zr
    Nb Mo Tc Ru Rh Pd Ag Cd In Sn
    Sb Te I  Xe Cs Ba La Ce Pr Nd
    Pm Sm Eu Gd Tb Dy Ho Er Tm Yb
    Lu Hf Ta W  Re Os Ir Pt Au Hg
    Tl Pb Bi Po At Rn Fr Ra Ac Th
    Pa U  Np Pu Am Cm Bk Cf Es Fm
    Md No Lr Rf Db Sg Bh Hs Mt Ds
    Rg Cn Nh Fl Mc Lv Ts Og
""".split(), start=1))

def _parse_number_list(raw, cast):
    """Return ``raw`` as a list of ``cast`` values.

    ``raw`` may be a bracketed, comma-separated string such as ``"[6, 8, 1]"``
    or any iterable of numbers. Malformed entries raise ``ValueError`` from
    ``cast`` instead of being silently dropped.
    """
    if isinstance(raw, str):
        content = raw.strip().strip('[]').strip()
        raw = content.split(',') if content else []
    return [cast(item) for item in raw]


def create_xyz_file(row):
    """Write one molecule to ``coordinates_cid_<cid>.xyz`` in the working directory.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series yielded by ``DataFrame.iterrows``)
        Must provide the keys ``'cid'``, ``'atomic numbers'``, ``'geometry_x'``,
        ``'geometry_y'`` and ``'geometry_z'``. Each of the four per-atom fields
        may be a string like ``"[1.0, 2.0]"`` or an actual sequence.

    Returns
    -------
    None
        The file is written as a side effect and a confirmation is printed.

    Raises
    ------
    ValueError
        If the per-atom fields do not all have the same length, or an entry
        cannot be parsed as a number.
    """
    cid = row['cid']
    filename = f"coordinates_cid_{cid}.xyz"

    xs = _parse_number_list(row['geometry_x'], float)
    ys = _parse_number_list(row['geometry_y'], float)
    zs = _parse_number_list(row['geometry_z'], float)
    atomic_numbers = _parse_number_list(row['atomic numbers'], int)

    # Validate before opening the file: zip() would otherwise truncate silently
    # and the atom-count header would disagree with the atom lines written.
    if not (len(xs) == len(ys) == len(zs) == len(atomic_numbers)):
        raise ValueError(
            f"CID {cid}: inconsistent lengths "
            f"(atomic numbers={len(atomic_numbers)}, "
            f"x={len(xs)}, y={len(ys)}, z={len(zs)})"
        )

    with open(filename, 'w') as file:
        # XYZ layout: atom count, one comment line, then one line per atom.
        file.write(f"{len(atomic_numbers)}\n")
        file.write(f"Coordinates for CID: {cid}\n")

        for atomic_num, x, y, z in zip(atomic_numbers, xs, ys, zs):
            # Atomic numbers missing from the lookup table fall back to 'X'.
            atom = atomic_symbol_table.get(atomic_num, 'X')
            file.write(f"{atom} {x:.4f} {y:.4f} {z:.4f}\n")

    print(f"Arquivo XYZ criado: {filename}")

# Iterate over the DataFrame rows and call create_xyz_file on each one individually
for _, row in df.iterrows():
    create_xyz_file(row)


Arquivo XYZ criado: coordinates_cid_1.xyz
Arquivo XYZ criado: coordinates_cid_2.xyz
Arquivo XYZ criado: coordinates_cid_3.xyz
Arquivo XYZ criado: coordinates_cid_4.xyz
Arquivo XYZ criado: coordinates_cid_5.xyz
