In [14]:
import re
import shlex

In [None]:
mae_filepath = "/home/shpark/prj-mlcv/lib/DESRES/DESRES-Trajectory_CLN025-0-protein/system.mae"
with open(mae_filepath, 'r') as file:
    mae_content = file.read()

# Locate the first `m_atom[N] { ... }` block; group(1) is the text between the braces.
# `[^}]*` stops at the first closing brace, so the capture cannot run into a later block.
atom_block_match = re.search(r'm_atom\[\d+\]\s*{([^}]*)}', mae_content, re.DOTALL)

if not atom_block_match:
    raise ValueError("Atom block not found in the MAE file.")

atom_block = atom_block_match.group(1).strip()
atom_lines = atom_block.split('\n')


# Split the block into header field names (lines before the first ':::')
# and raw atom records (lines after it).
header_fields = []
coords_started = False
atom_data = []

for line in atom_lines:
    line = line.strip()

    # Blank lines carry no record; appending them would break downstream
    # tokenization of atom rows.
    if not line:
        continue

    # '#' lines in the header are comments, not field names; tokenizing them
    # would add bogus entries and shift every index in `header_indices`.
    if not coords_started and line.startswith('#'):
        continue

    if line == ':::':
        coords_started = True
        continue

    # Stop at the first line mentioning TIP3 so that only the records before
    # it are kept.
    # NOTE(review): presumably TIP3 marks water molecules - confirm.
    if "TIP3" in line:
        coords_started = False
        break

    if not coords_started:
        header_fields.extend(line.split())
    else:
        atom_data.append(line)

# Map each header field name to its position in `header_fields`.
# NOTE(review): atom rows appear to start with an atom index that is not listed
# in the header, so the matching token position is probably idx + 1 - confirm.
header_indices = {name: idx for idx, name in enumerate(header_fields)}

In [8]:
# Sanity check: number of atom records collected, then the first three raw rows.
print(len(atom_data), atom_data[:3], sep="\n")

166
['1 " N  " TYR P P 1 10.865962 -13.578603 -19.419243 7 1 43 1 1.6651826 -0.85347438 1.6885613', '2 " HT1" TYR P P 1 11.192533 -12.732378 -19.863485 1 1 21 1 2.0433135 -9.0181837 -13.586321', '3 " HT2" TYR P P 1 10.051212 -13.955378 -19.882196 1 1 21 1 -9.3199043 17.198019 6.3300219']


In [48]:
def infer_element(atom_name, atomic_number=None):
    """
    Infer an element symbol from an atom name, falling back to the atomic number.

    The name is stripped, upper-cased, and any leading digits are removed so
    that names such as "1HB" or "2HD1" are recognised by their first letter.
    The first letter is then matched against H, C, N, O and S.

    Parameters
    ----------
    atom_name : str
        Atom name, e.g. "CA", " HT1" or "1HB".
    atomic_number : int, optional
        Used only when the name does not start with H, C, N, O or S.

    Returns
    -------
    str
        "H", "C", "N", "O" or "S"; "X" when neither the name nor the atomic
        number identifies one of these elements.
    """
    # Without removing leading digits, "1HB" would match no prefix and come
    # back as "X".
    cleaned_name = atom_name.strip().upper().lstrip("0123456789")

    # Only the first letter is inspected, so two-letter symbols are not
    # distinguished: "CL" maps to "C" and "NA" to "N".
    for symbol in ("H", "C", "N", "O", "S"):
        if cleaned_name.startswith(symbol):
            return symbol

    if atomic_number is not None:
        # Fallback lookup limited to the same five elements.
        periodic_table = {1: "H", 6: "C", 7: "N", 8: "O", 16: "S"}
        return periodic_table.get(atomic_number, "X")

    return "X"

def _format_pdb_atom_line(serial, atom_name, residue_name, residue_number, x, y, z, element):
    """
    Build one fixed-column PDB ATOM record.

    Column layout (1-based): serial right-justified ending at column 11, atom
    name in 13-16, residue name in 18-20, residue number in 23-26, x/y/z in
    31-38 / 39-46 / 47-54, occupancy "1.00" in 55-60, temperature factor
    "0.00" in 61-66, element right-justified in 77-78. Chain ID, altLoc and
    insertion code are left blank.

    `x`, `y` and `z` are expected as already-formatted strings.
    """
    # Names shorter than four characters start in column 14 (column 13 blank);
    # four-character names occupy columns 13-16 so they do not spill into the
    # altLoc column.
    name_field = atom_name if len(atom_name) >= 4 else f" {atom_name:<3}"

    # Coordinates sit in adjacent 8-wide fields with no separator, so an
    # 8-character value such as "-100.123" does not shift later columns.
    return (
        f"ATOM {serial:>6} {name_field} {residue_name:>3} {residue_number:>5}    "
        f"{x:>8}{y:>8}{z:>8}  1.00  0.00          {element:>2}"
    )


pdb_lines = []
for line in atom_data:
    # shlex keeps a quoted atom name such as '" N  "' together as one token.
    tokens = shlex.split(line)
    if not tokens:
        # Nothing to convert on an empty row.
        continue

    atom_idx = int(tokens[0])
    atom_name = tokens[1].strip().replace('"', '').strip()
    residue_name = tokens[2]
    residue_number = int(tokens[5])

    # Coordinates are rounded to three decimals.
    x = f"{float(tokens[6]):.3f}"
    y = f"{float(tokens[7]):.3f}"
    z = f"{float(tokens[8]):.3f}"

    # NOTE(review): tokens[9] looks like the atomic number (7 for N, 1 for H in
    # the sample rows) and could be passed as a fallback - confirm against the header.
    element = infer_element(atom_name)

    pdb_line = _format_pdb_atom_line(
        atom_idx, atom_name, residue_name, residue_number, x, y, z, element
    )
    print(pdb_line)
    pdb_lines.append(pdb_line)
pdb_lines.append("END")

ATOM      1  N   TYR     1      10.866 -13.579 -19.419  1.00  0.00           N
ATOM      2  HT1 TYR     1      11.193 -12.732 -19.863  1.00  0.00           H
ATOM      3  HT2 TYR     1      10.051 -13.955 -19.882  1.00  0.00           H
ATOM      4  HT3 TYR     1      11.638 -14.227 -19.484  1.00  0.00           H
ATOM      5  CA  TYR     1      10.585 -13.392 -18.050  1.00  0.00           C
ATOM      6  HA  TYR     1       9.961 -12.507 -17.925  1.00  0.00           H
ATOM      7  CB  TYR     1       9.690 -14.502 -17.442  1.00  0.00           C
ATOM      8  HB1 TYR     1       8.667 -14.457 -17.815  1.00  0.00           H
ATOM      9  HB2 TYR     1      10.108 -15.453 -17.771  1.00  0.00           H
ATOM     10  CG  TYR     1       9.672 -14.561 -15.960  1.00  0.00           C
ATOM     11  CD1 TYR     1      10.278 -15.544 -15.203  1.00  0.00           C
ATOM     12  HD1 TYR     1      10.641 -16.450 -15.665  1.00  0.00           H
ATOM     13  CE1 TYR     1      10.361 -15.429 -13.7

In [49]:
# Write the records to disk, newline-separated, with no newline after the last one.
with open("CLN025_desres.pdb", "w") as f:
    print(*pdb_lines, sep="\n", end="", file=f)