In [None]:
import os
import re

In [None]:
# Name of the coordinate file to read and the base name used for the
# structure titles and the output files.
input_file = "VargasCaamal.txt"
dimer_name = "ethanol_dimer"

# Switch the working directory to the sibling "XYZ" folder of the directory
# the kernel is currently in, so the relative file names above resolve there.
cwd = os.getcwd()
xyz_dir = f"{cwd}/../XYZ"
os.chdir(xyz_dir)

In [None]:
def parse_coordinates(filename):
    """Read a multi-structure coordinate file and group atom lines by structure.

    A line that consists solely of digits (optionally followed by trailing
    whitespace) starts a new structure and supplies its ID. Every following
    line whose first character is an uppercase ASCII letter is collected,
    stripped of surrounding whitespace, as one atom record of the current
    structure. All other lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of ``(structure_id, atom_lines)`` tuples in file order.
        ``structure_id`` is the digit string of the header line, or ``None``
        when atom lines appear before any header. A header that is not
        followed by at least one atom line produces no entry.
    """
    structures = []
    structure = []
    current_id = None

    with open(filename, 'r') as file:
        for line in file:
            # Match on the right-stripped line so that an ID followed by
            # spaces/tabs, or an ID on a final line without a newline, is
            # still recognised instead of being silently skipped (which
            # would merge its atoms into the previous structure).
            header = re.fullmatch(r'\d+', line.rstrip())
            if header:
                # A new header closes the structure collected so far.
                if structure:
                    structures.append((current_id, structure))
                    structure = []
                current_id = header.group(0)
            elif re.match(r'^[A-Z]', line):
                structure.append(line.strip())

    # Flush the last structure, which has no following header to close it.
    if structure:
        structures.append((current_id, structure))

    return structures

def convert_to_format(structures, dimer_name):
    """Render parsed structures as text for the dimer and for each of its halves.

    For every ``(struct_id, atom_lines)`` pair a header line
    ``"    <count> <dimer_name>_<struct_id>"`` is emitted, followed by one row
    per atom: 1-based index, atom label (first token of the input line),
    x/y/z (tokens 2-4, printed with six decimals), the first number of the
    matching template entry (presumably a force-field atom type -- confirm)
    and the 1-based indices of the atoms it is bonded to.

    Before the rows are written the coordinates are reshuffled: positions
    5-9 receive the coordinates of atoms 7, 9, 8, 6, 5 and positions 14-18
    those of atoms 16, 18, 17, 15, 14. Atom labels are not reshuffled.

    Rows whose zero-based index is below ``len(atom_lines) // 2`` are copied
    unchanged into the "A" output. The remaining rows go to the "B" output
    with their index lowered by 9 and every bonded index above 9 lowered
    by 9. The A and B headers report ``len(atom_lines) // 2`` atoms.

    The template describes 18 atoms. Atom lines beyond the 18th are not
    written, although the dimer header still reports the full line count.

    Args:
        structures: Iterable of ``(struct_id, atom_lines)`` pairs, where
            ``atom_lines`` is a list of whitespace-separated
            ``"label x y z"`` strings.
        dimer_name: Prefix used for the title in every header line.

    Returns:
        A tuple ``(dimer_text, monomer_a_text, monomer_b_text)`` of
        newline-joined strings (without a trailing newline).

    Raises:
        IndexError: If a structure has fewer than 18 atom lines.
        ValueError: If a written atom line lacks three numeric coordinates.
    """
    # One (type number, bonded atom indices) entry per atom, dimer numbering.
    atom_template = (
        (49, [2, 5, 6, 7]),
        (47, [1, 3, 8, 9]),
        (45, [2, 4]),
        (46, [3]),
        (50, [1]),
        (50, [1]),
        (50, [1]),
        (48, [2]),
        (48, [2]),
        (49, [11, 14, 15, 16]),
        (47, [10, 12, 17, 18]),
        (45, [11, 13]),
        (46, [12]),
        (50, [10]),
        (50, [10]),
        (50, [10]),
        (48, [11]),
        (48, [11]),
    )
    # (source atom, destination slot) pairs, 1-based: the coordinates of the
    # source atom are written at the destination slot.
    coordinate_moves = (
        (7, 5), (9, 6), (8, 7), (6, 8), (5, 9),
        (16, 14), (18, 15), (17, 16), (15, 17), (14, 18),
    )

    dimer_rows, monomer_a_rows, monomer_b_rows = [], [], []

    for struct_id, structure in structures:
        atom_count = len(structure)
        monomer_size = atom_count // 2
        title = f"{dimer_name}_{struct_id}"

        dimer_rows.append(f"    {atom_count} {title}")
        monomer_header = f"    {monomer_size} {title}"
        monomer_a_rows.append(monomer_header)
        monomer_b_rows.append(monomer_header)

        # Always read from the untouched list so that the moves do not
        # overwrite values that a later move still needs.
        original_xyz = [entry.split()[1:4] for entry in structure]
        reordered_xyz = list(original_xyz)
        for source, destination in coordinate_moves:
            reordered_xyz[destination - 1] = original_xyz[source - 1]

        for index, (entry, (type_number, bonded)) in enumerate(zip(structure, atom_template)):
            label = entry.split()[0]
            x, y, z = reordered_xyz[index]
            xyz_text = f"{float(x):>12.6f}{float(y):>12.6f}{float(z):>12.6f}"
            bonded_text = ' '.join(f"{partner:>5}" for partner in bonded)

            dimer_row = f" {index + 1:>5}  {label:<3}{xyz_text}    {type_number} {bonded_text}"
            dimer_rows.append(dimer_row)

            if index < monomer_size:
                monomer_a_rows.append(dimer_row)
                continue

            # Second half: renumber the atom and its bonded partners so that
            # the fragment starts counting at 1.
            local_bonded = [partner - 9 if partner > 9 else partner for partner in bonded]
            local_bonded_text = ' '.join(f"{partner:>5}" for partner in local_bonded)
            monomer_b_rows.append(
                f" {index - 8:>5}  {label:<3}{xyz_text}    {type_number} {local_bonded_text}"
            )

    return '\n'.join(dimer_rows), '\n'.join(monomer_a_rows), '\n'.join(monomer_b_rows)

In [None]:
# Parse the input file and render the dimer plus its two halves.
structures = parse_coordinates(input_file)
formatted_output, formatted_output_A, formatted_output_B = convert_to_format(structures, dimer_name)

# Output names: whole dimer, first half (A), second half (B).
output_file, output_file_A, output_file_B = (
    f"{dimer_name}{suffix}.arc" for suffix in ("", "_A", "_B")
)

arc_outputs = (
    (output_file, formatted_output),
    (output_file_A, formatted_output_A),
    (output_file_B, formatted_output_B),
)

# Write every file first; the confirmations are printed only afterwards.
for arc_path, arc_text in arc_outputs:
    with open(arc_path, 'w') as f:
        f.write(arc_text)

for arc_path in (output_file, output_file_A, output_file_B):
    print(f"Formatted data saved to {arc_path}")