<a href="https://colab.research.google.com/github/saeedshahi/Portfolio/blob/main/Active_Learning_in_ML_PES%20/data_parser/outcar_parser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive in the Colab runtime, then switch the working directory
# to the project folder. Later cells build their paths from Path.cwd(), so the
# OUTCAR input and the output/ folder are looked up in this directory.
from google.colab import drive
drive.mount("/content/drive")
%cd /content/drive/MyDrive/Projects/Project_data_parser

Mounted at /content/drive
/content/drive/MyDrive/Projects/Project_data_parser


In [None]:
# Print every regular file below the project folder (recursively) as a quick
# check of what is available on the mounted drive.
# Path is imported here because this cell comes before the cell that holds the
# parser's own imports; without it a top-to-bottom run fails with NameError.
from pathlib import Path

base_path = Path('/content/drive/MyDrive/Projects/Project_data_parser')
for file in base_path.rglob('*'):
    if file.is_file():
        print(file)

/content/drive/MyDrive/Projects/Project_data_parser/OUTCAR
/content/drive/MyDrive/Projects/Project_data_parser/outcar_parser.ipynb
/content/drive/MyDrive/Projects/Project_data_parser/output/Energies.txt
/content/drive/MyDrive/Projects/Project_data_parser/output/Coordinates.txt
/content/drive/MyDrive/Projects/Project_data_parser/output/Forces.txt


In [None]:
import re
from pathlib import Path

# Input file to parse: the OUTCAR located in the current working directory
file_path = Path.cwd().joinpath('OUTCAR')

def extract_outcar_info(file_path):
    """Extract energies, atomic positions and forces from an OUTCAR file.

    The file is streamed line by line and two kinds of sections are read:

    * A header line containing "FREE ENERGIE OF THE ION-ELECTRON SYSTEM (eV)".
      The energy is taken from the ``free energy TOTEN = <value>`` entry on
      the second line after that header.
    * A header line containing both "POSITION" and "TOTAL-FORCE (eV/Angst)".
      Every following line with exactly six whitespace-separated fields is
      read as three coordinates followed by three force components. Lines
      with any other field count (separator rules, the drift summary) are
      ignored. The block ends at the first blank line, at the next section
      header, or at end of file.

    Each position/force block yields one frame. Frames are closed when their
    block ends, independently of whether the matching energy line could be
    parsed, so an unreadable energy never merges two frames; in that case
    ``energies`` is simply shorter than the two frame lists.

    Args:
        file_path: Path of the file to parse (anything ``open`` accepts).

    Returns:
        A tuple ``(energies, coordinates_list, forces_list)`` where
        ``energies`` is a list of floats, and ``coordinates_list`` and
        ``forces_list`` are lists of frames, each frame being a list of
        ``[x, y, z]`` float triples. Blocks without any data row produce
        no frame.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a six-field line inside a position/force block is
            not numeric.
    """
    energy_header = "FREE ENERGIE OF THE ION-ELECTRON SYSTEM (eV)"
    # Accepts plain decimals as before, plus values without a leading digit
    # ("-.25") and exponent notation, all of which float() understands.
    toten_pattern = re.compile(
        r"free\s+energy\s+TOTEN\s*=\s*"
        r"([-+]?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][-+]?\d+)?)"
    )

    energies = []
    coordinates_list = []
    forces_list = []
    coordinates = []
    forces = []
    is_position_force_section = False
    lines_until_toten = 0  # countdown to the TOTEN line after an energy header

    def close_frame():
        """Move the rows collected so far into the per-frame result lists."""
        nonlocal coordinates, forces
        if coordinates:
            coordinates_list.append(coordinates)
            coordinates = []
        if forces:
            forces_list.append(forces)
            forces = []

    # Iterate over the file object directly: OUTCAR files can be very large
    # and there is no need to hold every line in memory at once.
    with open(file_path, 'r') as file:
        for line in file:
            if energy_header in line:
                # An energy header also terminates a block that was not
                # closed by a blank line.
                is_position_force_section = False
                close_frame()
                lines_until_toten = 2
                continue

            if lines_until_toten > 0:
                lines_until_toten -= 1
                if lines_until_toten == 0:
                    toten_match = toten_pattern.search(line)
                    if toten_match:
                        energies.append(float(toten_match.group(1)))
                # Neither the separator nor the TOTEN line is a data row;
                # the TOTEN line has six fields and must not reach the
                # row parser below.
                continue

            if "POSITION" in line and "TOTAL-FORCE (eV/Angst)" in line:
                close_frame()  # no-op unless the previous block was unterminated
                is_position_force_section = True
                continue

            if is_position_force_section:
                if line.strip() == "":
                    is_position_force_section = False
                    close_frame()
                    continue

                parts = line.split()
                if len(parts) == 6:
                    coordinates.append([float(coord) for coord in parts[:3]])
                    forces.append([float(force) for force in parts[3:]])

    # A block that runs up to end of file still counts as a frame.
    close_frame()

    return energies, coordinates_list, forces_list


def write_aligned_columns(file_path, data):
    """Write rows of numbers to a text file as right-aligned columns.

    Every value is formatted with nine digits after the decimal point. Each
    column is padded on the left to the width of its widest entry, and
    columns are separated by four spaces, one row per line.

    Args:
        file_path: Destination file; it is created or overwritten.
        data: Sequence of rows, each row a sequence of numbers. It is
            iterated twice, so it must not be a one-shot iterator. It may be
            empty, in which case an empty file is written, and rows may have
            different lengths.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # First pass: find the widest formatted value of each column. The width
    # list grows on demand, so neither an empty data set nor a row longer
    # than the first one causes an IndexError.
    max_widths = []
    for row in data:
        for i, value in enumerate(row):
            width = len(f"{value:.9f}")
            if i == len(max_widths):
                max_widths.append(width)
            elif width > max_widths[i]:
                max_widths[i] = width

    # Second pass: emit the rows using the column widths found above.
    with open(file_path, 'w') as file:
        for row in data:
            file.write("    ".join(f"{value:>{max_widths[i]}.9f}" for i, value in enumerate(row)))
            file.write("\n")

def main():
    """Parse the OUTCAR at the module-level ``file_path`` and save the results.

    Three text files are written into an ``output`` folder inside the current
    working directory (created if missing): ``Energies.txt`` with one energy
    per line, and ``Coordinates.txt`` / ``Forces.txt`` with the rows of all
    frames concatenated in order.
    """
    energies, coordinates_list, forces_list = extract_outcar_info(file_path)

    # All results go below <cwd>/output
    output_dir_path = Path.cwd() / 'output'
    output_dir_path.mkdir(exist_ok=True)

    # Energies: one fixed-precision value per line
    with open(output_dir_path / 'Energies.txt', 'w') as energy_file:
        energy_file.writelines(f"{energy:.9f}\n" for energy in energies)

    # Coordinates: merge the per-frame row lists into one table
    all_coordinates = []
    for frame_coordinates in coordinates_list:
        all_coordinates.extend(frame_coordinates)
    write_aligned_columns(output_dir_path / 'Coordinates.txt', all_coordinates)

    # Forces: same layout as the coordinates
    all_forces = []
    for frame_forces in forces_list:
        all_forces.extend(frame_forces)
    write_aligned_columns(output_dir_path / 'Forces.txt', all_forces)

# Entry point: run the extraction only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()