# DFT2ML Formatter: End-to-End Demo

This notebook demonstrates how to use the `dft2ml_formatter` package to extract structured, ML-ready data from the output files of several DFT packages. It walks through one example for each of the following:
- VASP
- Gaussian
- ORCA
- CP2K

In [None]:
# Import available parsers
from dft2ml_formatter import parse_outcar, parse_doscar
from dft2ml_formatter.gaussian_parser import parse_gaussian_log
from dft2ml_formatter.orca_parser import parse_orca_output
from dft2ml_formatter.cp2k_parser import parse_cp2k_log

import json

## VASP Example: Extract from OUTCAR and DOSCAR

In [None]:
# Relative paths to the example VASP output files
outcar_path = "../examples/vasp/OUTCAR"
doscar_path = "../examples/vasp/DOSCAR"

# Parse each file on its own: OUTCAR first, then DOSCAR
outcar_data = parse_outcar(outcar_path)
doscar_data = parse_doscar(doscar_path)

# Merge both parser results into a single dict.
# On a duplicate key the DOSCAR value replaces the OUTCAR value.
vasp_result = {}
vasp_result.update(outcar_data)
vasp_result.update(doscar_data)

# Preview the merged result as indented JSON
print(json.dumps(vasp_result, indent=2))

## Gaussian Example

In [None]:
# Relative path to the example Gaussian log file
gaussian_log_path = "../examples/gaussian/G16_log.log"

# Parse the log, then preview the result as indented JSON
gaussian_result = parse_gaussian_log(gaussian_log_path)
print(json.dumps(gaussian_result, indent=2))

## ORCA Example

In [None]:
# Relative path to the example ORCA output file
orca_output_path = "../examples/orca/orca_output.out"

# Parse the output, then preview the result as indented JSON
orca_result = parse_orca_output(orca_output_path)
print(json.dumps(orca_result, indent=2))

## CP2K Example

In [None]:
# Relative path to the example CP2K log file
cp2k_log_path = "../examples/cp2k/cp2k_output.log"

# Parse the log, then preview the result as indented JSON
cp2k_result = parse_cp2k_log(cp2k_log_path)
print(json.dumps(cp2k_result, indent=2))