In [1]:
"""
Bulk NEB Calculation Script

Generate multiple NEB calculations with customizable parameters.
Useful for generating large datasets for training.
"""

from config import Config
from oracle import Oracle
import numpy as np
from datetime import datetime


# ============================================================================
# BULK CALCULATION PARAMETERS - ADJUST HERE
# ============================================================================

# Number of random compositions to draw; passed as n_samples to
# sample_random_compositions()
N_SAMPLES = 10000

# Random seed for reproducibility; passed as seed to
# sample_random_compositions()
SEED = 42

# Elements to sample over. Any falsy value (None or an empty list) makes the
# script fall back to config.elements.
ELEMENTS = None  # e.g., ['Mo', 'Nb', 'Ta', 'W'] or None

# Number of times oracle.calculate() is invoked for each sampled composition
RUNS_PER_COMPOSITION = 1

# ============================================================================


def sample_random_compositions(n_samples, elements, seed=None):
    """
    Draw random compositions from a flat Dirichlet distribution.

    Each draw uses a concentration parameter of 1.0 per element, so the
    fractions of a single composition sum to 1.

    Parameters:
    -----------
    n_samples : int
        How many compositions to draw
    elements : list of str
        Element symbols; one fraction is drawn for each symbol
    seed : int, optional
        When given, NumPy's global random state is re-seeded with this
        value before sampling; when None the global state is used as-is

    Returns:
    --------
    compositions : list of dict
        One dict per sample, mapping element symbol to its fraction
    """
    if seed is not None:
        np.random.seed(seed)

    # All-ones concentration vector (uniform over the simplex)
    alpha = [1.0] * len(elements)

    def draw_composition():
        # .tolist() converts the NumPy values to plain Python floats
        return dict(zip(elements, np.random.dirichlet(alpha).tolist()))

    return [draw_composition() for _ in range(n_samples)]


def print_bulk_config(config):
    """
    Print a banner summarising the bulk-run settings.

    The report lists the module-level bulk parameters (N_SAMPLES,
    RUNS_PER_COMPOSITION, SEED and the effective element list) followed by
    the oracle and data-storage settings read from ``config``.

    Parameters:
    -----------
    config : Config
        Settings object; its elements, supercell_size, lattice_parameter,
        neb_images, neb_max_steps, neb_fmax, relax_max_steps, relax_fmax,
        database_dir and csv_path attributes are read here
    """
    def report_lines():
        # Lines are produced lazily so each value is looked up right
        # before its line is printed.
        rule = "=" * 70
        yield "\n" + rule
        yield "BULK CALCULATION CONFIGURATION"
        yield rule

        # A falsy ELEMENTS means "use the elements from config"
        chosen_elements = ELEMENTS or config.elements

        yield "\nBulk Parameters:"
        yield f"  Number of samples:        {N_SAMPLES}"
        yield f"  Runs per composition:     {RUNS_PER_COMPOSITION}"
        yield f"  Total calculations:       {N_SAMPLES * RUNS_PER_COMPOSITION}"
        yield f"  Random seed:              {SEED}"
        yield f"  Elements:                 {', '.join(chosen_elements)}"

        yield "\nOracle Configuration:"
        yield "  Calculator:               CHGNet"
        cell = config.supercell_size
        yield f"  Supercell size:           {cell}x{cell}x{cell}"
        yield f"  Lattice parameter:        {config.lattice_parameter} Å"
        yield f"  NEB images:               {config.neb_images}"
        yield f"  NEB max steps:            {config.neb_max_steps}"
        yield f"  NEB fmax:                 {config.neb_fmax} eV/Å"
        yield f"  Relax max steps:          {config.relax_max_steps}"
        yield f"  Relax fmax:               {config.relax_fmax} eV/Å"

        yield "\nData Storage:"
        yield f"  Database directory:       {config.database_dir}"
        yield f"  CSV path:                 {config.csv_path}"

        yield rule

    for line in report_lines():
        print(line)


def run_bulk_calculation():
    """
    Run bulk NEB calculations.

    Steps, in order:
      1. Build a Config and print the run configuration.
      2. Wait for the user to press ENTER (Ctrl+C cancels).
      3. Sample N_SAMPLES random compositions using SEED.
      4. Inside an Oracle context, call oracle.calculate() for every
         composition RUNS_PER_COMPOSITION times, counting truthy results
         as completed and falsy results as failed.
      5. Print a final summary with counts and percentages.

    Takes no arguments and returns None; all settings come from the
    module-level constants and from Config.
    """

    # Load config
    config = Config()

    # Determine elements (a falsy ELEMENTS falls back to the config's list)
    elements = ELEMENTS if ELEMENTS else config.elements

    # Print configuration
    print_bulk_config(config)

    print("\nStarting bulk calculation...")
    print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    input("Press ENTER to start (or Ctrl+C to cancel)...")

    # Sample compositions
    print(f"\nSampling {N_SAMPLES} random compositions...")
    compositions = sample_random_compositions(
        n_samples=N_SAMPLES,
        elements=elements,
        seed=SEED
    )
    print(f"✓ Sampled {len(compositions)} compositions")

    # Show first few compositions
    print("\nFirst 5 compositions:")
    for i, comp in enumerate(compositions[:5]):
        comp_str = ", ".join([f"{k}={v:.2f}" for k, v in comp.items()])
        print(f"  {i+1}. {comp_str}")
    if len(compositions) > 5:
        print(f"  ... and {len(compositions) - 5} more")

    # Run calculations
    print(f"\n{'='*70}")
    print("STARTING CALCULATIONS")
    print(f"{'='*70}\n")

    total_calculations = len(compositions) * RUNS_PER_COMPOSITION
    completed = 0
    failed = 0

    with Oracle(config) as oracle:
        for i, comp in enumerate(compositions, 1):
            print(f"\nComposition {i}/{len(compositions)}:")

            for run in range(RUNS_PER_COMPOSITION):
                if RUNS_PER_COMPOSITION > 1:
                    print(f"  Run {run+1}/{RUNS_PER_COMPOSITION}:")

                success = oracle.calculate(comp)

                if success:
                    completed += 1
                else:
                    failed += 1

                # Progress update (only reached when total_calculations > 0)
                progress = (completed + failed) / total_calculations * 100
                print(f"  Progress: {completed + failed}/{total_calculations} ({progress:.1f}%) | "
                      f"Completed: {completed} | Failed: {failed}")

    # An empty run (N_SAMPLES or RUNS_PER_COMPOSITION set to 0) has
    # total_calculations == 0; dividing by it would raise ZeroDivisionError
    # in the summary, so use 1 as the denominator and report 0.0% instead.
    percent_base = total_calculations or 1

    # Final summary
    print(f"\n{'='*70}")
    print("BULK CALCULATION COMPLETE")
    print(f"{'='*70}")
    print(f"\nEnd time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"\nResults:")
    print(f"  Total calculations:  {total_calculations}")
    print(f"  Completed:           {completed} ({completed/percent_base*100:.1f}%)")
    print(f"  Failed:              {failed} ({failed/percent_base*100:.1f}%)")
    print(f"\nData saved to:")
    print(f"  Database: {config.database_dir}")
    print(f"  CSV:      {config.csv_path}")
    print(f"\n{'='*70}\n")


# Start the bulk run only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    run_bulk_calculation()


BULK CALCULATION CONFIGURATION

Bulk Parameters:
  Number of samples:        10000
  Runs per composition:     1
  Total calculations:       10000
  Random seed:              42
  Elements:                 Mo, Nb, Ta, W, Cr

Oracle Configuration:
  Calculator:               CHGNet
  Supercell size:           4x4x4
  Lattice parameter:        3.2 Å
  NEB images:               3
  NEB max steps:            500
  NEB fmax:                 0.05 eV/Å
  Relax max steps:          500
  Relax fmax:               0.05 eV/Å

Data Storage:
  Database directory:       database
  CSV path:                 database_navi.csv

Starting bulk calculation...
Start time: 2025-10-30 23:03:09


Sampling 10000 random compositions...
✓ Sampled 10000 compositions

First 5 compositions:
  1. Mo=0.08, Nb=0.51, Ta=0.22, W=0.16, Cr=0.03
  2. Mo=0.04, Nb=0.01, Ta=0.46, W=0.21, Cr=0.28
  3. Mo=0.00, Nb=0.61, Ta=0.31, W=0.04, Cr=0.03
  4. Mo=0.09, Nb=0.16, Ta=0.34, W=0.25, Cr=0.16
  5. Mo=0.38, Nb=0.06, Ta=0.14, W=0

KeyboardInterrupt: 