Overall Plan:
1. Get the compositions we care about in V-Cr-Ti space
2. Fit CE model on them 
3. Run MCMC on those compositions 
4. Sample 3-5 structures with comparable energy
5. Relax the supercells using CHGNet 
6. Create vacancies in those supercells 
7. Relax the supercell (letting the cell change)
8. Create defects for all nearest neighbors 
9. Relax while keeping the volume fixed 
10. Interpolate between the start and end points
11. Run NEB (nudged elastic band) calculations 
12. Save the energies for that site and neighbor 
13. For each structure, create a histogram of the mean and std of the energies over all NNs 

# 1 Get all the compositions we care about in the V-Cr-Ti Space


## Libraries

In [1]:
import os
from pymatgen.core import Structure

## Load the perfect structures

In [11]:
# Perfect (vacancy-free) structures for V0.90625-Cr0.046875-Ti0.046875, taken
# from the 'VCrTi_Fixed_512' directory. The same three file names also exist
# as an alternative under 'VCrTi_Fixed_4096' in the same parent directory.
start_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_512/V0_90625-Cr0_046875-Ti0_046875_initial.cif'
middle_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_512/V0_90625-Cr0_046875-Ti0_046875_middle.cif'
end_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_512/V0_90625-Cr0_046875-Ti0_046875_final.cif'

# Read the three CIF files in order: initial, middle, final.
start, middle, end = (
    Structure.from_file(cif_path)
    for cif_path in (start_path, middle_path, end_path)
)

 # 2. Create Vacancies from the Structures

## Libraries

In [6]:
import os
import json
import sys
import numpy as np
import random
from smol.io import load_work
from pymatgen.core.structure import Structure
from pymatgen.entries.computed_entries import ComputedStructureEntry
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.io.vasp.outputs import Outcar
from ase.db import connect
from ase.io import write
from ase.visualize import view

sys.path.append('../Modules')
from defect_maker import make_defects, return_x_neighbors
from vasp_misc import *
def load_and_sort_structure(entry):
    """Rebuild the structure stored on ``entry`` and return it sorted.

    The entry's structure is serialised with ``as_dict``, reconstructed with
    ``Structure.from_dict`` and then passed through ``get_sorted_structure``.

    Args:
        entry: Object exposing a ``structure`` attribute.

    Returns:
        The value of ``get_sorted_structure()`` on the rebuilt structure.
    """
    structure_dict = entry.structure.as_dict()
    rebuilt = Structure.from_dict(structure_dict)
    return rebuilt.get_sorted_structure()

def _read_contcar_direct(contcar_file):
    """Parse a VASP CONTCAR/POSCAR whose positions are in Direct coordinates.

    Expected layout (0-based line numbers): 1 = scaling factor, 2-4 = lattice
    vectors, 5 = element labels, 6 = per-element counts, 7 = coordinate-mode
    line (not inspected here), 8+ = one coordinate triple per atom.
    Element labels are cut at the first underscore, so ``V_pv`` becomes ``V``.

    The positions are handed to ``Structure`` as fractional coordinates.
    Converting them by hand is unnecessary and was previously done with the
    lattice matrix transposed, which corrupted non-orthogonal cells.

    Args:
        contcar_file: Path of the file to read.

    Returns:
        Structure: Built from the scaled lattice, the expanded species list
        and the fractional coordinates.

    Raises:
        Exception: Any error while reading or parsing is printed and then
            re-raised unchanged.
    """
    try:
        with open(contcar_file, 'r') as file:
            lines = file.readlines()

        # Universal scaling factor; a negative value encodes the cell volume.
        scale = float(lines[1].strip())
        lattice_vectors = [list(map(float, line.split())) for line in lines[2:5]]
        if scale < 0:
            volume = abs(np.linalg.det(np.array(lattice_vectors)))
            scale = (-scale / volume) ** (1.0 / 3.0)
        lattice_vectors = [[scale * component for component in vector] for vector in lattice_vectors]

        # Element labels and how many atoms of each follow.
        elements = [element.split('_')[0] for element in lines[5].strip().split()]
        print(elements)
        element_counts = list(map(int, lines[6].strip().split()))
        species = [element for element, count in zip(elements, element_counts) for _ in range(count)]

        # Only the first three columns are coordinates; anything after them
        # on the line is ignored.
        n_atoms = sum(element_counts)
        coordinates = [list(map(float, line.split()[:3])) for line in lines[8:8 + n_atoms]]

        return Structure(lattice_vectors, species, coordinates, coords_are_cartesian=False)
    except Exception as e:
        print(f"Error reading CONTCAR file {contcar_file}: {e}")
        raise
    
from pymatgen.core.structure import Structure

def read_contcar_direct(contcar_file):
    """Parse a VASP CONTCAR/POSCAR file into a ``Structure``.

    Expected layout (0-based line numbers): 1 = scaling factor, 2-4 = lattice
    vectors, 5 = element labels, 6 = per-element counts, then an optional
    "Selective dynamics" line, the coordinate-mode line and one coordinate
    triple per atom.

    Element labels may carry a POTCAR suffix and/or a ``/identifier`` tail
    (``Ti_pv/abc123``); both are removed by splitting, not with
    ``str.rstrip``, which strips *character sets* and would turn ``Os_pv``
    into ``O``.

    Args:
        contcar_file: Path of the file to read.

    Returns:
        Structure: Built from the scaled lattice, the expanded species list
        and the coordinates, flagged as Cartesian or fractional according to
        the coordinate-mode line.

    Raises:
        Exception: Any error while reading or parsing is printed and then
            re-raised unchanged.
    """
    try:
        with open(contcar_file, 'r') as file:
            lines = file.readlines()

        # Universal scaling factor; a negative value encodes the cell volume.
        scale = float(lines[1].strip())
        lattice_vectors = [list(map(float, line.split())) for line in lines[2:5]]
        if scale < 0:
            volume = abs(np.linalg.det(np.array(lattice_vectors)))
            scale = (-scale / volume) ** (1.0 / 3.0)
        lattice_vectors = [[scale * component for component in vector] for vector in lattice_vectors]

        # Keep only the bare element symbol of each label.
        elements = [label.split('/')[0].split('_')[0] for label in lines[5].split()]
        print(elements)
        element_counts = list(map(int, lines[6].split()))
        species = [element for element, count in zip(elements, element_counts) for _ in range(count)]

        # An optional "Selective dynamics" line pushes everything down by one.
        mode_index = 7
        if lines[mode_index].strip()[:1] in ('s', 'S'):
            mode_index += 1
        # Only a leading C/c/K/k selects Cartesian mode; anything else is Direct.
        cart = lines[mode_index].strip()[:1] in ('c', 'C', 'k', 'K')

        first_atom = mode_index + 1
        coordinates = [
            list(map(float, line.split()[:3]))
            for line in lines[first_atom:first_atom + sum(element_counts)]
        ]
        if cart:
            # Cartesian positions are subject to the same scaling as the lattice.
            coordinates = [[scale * component for component in xyz] for xyz in coordinates]

        return Structure(lattice_vectors, species, coords=coordinates, coords_are_cartesian=cart)
    except Exception as e:
        print(f"Error reading CONTCAR file {contcar_file}: {e}")
        raise


def _find_target_atoms(structure, N, neighbor_distance=2):
    """Pick up to ``N`` atom indices whose neighbour lists do not overlap.

    Indices are visited in random order. For each one, the neighbours
    returned by ``return_x_neighbors`` for shells ``1..neighbor_distance`` are
    pooled; the index is accepted only when that pool shares no member with
    the pool of any previously accepted index.

    Args:
        structure: Structure to pick atoms from.
        N: Maximum number of indices to return.
        neighbor_distance: Highest neighbour shell included in the pool.

    Returns:
        list: Accepted indices; may hold fewer than ``N`` entries.
    """
    shuffled = list(range(len(structure)))
    random.shuffle(shuffled)
    print(f"All indices: {shuffled}")

    chosen = []
    claimed = []
    # Candidates are consumed from the end of the shuffled list.
    for index in reversed(shuffled):
        if len(chosen) >= N:
            break

        pooled = []
        for shell in range(1, neighbor_distance + 1):
            shell_neighbors, _ = return_x_neighbors(
                structure,
                target_atom_index=index,
                x_neighbor=shell,
                alat=structure.lattice.a,
            )
            pooled.extend(shell_neighbors)
        print(f"Index: {index}, Neighbors: {pooled}")

        pooled_set = set(pooled)
        if all(pooled_set.isdisjoint(taken) for taken in claimed):
            chosen.append(index)
            claimed.append(pooled_set)
            print(f"Selected target atom index: {index}")

    return chosen

def find_target_atoms(structure, N, neighbor_distance=2, cutoff_distance=5):
    """Pick up to ``N`` well-separated atom indices with disjoint neighbours.

    Indices are visited in random order. A candidate is accepted when (a) the
    pooled neighbours from shells ``1..neighbor_distance`` (as returned by
    ``return_x_neighbors``) share no member with the pool of any accepted
    index, and (b) ``structure.get_distance`` to every accepted index exceeds
    ``cutoff_distance``.

    Args:
        structure: Structure to pick atoms from.
        N: Maximum number of indices to return.
        neighbor_distance: Highest neighbour shell included in the pool.
        cutoff_distance: Minimum allowed distance between accepted atoms.

    Returns:
        list: Accepted indices; may hold fewer than ``N`` entries.
    """
    shuffled = list(range(len(structure)))
    random.shuffle(shuffled)
    print(f"All indices: {shuffled}")

    chosen = []
    claimed = []
    # Candidates are consumed from the end of the shuffled list.
    for index in reversed(shuffled):
        if len(chosen) >= N:
            break

        pooled = []
        for shell in range(1, neighbor_distance + 1):
            shell_neighbors, _ = return_x_neighbors(
                structure,
                target_atom_index=index,
                x_neighbor=shell,
                alat=structure.lattice.a,
            )
            pooled.extend(shell_neighbors)

        print(f"Index: {index}, Neighbors: {pooled}")

        pooled_set = set(pooled)
        no_shared_neighbors = all(pooled_set.isdisjoint(taken) for taken in claimed)
        # The distance test is only evaluated once the neighbour test passes.
        if no_shared_neighbors and all(
            structure.get_distance(index, accepted) > cutoff_distance
            for accepted in chosen
        ):
            chosen.append(index)
            claimed.append(pooled_set)
            print(f"Selected target atom index: {index}")

    return chosen

def _randomly_pick_sites(structure, n):
    """Draw ``n`` distinct sites at random from ``structure.sites``.

    Args:
        structure: Object exposing a ``sites`` sequence.
        n: Number of sites to draw.

    Returns:
        list: ``n`` sites sampled without replacement.

    Raises:
        ValueError: If ``n`` exceeds the number of available sites.
    """
    available = structure.sites
    if len(available) < n:
        raise ValueError("The number of sites to pick cannot be greater than the number of sites in the structure.")

    return random.sample(available, n)

import random
from scipy.spatial.distance import cdist

def _randomly_pick_sites(structure, n, cutoff=1, max_attempts=100000):
    """Randomly pick ``n`` sites that are mutually further apart than a cutoff.

    A candidate drawn with ``random.choice`` is accepted when its Euclidean
    distance (``cdist`` on the ``coords`` attributes) to every site accepted so
    far exceeds ``cutoff * min(structure.lattice.abc)``.

    Args:
        structure: Object exposing ``sites`` and ``lattice.abc``.
        n: Number of sites to pick.
        cutoff: Minimum separation, as a multiple of the shortest lattice length.
        max_attempts: Upper bound on the number of random draws. Without it
            the loop never ends when ``n`` sufficiently separated sites do
            not exist.

    Returns:
        list: The ``n`` accepted sites.

    Raises:
        ValueError: If ``n`` exceeds the number of sites in the structure.
        RuntimeError: If ``n`` sites could not be found within ``max_attempts`` draws.
    """
    if n > len(structure.sites):
        raise ValueError("The number of sites to pick cannot be greater than the number of sites in the structure.")

    # Loop-invariant minimum separation between any two accepted sites.
    min_separation = cutoff * min(structure.lattice.abc)

    random_sites = []
    attempts = 0
    while len(random_sites) < n:
        if attempts >= max_attempts:
            raise RuntimeError(
                f"Could not find {n} sites separated by more than {min_separation} "
                f"within {max_attempts} attempts."
            )
        attempts += 1

        potential_site = random.choice(structure.sites)
        print("Potential site: ", potential_site.coords)
        print(min_separation)
        if all(cdist([potential_site.coords], [site.coords])[0][0] > min_separation for site in random_sites):
            print("Found site")
            random_sites.append(potential_site)
    return random_sites

import random
import numpy as np
from pymatgen.core.structure import Structure

def old_randomly_pick_sites(structure, n, initial_cutoff=1.25, max_attempts=1000, reduction_factor=0.9):
    """
    Pick ``n`` mutually separated sites at random, relaxing the separation if needed.

    Candidates are drawn with ``random.choice`` and kept when their Euclidean
    distance to every kept site exceeds ``cutoff * min(structure.lattice.abc)``.
    When ``max_attempts`` draws do not produce ``n`` sites, the cutoff is
    multiplied by ``reduction_factor`` and the selection starts over.

    Args:
        structure (Structure): The structure to draw sites from.
        n (int): How many sites to return.
        initial_cutoff (float, optional): Starting separation, as a multiple of the shortest lattice length. Defaults to 1.25.
        max_attempts (int, optional): Draws allowed per cutoff value. Defaults to 1000.
        reduction_factor (float, optional): Factor applied to the cutoff after an unsuccessful round. Defaults to 0.9.

    Returns:
        list: The selected sites.

    Raises:
        ValueError: If ``n`` is larger than the number of sites in the structure.
    """
    all_sites = structure.sites
    if len(all_sites) < n:
        raise ValueError("The number of sites to pick cannot be greater than the number of sites in the structure.")

    shortest_edge = min(structure.lattice.abc)
    cutoff = initial_cutoff
    picked = []
    while len(picked) < n:
        min_separation = cutoff * shortest_edge
        for _ in range(max_attempts):
            if len(picked) >= n:
                break
            candidate = random.choice(all_sites)
            candidate_xyz = np.array(candidate.coords)
            if all(
                np.linalg.norm(candidate_xyz - np.array(kept.coords)) > min_separation
                for kept in picked
            ):
                picked.append(candidate)

        if len(picked) < n:
            # Round failed: loosen the separation and start from scratch.
            cutoff *= reduction_factor
            picked = []

    return picked

def randomly_pick_sites(structure, n, initial_cutoff=1.25, max_attempts=1000, reduction_factor=0.9):
    """
    Pick ``n`` mutually separated site indices at random, relaxing the separation if needed.

    Candidate indices are drawn with ``random.randint`` and kept when the
    Euclidean distance between the candidate site and every kept site exceeds
    ``cutoff * min(structure.lattice.abc)``. When ``max_attempts`` draws do not
    produce ``n`` indices, the cutoff is multiplied by ``reduction_factor`` and
    the selection starts over.

    Args:
        structure (Structure): The structure to draw sites from.
        n (int): How many site indices to return.
        initial_cutoff (float, optional): Starting separation, as a multiple of the shortest lattice length. Defaults to 1.25.
        max_attempts (int, optional): Draws allowed per cutoff value. Defaults to 1000.
        reduction_factor (float, optional): Factor applied to the cutoff after an unsuccessful round. Defaults to 0.9.

    Returns:
        list: Indices (into ``structure.sites``) of the selected sites.

    Raises:
        ValueError: If ``n`` is larger than the number of sites in the structure.
    """
    all_sites = structure.sites
    if len(all_sites) < n:
        raise ValueError("The number of sites to pick cannot be greater than the number of sites in the structure.")

    shortest_edge = min(structure.lattice.abc)
    cutoff = initial_cutoff
    picked = []
    while len(picked) < n:
        min_separation = cutoff * shortest_edge
        for _ in range(max_attempts):
            if len(picked) >= n:
                break
            candidate = random.randint(0, len(all_sites) - 1)
            candidate_xyz = np.array(all_sites[candidate].coords)
            if all(
                np.linalg.norm(candidate_xyz - np.array(all_sites[kept].coords)) > min_separation
                for kept in picked
            ):
                picked.append(candidate)

        if len(picked) < n:
            # Round failed: loosen the separation and start from scratch.
            cutoff *= reduction_factor
            picked = []

    return picked

def select_random_neighbor(structure, target_atom_index, x_neighbor):
    """Return one randomly chosen neighbour of an atom, or ``None``.

    The candidates are the first value returned by ``return_x_neighbors`` for
    the given atom and neighbour shell, using ``structure.lattice.a`` as the
    lattice parameter.

    Args:
        structure: Structure containing the atom.
        target_atom_index: Index of the atom whose neighbours are wanted.
        x_neighbor: Neighbour shell passed through to ``return_x_neighbors``.

    Returns:
        A random element of the neighbour list, or ``None`` when it is empty.
    """
    candidates, _ = return_x_neighbors(
        structure,
        target_atom_index=target_atom_index,
        x_neighbor=x_neighbor,
        alat=structure.lattice.a,
    )
    return random.choice(candidates) if candidates else None

def create_and_save_structures(entries, N, job_path, neighbor_distance=2, cutoff_distance=1.25):
    """Write vacancy start/end POSCAR pairs for randomly chosen sites.

    For each entry, ``N`` site indices are picked with ``randomly_pick_sites``.
    For each picked site a start structure is built with
    ``make_defects(structure, i, i)``, and for neighbour shells 1, 2 and 3 one
    random neighbour is chosen and an end structure is built with
    ``make_defects(structure, i, neighbour)``. Both are written under
    ``<job_path>/structure_<k>_vac_site_<n>/``.

    Args:
        entries: Entries accepted by ``load_and_sort_structure``.
        N: Number of vacancy sites to pick per entry.
        job_path: Root directory for the output folders.
        neighbor_distance: Unused; kept so existing callers keep working.
        cutoff_distance: Initial separation cutoff forwarded to
            ``randomly_pick_sites`` as ``initial_cutoff``.
    """
    for k, entry in enumerate(entries):
        print(f"Processing entry {k+1}/{len(entries)}...")
        structure = load_and_sort_structure(entry)
        # randomly_pick_sites names this parameter `initial_cutoff`; passing
        # `cutoff=` raised TypeError.
        target_atoms = randomly_pick_sites(structure, N, initial_cutoff=cutoff_distance)
        if not target_atoms:
            print(f"No suitable target atoms found for entry {k+1}. Skipping...")
            continue
        print(f"Found target atoms for entry {k+1}: {target_atoms}")

        for n, target_atom_index in enumerate(target_atoms):
            start_structure, _ = make_defects(structure, target_atom_index, target_atom_index)
            print("Start defect made")
            if start_structure is None:
                print(f"Failed to create start structure for entry {k+1}, target atom {target_atom_index}.")
                continue

            for x_neighbor in [1, 2, 3]:
                print("Neigbor distance: ", x_neighbor)
                vac_site = select_random_neighbor(structure, target_atom_index, x_neighbor)
                if vac_site is None:
                    continue

                _, end_structure = make_defects(structure, target_atom_index, vac_site)
                if end_structure is None:
                    print(f"Failed to create end structure for entry {k+1}, target atom {target_atom_index}, vac_site {vac_site}.")
                    continue

                # Create directory and filenames
                directory = os.path.join(job_path, f"structure_{k}_vac_site_{n}")
                os.makedirs(directory, exist_ok=True)
                print(f"Created directory: {directory}")

                start_filename = os.path.join(directory, f"structure_{k}_vac_site_{n}_start.vasp")
                end_filename = os.path.join(directory, f"structure_{k}_vac_site_{n}_end_site_{vac_site}.vasp")

                # Write the structures to POSCAR files
                Poscar(start_structure).write_file(start_filename)
                print(f"Written start structure to {start_filename}")
                Poscar(end_structure).write_file(end_filename)
                print(f"Written end structure to {end_filename}")

def _create_start_structures(entries, N, job_path, cutoff_distance=1.25):
    """Build vacancy start structures for randomly chosen sites of each entry.

    For each entry, ``N`` site indices are picked with ``randomly_pick_sites``
    and a start structure is built for each with
    ``make_defects(structure, i, i)``.

    Args:
        entries: Entries accepted by ``load_and_sort_structure``.
        N: Number of vacancy sites to pick per entry.
        job_path: Unused; nothing is written to disk by this function.
        cutoff_distance: Initial separation cutoff forwarded to
            ``randomly_pick_sites`` as ``initial_cutoff``.

    Returns:
        tuple: ``(start_structures, removed_indexes)``, two parallel lists
        holding each start structure and the site index it was built for.
    """
    start_structures = []
    removed_indexes = []
    for k, entry in enumerate(entries):
        print(f"Processing entry {k+1}/{len(entries)}...")
        structure = load_and_sort_structure(entry)
        # randomly_pick_sites names this parameter `initial_cutoff`; passing
        # `cutoff=` raised TypeError.
        target_atoms = randomly_pick_sites(structure, N, initial_cutoff=cutoff_distance)
        if not target_atoms:
            print(f"No suitable target atoms found for entry {k+1}. Skipping...")
            continue
        print(f"Found target atoms for entry {k+1}: {target_atoms}")

        for target_atom_index in target_atoms:
            start_structure, _ = make_defects(structure, target_atom_index, target_atom_index)
            if start_structure is None:
                print(f"Failed to create start structure for entry {k+1}, target atom {target_atom_index}.")
                continue
            start_structures.append(start_structure)
            removed_indexes.append(target_atom_index)
    return start_structures, removed_indexes

from pymatgen.entries.computed_entries import ComputedStructureEntry
from monty.json import MontyEncoder, MontyDecoder
from pymatgen.io.vasp import Poscar

def create_start_structures(entries, N, job_path, cutoff_distance=1.25,supercell_scheme=True):
    """Write vacancy start structures and a JSON index of them.

    For each entry, ``N`` sites are picked with ``randomly_pick_sites``; for
    each a start structure is built with ``make_defects(structure, i, i)`` and
    written as a POSCAR file in ``job_path``. A ``ComputedStructureEntry``
    (energy 0) is recorded per start structure, and all of them are dumped to
    ``<job_path>/computed_entries.json`` with ``MontyEncoder``.

    Args:
        entries: Entries accepted by ``load_and_sort_structure``; their
            ``data`` must hold ``'generation'`` and ``'comp'``, plus
            ``'struct'`` when ``supercell_scheme`` is true.
        N: Number of vacancy sites to pick per entry.
        job_path: Output directory; created if it does not exist.
        cutoff_distance: Initial separation cutoff forwarded to
            ``randomly_pick_sites``.
        supercell_scheme: When true, the ``'struct'`` value is included in
            the output file name.
    """
    if job_path:
        # Poscar.write_file and the final JSON dump both need the directory.
        os.makedirs(job_path, exist_ok=True)

    computed_entries = []
    for k, entry in enumerate(entries):
        print(f"Processing entry {k+1}/{len(entries)}...")
        structure = load_and_sort_structure(entry)
        target_atoms = randomly_pick_sites(structure, N, initial_cutoff= cutoff_distance)
        if not target_atoms:
            print(f"No suitable target atoms found for entry {k+1}. Skipping...")
            continue
        print(f"Found target atoms for entry {k+1}: {target_atoms}")

        for n, target in enumerate(target_atoms):
            # randomly_pick_sites returns integer indices; structure.index()
            # is only meaningful when a site object was handed in instead.
            if isinstance(target, (int, np.integer)):
                target_atom_index = int(target)
            else:
                target_atom_index = structure.index(target)
            target_atom_composition = structure[target_atom_index].specie

            start_structure, _ = make_defects(structure, target_atom_index, target_atom_index)
            if start_structure is None:
                print(f"Failed to create start structure for entry {k+1}, target atom {target_atom_index}.")
                continue

            # Save the structure as a .vasp file
            if supercell_scheme:
                start_filename = os.path.join(job_path, f"supercell_gen{entry.data['generation']}_comp{entry.data['comp']}_struct{entry.data['struct']}_vac_site{n}_start.vasp")
            else:
                start_filename = os.path.join(job_path, f"supercell_gen{entry.data['generation']}_comp{entry.data['comp']}_vac_site{n}_start.vasp")
            Poscar(start_structure).write_file(start_filename)
            print(f"Written start structure to {start_filename}")

            # Create a ComputedStructureEntry and add it to the list.
            # NOTE(review): 'struct' is read with .get() on the assumption
            # that entries used with supercell_scheme=False may not carry
            # it - confirm against the callers.
            data = {'generation': entry.data['generation'],
                    'comp': entry.data['comp'],
                    'struct': entry.data.get('struct'),
                    'vac_index' : target_atom_index,
                    'vac_comp' : target_atom_composition,
                    'perfect_structure' : entry.structure.as_dict()}
            computed_entry = ComputedStructureEntry(start_structure, energy=0, data=data)
            computed_entries.append(computed_entry)

    # Save the computed entries as a JSON file
    with open(os.path.join(job_path, 'computed_entries.json'), 'w') as f:
        json.dump(computed_entries, f, cls=MontyEncoder)

    print("Computed entries saved to JSON file.")

def create_end_structures(start_structures, removed_indexes, job_path):
    """Write start/end POSCAR pairs for neighbours of each recorded index.

    For every ``(start_structure, index)`` pair and for neighbour shells 1, 2
    and 3, a random neighbour is chosen with ``select_random_neighbor`` and an
    end structure is built with ``make_defects(start_structure, index,
    neighbour)``. Both structures are written under
    ``<job_path>/structure_<k>_vac_site_<n>/``, where ``n`` is the position of
    the shell in the list (0, 1, 2).

    Args:
        start_structures: Structures to start from.
        removed_indexes: Site index paired with each start structure.
        job_path: Root directory for the output folders.
    """
    pairs = zip(start_structures, removed_indexes)
    for k, (start_structure, target_atom_index) in enumerate(pairs):
        for n, x_neighbor in enumerate((1, 2, 3)):
            vac_site = select_random_neighbor(start_structure, target_atom_index, x_neighbor)
            if vac_site is None:
                continue

            end_structure = make_defects(start_structure, target_atom_index, vac_site)[1]
            if end_structure is None:
                print(f"Failed to create end structure for entry {k+1}, target atom {target_atom_index}, vac_site {vac_site}.")
                continue

            # Output folder and file names for this structure / shell position.
            stem = f"structure_{k}_vac_site_{n}"
            directory = os.path.join(job_path, stem)
            os.makedirs(directory, exist_ok=True)
            print(f"Created directory: {directory}")

            start_filename = os.path.join(directory, f"{stem}_start.vasp")
            end_filename = os.path.join(directory, f"{stem}_end_site_{vac_site}.vasp")

            # Dump both structures as POSCAR files.
            Poscar(start_structure).write_file(start_filename)
            print(f"Written start structure to {start_filename}")
            Poscar(end_structure).write_file(end_filename)
            print(f"Written end structure to {end_filename}")

def check_overlapping_atoms(structure, distance_threshold=0.4):
    """
    Report whether any two atoms of a structure sit closer than a threshold.

    Only the strict upper triangle of ``structure.distance_matrix`` is
    inspected, i.e. each unordered pair of distinct atoms once.

    Parameters:
    structure (Structure): The pymatgen structure to check.
    distance_threshold (float): Pairs closer than this count as overlapping.

    Returns:
    bool: True if at least one pair is closer than the threshold, else False.
    """
    pair_rows, pair_cols = np.triu_indices(len(structure), k=1)
    pair_distances = structure.distance_matrix[pair_rows, pair_cols]
    return bool(np.any(pair_distances < distance_threshold))

def _print_min_distance(structure: Structure):
    """Print the smallest pairwise distance between sites of ``structure``.

    Every unordered pair ``(i, j)`` with ``i < j`` is measured with
    ``structure[i].distance(structure[j])``. With fewer than two sites the
    printed value is ``inf``.
    """
    n_sites = len(structure)
    shortest = float('inf')

    for first in range(n_sites):
        for second in range(first + 1, n_sites):
            separation = structure[first].distance(structure[second])
            if separation < shortest:
                shortest = separation

    print(f"The minimum distance between any two atoms in the structure is: {shortest}")

def print_min_distance(structure: Structure):
    """Print the closest pair of sites in ``structure`` and their separation.

    Every unordered pair ``(i, j)`` with ``i < j`` is measured with
    ``structure[i].distance(structure[j])``. The smallest distance, the two
    site indices and their ``coords`` are printed; on ties the first pair
    encountered wins.

    A structure with fewer than two sites has no pair to report: a short
    notice is printed instead of indexing the structure with ``None``.
    """
    n_sites = len(structure)
    if n_sites < 2:
        print("The structure has fewer than two atoms; there is no pairwise distance to report.")
        return

    min_distance = float('inf')
    atom1, atom2 = None, None

    for i in range(n_sites):
        for j in range(i + 1, n_sites):
            distance = structure[i].distance(structure[j])
            if distance < min_distance:
                min_distance = distance
                atom1, atom2 = i, j

    print(f"The minimum distance between any two atoms in the structure is: {min_distance}")
    if atom1 is None:
        # No comparison succeeded (e.g. every distance was NaN or inf).
        return
    print(f"The atoms are at indexes {atom1} and {atom2}")
    print(f"The coordinates of the atoms are {structure[atom1].coords} and {structure[atom2].coords}")

## Create Vacancies

In [2]:
def create_entry(structure, data):
    """Wrap ``structure`` in a ``ComputedStructureEntry`` with zero energy.

    Args:
        structure: Structure to store on the entry.
        data: Metadata passed through as the entry's ``data``.

    Returns:
        ComputedStructureEntry: Entry with ``energy=0``.
    """
    placeholder_energy = 0
    return ComputedStructureEntry(structure, energy=placeholder_energy, data=data)

def create_entries(structures, data):
    """Build one entry per ``(structure, metadata)`` pair.

    The two sequences are paired with ``zip``, so the result is as long as
    the shorter of them.

    Args:
        structures: Structures to wrap.
        data: Metadata items, one per structure.

    Returns:
        list: Entries produced by ``create_entry``.
    """
    entries = []
    for structure, metadata in zip(structures, data):
        entries.append(create_entry(structure, metadata))
    return entries

def create_entries_from_files(structure_files, data):
    """Load structure files and wrap each one in an entry.

    Every entry is tagged with generation 0, the structure's reduced formula
    as ``'comp'`` and the fixed label ``'Fixed_125'`` as ``'struct'``.

    NOTE: the ``data`` argument is not used; the metadata is rebuilt from
    scratch for each structure.

    Args:
        structure_files: Paths readable by ``Structure.from_file``.
        data: Ignored (see note above).

    Returns:
        list: Entries produced by ``create_entries``.
    """
    structures = []
    metadata = []
    for path in structure_files:
        structure = Structure.from_file(path)
        structures.append(structure)
        metadata.append({
            'generation': 0,
            'comp': structure.composition.reduced_formula,
            'struct': 'Fixed_125',
        })
    return create_entries(structures, metadata)

# get file list: every .cif file directly inside the VCrTi_Fixed_125 directory
file_list = [os.path.join('../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125',f) for f in os.listdir('../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/') if f.endswith('.cif')]
#file_list = [start_path, middle_path, end_path]
# NOTE: create_entries_from_files overwrites its `data` argument, so the dict
# passed here (including 'Fixed_64') has no effect on the resulting entries.
entries = create_entries_from_files(file_list, {'generation': 0, 'comp': 'VCrTi', 'struct': 'Fixed_64'})

In [3]:
print(entries[6].structure)

Full Formula (Ti1 V110 Cr14)
Reduced Formula: Ti(V55Cr7)2
abc   :  13.033682  13.033682  13.033682
angles: 109.471221 109.471221 109.471221
pbc   :       True       True       True
Sites (125)
  #  SP      a    b    c
---  ----  ---  ---  ---
  0  Ti    0    0    0.2
  1  V     0    0    0
  2  V     0    0    0.4
  3  V     0    0    0.6
  4  V     0    0    0.8
  5  V     0    0.2  0
  6  V     0    0.2  0.2
  7  V     0    0.2  0.4
  8  V     0    0.2  0.6
  9  V     0    0.2  0.8
 10  V     0    0.4  0
 11  V     0    0.4  0.2
 12  V     0    0.4  0.4
 13  V     0    0.4  0.6
 14  V     0    0.4  0.8
 15  V     0    0.6  0
 16  V     0    0.6  0.4
 17  V     0    0.6  0.6
 18  V     0    0.6  0.8
 19  V     0    0.8  0
 20  V     0    0.8  0.2
 21  V     0    0.8  0.8
 22  V     0.2  0    0
 23  V     0.2  0    0.6
 24  V     0.2  0.2  0
 25  V     0.2  0.2  0.2
 26  V     0.2  0.2  0.4
 27  V     0.2  0.2  0.6
 28  V     0.2  0.4  0
 29  V     0.2  0.4  0.2
 30  V     0.2  0.4  0.

In [12]:
# create start structure from the entries

def create_and_save_structures(entries, N, job_path, cutoff_distance=1.25, max_neighbor_draws=None):
    """Write vacancy start/end POSCAR pairs for randomly chosen sites.

    For each entry, ``N`` site indices are picked with ``randomly_pick_sites``.
    For each picked site a start structure is built with
    ``make_defects(structure, i, i)``. Up to ``N`` distinct neighbour sites are
    then drawn - shell 1 with probability 0.8, shell 2 with probability 0.2 -
    and an end structure is built for each with
    ``make_defects(structure, i, neighbour)``. Files are written under
    ``<job_path>/structure_<k>_vac_site_<i>/``.

    Args:
        entries: Entries accepted by ``load_and_sort_structure``.
        N: Number of vacancy sites per entry, and also the number of distinct
            neighbour end sites wanted per vacancy site.
        job_path: Root directory for the output folders.
        cutoff_distance: Initial separation cutoff forwarded to
            ``randomly_pick_sites``.
        max_neighbor_draws: Upper bound on random neighbour draws per vacancy
            site (default ``100 * N``). Without a bound the draw loop never
            ends when fewer than ``N`` distinct neighbours can be found.
    """
    if max_neighbor_draws is None:
        max_neighbor_draws = 100 * max(N, 1)

    shells = [1, 2]
    probabilities = [0.8, 0.2]

    for k, entry in enumerate(entries):
        print(f"Processing entry {k+1}/{len(entries)}...")
        structure = load_and_sort_structure(entry)
        target_atoms = randomly_pick_sites(structure, N, initial_cutoff= cutoff_distance)
        if not target_atoms:
            print(f"No suitable target atoms found for entry {k+1}. Skipping...")
            continue
        print(f"Found target atoms for entry {k+1}: {target_atoms}")

        for target_atom_index in target_atoms:
            print("On Target Atom: ", target_atom_index)
            start_structure, _ = make_defects(structure, target_atom_index, target_atom_index)
            print("Start defect made")
            if start_structure is None:
                print(f"Failed to create start structure for entry {k+1}, target atom {target_atom_index}.")
                continue

            selected_sites = []
            draws = 0
            while len(selected_sites) < N and draws < max_neighbor_draws:
                draws += 1
                x_neighbor = random.choices(shells, probabilities)[0]
                print("Neigbor distance: ", x_neighbor)
                vac_site = select_random_neighbor(structure, target_atom_index, x_neighbor)
                if vac_site is None or vac_site in selected_sites:
                    continue

                # The site counts as used even if the end structure fails,
                # so it is not retried.
                selected_sites.append(vac_site)
                _, end_structure = make_defects(structure, target_atom_index, vac_site)
                if end_structure is None:
                    print(f"Failed to create end structure for entry {k+1}, target atom {target_atom_index}, vac_site {vac_site}.")
                    continue

                # Create directory and filenames
                directory = os.path.join(job_path, f"structure_{k}_vac_site_{target_atom_index}")
                os.makedirs(directory, exist_ok=True)
                print(f"Created directory: {directory}")

                start_filename = os.path.join(directory, f"structure_{k}_vac_site_{target_atom_index}_start.vasp")
                end_filename = os.path.join(directory, f"structure_{k}_vac_site_{target_atom_index}_end_site_{vac_site}.vasp")

                # Write the structures to POSCAR files
                Poscar(start_structure).write_file(start_filename)
                print(f"Written start structure to {start_filename}")
                Poscar(end_structure).write_file(end_filename)
                print(f"Written end structure to {end_filename}")

            if len(selected_sites) < N:
                print(f"Only found {len(selected_sites)} of {N} distinct neighbor sites for entry {k+1}, target atom {target_atom_index} after {draws} draws.")



In [13]:
# Generate vacancy start/end structures for every entry. The second argument
# (5) is used both as the number of vacancy sites per structure and as the
# number of neighbour end sites drawn per vacancy site.
create_and_save_structures(entries, 5, '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies', cutoff_distance=1.25)

Processing entry 1/350...
Found target atoms for entry 1: [27, 117, 87, 7, 100]
On Target Atom:  27
Start defect made
Neigbor distance:  2
Created directory: ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27
Written start structure to ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27/structure_0_vac_site_27_start.vasp
Written end structure to ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27/structure_0_vac_site_27_end_site_30.vasp
Neigbor distance:  1
Created directory: ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27
Written start structure to ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27/structure_0_vac_site_27_start.vasp
Written end structure to ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_0_vac_site_27/structure_0_vac_site_27_end_site_93.vasp


In [64]:
# Reload the saved entries with plain `json`, so each one comes back as a
# dict. The context manager closes the file handle, which the bare
# `json.load(open(...))` call left open.
with open('../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_64/Start_Structures/computed_entries.json') as entries_file:
    computed_entries = json.load(entries_file)
print(computed_entries[0]['data'])
print(computed_entries[0]['structure'])
print(type(computed_entries[0]))

{'generation': 0, 'comp': 'Ti3V58Cr3', 'struct': 'Fixed_64', 'vac_index': 4, 'vac_comp': {'@module': 'pymatgen.core.periodic_table', '@class': 'Element', 'element': 'V', '@version': None}, 'perfect_structure': {'@module': 'pymatgen.core.structure', '@class': 'Structure', 'charge': 0.0, 'lattice': {'matrix': [[9.830618833167076, 0.0, -3.475648619229503], [-4.915309414949067, 8.513565645388086, -3.475648619229503], [0.0, 0.0, 10.42694586]], 'pbc': [True, True, True], 'a': 10.42694586, 'b': 10.42694586, 'c': 10.42694586, 'alpha': 109.47122063, 'beta': 109.47122063, 'gamma': 109.47122063, 'volume': 872.6688317522414}, 'properties': {}, 'sites': [{'species': [{'element': 'Ti', 'occu': 1.0}], 'abc': [0.5, 0.75, 0.0], 'xyz': [1.2288273553717382, 6.385174234041065, -4.344560774036879], 'properties': {}, 'label': 'Ti44'}, {'species': [{'element': 'Ti', 'occu': 1.0}], 'abc': [0.75, 0.0, 0.0], 'xyz': [7.372964124875307, 0.0, -2.606736464422127], 'properties': {}, 'label': 'Ti48'}, {'species': [{'

## Relax Each Vacancy Structure

In [14]:
from pymatgen.entries.computed_entries import ComputedStructureEntry
# now lets make the computed entries from chgnet potential 
from chgnet.model.model import CHGNet 
from chgnet.model.dynamics import CHGNetCalculator 
from chgnet.model import StructOptimizer

def create_computed_entry_from_chgnet(structure, energy):
    """Build a ``ComputedStructureEntry`` for a structure and its energy.

    The structure's own ``composition`` is passed explicitly as the entry's
    composition.

    Args:
        structure: Structure to store on the entry.
        energy: Energy to attach to the entry.

    Returns:
        ComputedStructureEntry: The assembled entry.
    """
    return ComputedStructureEntry(
        structure=structure,
        energy=energy,
        composition=structure.composition,
    )

def relax_structures(supercells, potential_path, device='cpu', verbose=False, relax_cell=True):
    """Relax each supercell twice with a CHGNet model and return entry dicts.

    Every structure goes through two ``StructOptimizer.relax`` calls with
    ``fmax=0.05``: the first uses the caller's ``relax_cell`` flag, the second
    starts from the first pass's ``'final_structure'`` with
    ``relax_cell=False``.  The last energy of the second trajectory is stored.

    Args:
        supercells: list of structures, or a dict whose values are structures.
            Items that are themselves dicts are rebuilt with
            ``Structure.from_dict``.
        potential_path: path handed to ``CHGNet.from_file``.
        device: forwarded as ``use_device`` to the model and the optimizer.
        verbose: forwarded to both relaxation calls.
        relax_cell: forwarded to the first relaxation call only.

    Returns:
        list of ``ComputedStructureEntry.as_dict()`` results, one per input.
    """
    model = CHGNet.from_file(potential_path, use_device=device)
    results = []
    optimizer = StructOptimizer(model, use_device=device)

    # A mapping of supercells is reduced to the list of its values.
    if isinstance(supercells, dict):
        supercells = list(supercells.values())
    print(len(supercells))

    for index, candidate in enumerate(supercells):
        # Serialized structures are turned back into Structure objects first.
        structure = Structure.from_dict(candidate) if isinstance(candidate, dict) else candidate

        first_pass = optimizer.relax(atoms=structure, fmax=0.05, relax_cell=relax_cell, verbose=verbose)
        print("Finished Volumetric Relaxing Structure: ", index)

        second_pass = optimizer.relax(
            atoms=first_pass['final_structure'], fmax=0.05, relax_cell=False, verbose=verbose
        )
        final_energy = second_pass['trajectory'].energies[-1]
        print("Final Energy: ", final_energy)

        # NOTE(review): the entry is built from the structure that was passed
        # INTO the first relaxation, not from the returned 'final_structure';
        # confirm that pairing the relaxed energy with it is intended.
        results.append(create_computed_entry_from_chgnet(structure, final_energy).as_dict())

    return results

## Allegro attempt

In [3]:
from nequip.ase import NequIPCalculator
from pymatgen.io.ase import AseAtomsAdaptor
from ase.filters import UnitCellFilter 
from ase.constraints import FixAtoms
from ase.io import Trajectory
import pickle
from ase.atoms import Atoms, units 
import numpy as np 
import json, os
from ase.optimize import LBFGS 
from pymatgen.core import Structure 
from pymatgen.entries.computed_entries import ComputedStructureEntry


class TrajectoryObserver:
    """Callable hook that records a snapshot of an atoms object on every call.

    Each call appends the current potential energy, forces, positions and
    cell to per-quantity lists.  Stresses and magnetic moments are not
    recorded.
    """

    # thanks to CHGNet and M3GNET teams

    def __init__(self, atoms: Atoms) -> None:
        """Start an empty trajectory bound to ``atoms``.

        Args:
            atoms (Atoms): the object whose state is sampled on each call.
        """
        self.atoms = atoms
        # One list per recorded quantity; index k holds the k-th snapshot.
        self.energies: list[float] = []
        self.forces: list[np.ndarray] = []
        self.atom_positions: list[np.ndarray] = []
        self.cells: list[np.ndarray] = []

    def __call__(self) -> None:
        """Append the current energy, forces, positions and cell."""
        self.energies.append(self.compute_energy())
        observed = self.atoms
        self.forces.append(observed.get_forces())
        self.atom_positions.append(observed.get_positions())
        current_cell = observed.get_cell()
        self.cells.append(current_cell[:])

    def __len__(self) -> int:
        """Return how many snapshots have been recorded."""
        return len(self.energies)

    def compute_energy(self) -> float:
        """Return ``self.atoms.get_potential_energy()``.

        Returns:
            energy (float): the potential energy reported by the atoms object.
        """
        return self.atoms.get_potential_energy()

    def save(self, filename: str) -> None:
        """Pickle the recorded trajectory to ``filename``.

        The pickled dict holds the recorded lists plus the atomic numbers
        read from the atoms object at save time.

        Args:
            filename (str): path of the pickle file to write.
        """
        payload = dict(
            energy=self.energies,
            forces=self.forces,
            atom_positions=self.atom_positions,
            cell=self.cells,
            atomic_number=self.atoms.get_atomic_numbers(),
        )
        with open(filename, "wb") as handle:
            pickle.dump(payload, handle)


def allegro_relaxer(atoms, potential_path, species, device='cpu', fmax = 0.01, steps = 250, verbose=False, relax_cell=True, loginterval=1):
    """Relax ``atoms`` with LBFGS using a deployed NequIP/Allegro model.

    A pymatgen ``Structure`` is converted to ASE atoms first; an ASE atoms
    object is used directly and therefore modified in place (calculator and,
    when ``relax_cell`` is false, a constraint are attached to it).

    Args:
        atoms: pymatgen ``Structure`` or ASE atoms object.
        potential_path: deployed model path given to
            ``NequIPCalculator.from_deployed_model``.
        species: mapping passed as ``species_to_type_name``.
        device: accepted but not used by this function.
        fmax: force threshold passed to ``optimizer.run``.
        steps: step limit passed to ``optimizer.run``.
        verbose: accepted but not used by this function.
        relax_cell: if true, the optimizer acts on a ``UnitCellFilter`` of the
            atoms; otherwise it acts on the atoms directly.
        loginterval: interval at which the trajectory observer is called.

    Returns:
        dict with ``"final_structure"`` (pymatgen structure converted from the
        atoms after the run) and ``"trajectory"`` (the ``TrajectoryObserver``).
    """
    if isinstance(atoms, Structure):
        atoms = AseAtomsAdaptor.get_atoms(atoms)

    atoms.calc = NequIPCalculator.from_deployed_model(
        model_path=potential_path,
        species_to_type_name=species,
    )

    if relax_cell:
        target = UnitCellFilter(atoms)
    else:
        # The mask is all False, so this constraint leaves every atom free.
        atoms.set_constraint(FixAtoms(mask=[False] * len(atoms)))
        target = atoms

    # Observer and optimizer share the same target (filter or bare atoms).
    obs = TrajectoryObserver(target)
    optimizer = LBFGS(target)
    optimizer.attach(obs, interval=loginterval)
    optimizer.run(fmax=fmax, steps=steps)

    return {
        "final_structure": AseAtomsAdaptor.get_structure(atoms),
        "trajectory": obs,
    }

def numeric_stress(atoms, d=1e-6, voigt=True):
    """Estimate the stress tensor by central finite differences of the energy.

    For each axis ``i`` the cell is strained by ``+d`` / ``-d`` along the
    diagonal and along one symmetric off-diagonal pair ``(i, i - 2)``; the
    energy difference divided by the strain step and the volume gives the
    corresponding stress component.  The volume is printed once and the
    original cell is restored before returning.

    Args:
        atoms: object providing ``cell``, ``get_volume()``,
            ``set_cell(cell, scale_atoms=True)`` and
            ``get_potential_energy(force_consistent=True)``.
        d: strain step.
        voigt: if true return the six components ordered
            xx, yy, zz, yz, xz, xy; otherwise return the 3x3 array.

    Returns:
        numpy array of shape (6,) or (3, 3) depending on ``voigt``.
    """
    result = np.zeros((3, 3), dtype=float)

    reference_cell = atoms.cell.copy()
    volume = atoms.get_volume()
    print(volume)

    def energy_for(strain):
        # Apply the strain to the reference cell and evaluate the energy.
        atoms.set_cell(np.dot(reference_cell, strain), scale_atoms=True)
        return atoms.get_potential_energy(force_consistent=True)

    for i in range(3):
        strain = np.eye(3)

        # Normal component: +d then -d on the same diagonal entry.
        strain[i, i] += d
        e_plus = energy_for(strain)
        strain[i, i] -= 2 * d
        e_minus = energy_for(strain)
        result[i, i] = (e_plus - e_minus) / (2 * d * volume)
        strain[i, i] += d

        # Shear component: i - 2 pairs axes as (0, 1), (1, 2), (2, 0) via
        # negative indexing.
        j = i - 2
        strain[i, j] = d
        strain[j, i] = d
        e_plus = energy_for(strain)
        strain[i, j] = -d
        strain[j, i] = -d
        e_minus = energy_for(strain)

        shear = (e_plus - e_minus) / (4 * d * volume)
        result[i, j] = shear
        result[j, i] = result[i, j]

    atoms.set_cell(reference_cell, scale_atoms=True)

    if not voigt:
        return result
    return result.flat[[0, 4, 8, 5, 2, 1]]
    

import numpy as np
from ase import Atoms
from ase.constraints import FixAtoms
from ase.calculators.calculator import Calculator, all_changes

# Numeric stress calculation function
def np_numeric_stress(atoms, d=1e-6, voigt=True):
    """Estimate the 3x3 stress tensor by central finite differences.

    Each independent strain component is perturbed by ``+d`` and ``-d`` about
    the unstrained cell; the energy difference divided by the strain step and
    the cell volume gives the stress component.  The original cell is
    restored before returning.

    Args:
        atoms: object providing ``cell``, ``get_volume()``,
            ``set_cell(cell, scale_atoms=True)`` and
            ``get_potential_energy(force_consistent=True)``.
        d: strain step.
        voigt: accepted for signature parity with ``numeric_stress`` but
            ignored; the full symmetric 3x3 array is always returned.

    Returns:
        numpy array of shape (3, 3).
    """
    stress = np.zeros((3, 3), dtype=float)
    cell = atoms.cell.copy()
    V = atoms.get_volume()

    def strained_energy(strain):
        # Apply the strain to the reference cell and evaluate the energy.
        atoms.set_cell(np.dot(cell, strain), scale_atoms=True)
        return atoms.get_potential_energy(force_consistent=True)

    for i in range(3):
        x = np.eye(3)

        x[i, i] += d
        eplus = strained_energy(x)
        x[i, i] -= 2 * d
        eminus = strained_energy(x)
        stress[i, i] = (eplus - eminus) / (2 * d * V)
        # Restore the diagonal exactly rather than relying on +d/-2d/+d
        # cancelling in floating point.
        x[i, i] = 1.0

        for j in range(i + 1, 3):
            x[i, j] = d
            x[j, i] = d
            eplus = strained_energy(x)

            x[i, j] = -d
            x[j, i] = -d
            eminus = strained_energy(x)

            stress[i, j] = (eplus - eminus) / (4 * d * V)
            stress[j, i] = stress[i, j]

            # Clear this shear before moving to the next pair; otherwise the
            # leftover -d contaminates the following (i, j) difference.
            x[i, j] = 0.0
            x[j, i] = 0.0

    atoms.set_cell(cell, scale_atoms=True)

    return stress

class CustomUnitCellFilter:
    """Pair an atoms object with an externally supplied stress function."""

    def __init__(self, atoms, stress_calculator):
        """Store the atoms object and the callable used to obtain its stress.

        Args:
            atoms: object with a ``calc`` attribute plus ``get_forces``,
                ``set_forces`` and ``set_stress`` methods.
            stress_calculator: callable invoked as ``stress_calculator(atoms)``.
        """
        self.stress_calculator = stress_calculator
        self.atoms = atoms

    def calculate(self):
        """Run the calculator, then write forces and stress back onto the atoms."""
        target = self.atoms
        target.calc.calculate(target)

        computed_forces = target.get_forces()
        computed_stress = self.stress_calculator(target)

        # NOTE(review): this relies on the atoms object exposing set_forces /
        # set_stress; confirm the object passed in actually provides them.
        target.set_forces(computed_forces)
        target.set_stress(computed_stress)




In [6]:
# Exploration cell for the Allegro/NequIP potential: load the .vasp files of
# one vacancy directory and print the first one.
structure_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies/structure_131_vac_site_30'
# Read every file ending in '.vasp' in the directory as a pymatgen Structure.
# NOTE(review): os.listdir gives no ordering guarantee, so which file ends up
# at index 0 may differ between machines; sort if a specific file is wanted.
start_structures = [Structure.from_file(os.path.join(structure_path, f)) for f in os.listdir(structure_path) if f.endswith('.vasp')]

# Deployed model file for the NequIP calculator.
pot_path = '../Potentials/vcrtiwzr_vac_deployed.pth'

# Identity mapping from chemical symbol to the model's type name.
vcrti_species = {'V' : 'V', 'Cr' : 'Cr', 'Ti' : 'Ti', 'Zr' : 'Zr', 'W' : 'W'}

# From here on `start_structures` is a single Structure, not a list.
start_structures = start_structures[0]
print(start_structures)
print(AseAtomsAdaptor.get_atoms(start_structures).get_chemical_formula())
# Disabled relaxation call. NOTE(review): it still indexes start_structures[0],
# which after the rebinding above no longer selects a structure from a list.
#relaxed_structure = allegro_relaxer(atoms = start_structures[0], potential_path= pot_path , species = vcrti_species, relax_cell=False) 


Full Formula (Ti17 V100 Cr7)
Reduced Formula: Ti17V100Cr7
abc   :  13.033682  13.033682  13.033682
angles: 109.471221 109.471221 109.471221
pbc   :       True       True       True
Sites (124)
  #  SP      a    b     c
---  ----  ---  ---  ----
  0  Ti    0    0     0.4
  1  Ti    0    0.2   0.6
  2  Ti    0    0.4   0.8
  3  Ti    0    0.8   0.2
  4  Ti    0.6  0     0.4
  5  Ti    0.6  0.2   0.6
  6  Ti    0.6  0.4   0.8
  7  Ti    0.6  0.8   0.2
  8  Ti    0.8  0     0.2
  9  Ti    0.8  0     0.6
 10  Ti    0.8  0.2   0.4
 11  Ti    0.8  0.2   0.8
 12  Ti    0.8  0.4   0
 13  Ti    0.8  0.4   0.6
 14  Ti    0.8  0.6   0.2
 15  Ti    0.8  0.8   0
 16  Ti    0.8  0.8   0.4
 17  V     0    0.8  -0
 18  V     0    0     0.6
 19  V     0    0     0.8
 20  V     0    0.2   0
 21  V     0    0.2   0.2
 22  V     0    0.2   0.4
 23  V     0    0.2   0.8
 24  V     0    0.4   0
 25  V     0    0.4   0.6
 26  V     0    0.6   0.2
 27  V     0    0.6   0.4
 28  V     0    0.6   0.6
 29  V     

In [20]:
# Directory holding the starting structures of the 512 supercell set.
structure_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_512/Start_Structures'

# Read every file ending in '.vasp' in the directory as a pymatgen Structure.
# NOTE(review): os.listdir order is not guaranteed, so start_structures[0]
# below is not a fixed file.
start_structures = [Structure.from_file(os.path.join(structure_path, f)) for f in os.listdir(structure_path) if f.endswith('.vasp')]

# CHGNet checkpoint passed to relax_structures.
pot_path = '../Potentials/Vacancy_Train_Results/bestF_epoch89_e2_f28_s55_mNA.pth.tar'

# Relax only the first structure, on the 'mps' device, with verbose output.
relaxed_entries = relax_structures([start_structures[0]], pot_path, device='mps', verbose=True)

CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on mps
1


KeyboardInterrupt: 

In [None]:
# Symbol -> type-name mapping with C/H placeholder names.
# NOTE(review): nothing in the visible cells reads `species`; presumably a
# template left over from an example — confirm before relying on it.
species = {
            "C": "NequIPTypeNameForCarbon",
            "H": "NequIPTypeNameForHydrogen",
        }

# CHGNet Vac Relax and NEB Creation

## libs

In [2]:
import os
from ase.io import read, write
from ase.mep import DyNEB
from ase import Atoms
from chgnet.model.model import CHGNet
from chgnet.model.dynamics import CHGNetCalculator
from pymatgen.io.ase import AseAtomsAdaptor
from ase.filters import FrechetCellFilter 
from ase.constraints import FixAtoms
from ase.io import Trajectory
import pickle
from ase.atoms import Atoms, units 
import numpy as np 
import json, os
from ase.optimize import LBFGS, FIRE, BFGS, MDMin, QuasiNewton 
from pymatgen.core import Structure 
import sys
sys.path.append('../Modules')
from defect_maker import make_defects, return_x_neighbors
from vasp_misc import *
from NEB_Barrier import NEB_Barrier
        
    

def _group_vac_site_files(filenames):
    """Group ``structure_*.vasp`` filenames by their vacancy-site token.

    The token is the fifth ``_``-separated field of the filename (for
    ``structure_0_vac_site_27_start.vasp`` that is ``'27'``).

    Returns:
        dict mapping token -> ``{'start': filename or None, 'end': [filenames]}``.
    """
    grouped = {}
    for filename in filenames:
        if not (filename.startswith('structure_') and filename.endswith('.vasp')):
            continue
        parts = filename.split('_')
        if len(parts) < 5:
            # Too few fields to carry a vacancy-site token; ignore the file
            # rather than failing on parts[4].
            continue
        entry = grouped.setdefault(parts[4], {'start': None, 'end': []})
        if 'start' in filename:
            entry['start'] = filename
        elif 'end' in filename:
            entry['end'].append(filename)
    return grouped


def create_and_run_neb_files(base_directory, job_path, relax = True, num_images=5, vac_calculator = None, neb_calculator = None):
    """Run an NEB calculation for every start/end pair found under ``base_directory``.

    Each sub-directory whose name starts with ``'structure'`` is scanned for
    ``structure_*.vasp`` files.  For every vacancy site that has a start file
    and at least one end file, a ``NEB_Barrier`` is built per end file and
    ``neb_run`` is called, writing into
    ``job_path/<subdir>/neb_vac_site_<site>_to_<direction>``.  Pairs whose
    output directory already contains ``results.json`` are skipped.

    Args:
        base_directory: directory containing the ``structure*`` sub-directories.
        job_path: root directory for the NEB output directories.
        relax: accepted for backward compatibility; not used. ``neb_run`` is
            always called with ``run_relax=False``.
        num_images: number of images forwarded to ``neb_run``.
        vac_calculator: forwarded to ``neb_run`` as ``vac_potential``.
        neb_calculator: forwarded to ``neb_run`` as ``potential``.

    Returns:
        None.  Progress and a final summary line are printed.
    """
    # NOTE(review): nothing increments this counter, so the summary always
    # reports 0 failures; kept so the final message is unchanged.
    num_failed = 0

    # Iterate through each subdirectory in base_directory that starts with "structure"
    for subdir in os.listdir(base_directory):
        subdir_path = os.path.join(base_directory, subdir)
        if not subdir.startswith('structure') or not os.path.isdir(subdir_path):
            continue

        print(subdir)
        vac_sites = _group_vac_site_files(os.listdir(subdir_path))

        for vac_site, site_files in vac_sites.items():
            start_file = site_files['start']
            end_files = site_files['end']

            if start_file is None or not end_files:
                print(f"Skipping vac_site_{vac_site} in {subdir} due to missing start or end files.")
                continue

            start_structure = read(os.path.join(subdir_path, start_file))
            for end_file in end_files:
                end_structure = read(os.path.join(subdir_path, end_file))

                # Last '_' field of the end filename without its extension,
                # e.g. '93' for '..._end_site_93.vasp'.
                direction = end_file.split("_")[-1].split(".")[0]

                neb_dir = os.path.join(job_path, subdir, f'neb_vac_site_{vac_site}_to_{direction}')
                os.makedirs(neb_dir, exist_ok=True)

                # An existing results.json marks this pair as already done.
                if os.path.exists(os.path.join(neb_dir, 'results.json')):
                    print(f"NEB interpolation for vac_site_{vac_site} in {subdir} already completed.")
                    continue

                barrier = NEB_Barrier(start=start_structure,
                                      end=end_structure,
                                      vasp_energies=[0, 0],
                                      composition=start_structure.get_chemical_formula(),
                                      structure_number=int(subdir.split('_')[1]),
                                      defect_number=int(vac_site),
                                      direction=direction,
                                      root_path=neb_dir
                                      )
                # Honour the caller's num_images instead of a hard-coded 5.
                barrier.neb_run(num_images=num_images,
                                potential=neb_calculator,
                                vac_potential=vac_calculator,
                                run_relax=False,
                                num_steps=200
                                )

    print(f"NEB interpolation completed with {num_failed} failures.")


def _select_optimizer_class(name):
    """Return the ASE optimizer class for ``name`` or raise ``ValueError``."""
    if name == 'LBFGS':
        return LBFGS
    if name == 'BFGS':
        return BFGS
    if name == 'MDMin':
        return MDMin
    if name == 'QuasiNewton':
        return QuasiNewton
    if name == 'FIRE':
        return FIRE
    raise ValueError(
        f"Unknown optimizer {name!r}; expected one of "
        "'LBFGS', 'BFGS', 'MDMin', 'QuasiNewton', 'FIRE'."
    )


def chgnet_relaxer(atoms, calculator, fmax = 0.01, steps = 250, verbose=False, relax_cell=True, optimizer = 'LBFGS', loginterval = 1):
    """Relax a copy of ``atoms`` with the given calculator and return the copy.

    A pymatgen ``Structure`` is converted to ASE atoms first.  The work is
    done on ``atoms.copy()``, with ``calculator`` attached to that copy, and
    the optimizer always acts on a ``FrechetCellFilter`` of it.

    Args:
        atoms: pymatgen ``Structure`` or ASE atoms object.
        calculator: calculator assigned to the working copy's ``calc``.
        fmax: force threshold passed to ``optimizer.run``.
        steps: step limit passed to ``optimizer.run``.
        verbose: accepted but not used by this function.
        relax_cell: if true, use a plain ``FrechetCellFilter``; otherwise fix
            the atoms and use a constant-volume ``FrechetCellFilter``.
        optimizer: one of ``'LBFGS'``, ``'BFGS'``, ``'MDMin'``,
            ``'QuasiNewton'``, ``'FIRE'``.
        loginterval: accepted but not used by this function.

    Returns:
        The relaxed copy of the atoms.

    Raises:
        ValueError: if ``optimizer`` is not one of the supported names.
    """
    # Validate the name up front so a typo fails with a clear message instead
    # of an AttributeError on a str when .run() is reached.
    optimizer_class = _select_optimizer_class(optimizer)

    if isinstance(atoms, Structure):
        atoms = AseAtomsAdaptor.get_atoms(atoms)
    new_atoms = atoms.copy()
    new_atoms.calc = calculator

    if relax_cell:
        ucf = FrechetCellFilter(new_atoms)
    else:
        print("Relaxing without cell relaxation")
        # NOTE(review): the mask is all True, so every atom is marked fixed
        # while a constant-volume cell filter is optimized. That does not
        # obviously match the message printed above; confirm the intent.
        new_atoms.set_constraint(FixAtoms(mask=[True] * len(new_atoms)))
        ucf = FrechetCellFilter(new_atoms, constant_volume=True)

    optimizer_class(ucf).run(fmax=fmax, steps=steps)
    return new_atoms

## create NEB Jobs

In [None]:
# Example usage
base_directory = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/Vacancies'
job_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/NEB'
vac_pot_path = '../Potentials/Vacancy_Train_Results/bestF_epoch89_e2_f28_s55_mNA.pth.tar'
neb_pot_path = '../Potentials/Jan_26_100_Train_Results/bestF_epoch75_e3_f23_s23_mNA.pth.tar'
# The CHGNet models themselves are what gets passed on.  The CUDA
# CHGNetCalculator objects that used to be built first were overwritten before
# use, so they only loaded each checkpoint twice and made the cell require CUDA.
vac_calculator = CHGNet.from_file(vac_pot_path)
neb_calculator = CHGNet.from_file(neb_pot_path)
create_and_run_neb_files(base_directory, job_path, relax=True, vac_calculator=vac_calculator, neb_calculator=neb_calculator)


In [1]:
print("Done")

Done


# Interpolate Barriers and run the models

In [36]:
# Load the saved entries; the context manager closes the file handle as soon
# as the JSON has been parsed.
with open('../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_512/debug_relaxed_entries.json') as entries_file:
    relaxed_entries = json.load(entries_file)


In [46]:
from pymatgen.core import Structure
from pymatgen.io.ase import AseAtomsAdaptor

# json.load yields plain dicts, but the NEB cells need ASE Atoms objects, so
# rebuild each stored structure and convert it.
# NOTE(review): assumes each 'structure' value is a pymatgen Structure dict
# (as written by ComputedStructureEntry.as_dict) — confirm for this file.
start = AseAtomsAdaptor.get_atoms(Structure.from_dict(relaxed_entries[0][0]['structure']))
end = AseAtomsAdaptor.get_atoms(Structure.from_dict(relaxed_entries[0][1]['structure']))

In [47]:
# Build the five-image band: the start, three copies of the start as interior
# placeholders, and the end.
images = [start, *(start.copy() for _ in range(3)), end]

In [None]:
# Set up and optimize a dynamic NEB over `images` with one shared calculator.
from ase import io
from ase.mep import DyNEB
neb = DyNEB(images,fmax=0.02, dynamic_relaxation=True, scale_fmax=1.,allow_shared_calculator=True)
# Linearly interpolate the positions of the three middle images:
neb.interpolate()
# Attach the calculator to the interior images (indices 1..3 of the five).
# NOTE(review): `calc` is not defined in the cells visible here — it must be
# created before this cell is run.
for image in images[1:4]:
    image.calc = calc
# Optimize the band with LBFGS, writing the trajectory to 'test.traj':
optimizer = LBFGS(neb, trajectory='test.traj')
optimizer.run(fmax=0.02)

# Analyze the Barrier Statistics Results 

In [48]:
# Hand-entered barrier statistics, one dict per composition: V / Cr / Ti atom
# counts plus the 'Mean' and 'Std' of the barrier.
# NOTE(review): units of 'Mean'/'Std' are not stated here — presumably eV; confirm.
v_data = [{'V' : 38, 'Cr' : 19, 'Ti' : 6, 'Mean' : 0.72, 'Std' : 0.16},
 {'V' : 48, 'Cr' : 8, 'Ti' : 7, 'Mean' : 0.61, 'Std' : 0.05},
 {'V' : 38, 'Cr' : 6, 'Ti' : 19, 'Mean' : 0.28, 'Std' : 0.20},
 {'V' : 58, 'Cr' : 3, 'Ti' : 3, 'Mean' : 0.39, 'Std' : 0.05}]

In [80]:
import plotly.figure_factory as ff

import numpy as np

# Convert atom counts to atomic fractions WITHOUT modifying v_data.
# Normalising the dicts in place made this cell non-idempotent: every re-run
# divided the already-normalised values again and accumulated float drift.
V_percentages = []
Cr_percentages = []
Ti_percentages = []
for data in v_data:
    total_atoms = data['V'] + data['Cr'] + data['Ti']
    V_percentages.append(data['V'] / total_atoms)
    Cr_percentages.append(data['Cr'] / total_atoms)
    Ti_percentages.append(data['Ti'] / total_atoms)

# Mean values as a NumPy array for the contour routine.
mean_values = np.array([data['Mean'] for data in v_data])

print(V_percentages)
print(Cr_percentages)
print(Ti_percentages)

# Create the ternary plot
fig = ff.create_ternary_contour([V_percentages, Cr_percentages, Ti_percentages], mean_values, pole_labels=['V', 'Cr', 'Ti'])
fig.show()

[0.6031746031746033, 0.7619047619047619, 0.6031746031746031, 0.90625]
[0.3015873015873016, 0.12698412698412698, 0.09523809523809523, 0.046875]
[0.09523809523809523, 0.1111111111111111, 0.30158730158730157, 0.046875]


In [54]:
import plotly.graph_objects as go

# Ternary scatter of the compositions, one marker per composition, coloured
# by the mean value on a fixed 0..1 Viridis scale.
fig = go.Figure(
    go.Scatterternary(
        mode='markers',
        a=V_percentages,
        b=Cr_percentages,
        c=Ti_percentages,
        marker=dict(
            symbol=100,
            color=mean_values,
            cmax=1,
            cmin=0,
            colorscale='Viridis',
            colorbar=dict(title='Mean Value'),
            line=dict(width=2),
            size=10,  # enlarged markers
        ),
        text=mean_values,
        hoverinfo='text',
    )
)

# Axis titles and a fixed 800x800 canvas with transparent background.
fig.update_layout(
    ternary=dict(
        sum=1,
        aaxis=dict(title='V', min=0.01, linewidth=2, ticks='outside'),
        baxis=dict(title='Cr', min=0.01, linewidth=2, ticks='outside'),
        caxis=dict(title='Ti', min=0.01, linewidth=2, ticks='outside'),
    ),
    showlegend=False,
    autosize=False,
    width=800,
    height=800,
    margin=dict(b=0, l=0, r=0, t=0),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

fig.show()

# Barrier Statistics part 2

In [8]:
import os
import json
from pymatgen.core import Structure
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate barrier energy
def calculate_barrier(energies):
    """Return the largest value in ``energies`` minus the value under key ``"0"``.

    Args:
        energies: mapping whose values are comparable numbers and which
            contains the key ``"0"``.
    """
    peak = max(energies.values())
    reference = energies["0"]
    return peak - reference

# Function to get composition from a structure dictionary
def get_composition(structure_dict):
    """Return ``str(composition)`` of the Structure rebuilt from ``structure_dict``."""
    return str(Structure.from_dict(structure_dict).composition)

def gather_barrier_data(root_dir, save_results = True, results_filename="barrier_results.json"):
    """Collect barriers from ``<root_dir>/*/*/results.json`` grouped by composition.

    Each readable ``results.json`` contributes one barrier, computed from its
    ``"energies"`` entry, to the composition derived from
    ``results["structures"]["0"]``.  Files that fail to load are reported on
    stdout and skipped.

    Args:
        root_dir: directory scanned two levels deep.
        save_results: when true, the summary is also written as JSON.
        results_filename: output path used when ``save_results`` is true.

    Returns:
        dict mapping composition string to a dict with the keys
        ``"barriers"``, ``"mean_barrier"`` and ``"std_barrier"``.
    """
    grouped = {}

    for first_level in os.listdir(root_dir):
        first_path = os.path.join(root_dir, first_level)
        if not os.path.isdir(first_path):
            continue

        for second_level in os.listdir(first_path):
            second_path = os.path.join(first_path, second_level)
            if not os.path.isdir(second_path):
                continue

            results_path = os.path.join(second_path, "results.json")
            if not os.path.isfile(results_path):
                continue

            try:
                with open(results_path, "r") as handle:
                    payload = json.load(handle)
            except json.JSONDecodeError as err:
                print(f"Error decoding JSON in file: {results_path}")
                print(err)
                continue  # unreadable file: move on to the next one
            except Exception as err:
                print(f"Unexpected error in file: {results_path}")
                print(err)
                continue  # unreadable file: move on to the next one

            barrier = calculate_barrier(payload["energies"])
            composition = get_composition(payload["structures"]["0"])
            grouped.setdefault(composition, []).append(barrier)

    # Attach mean and standard deviation to each composition's barrier list.
    barrier_results = {
        composition: {
            "barriers": barriers,
            "mean_barrier": np.mean(barriers),
            "std_barrier": np.std(barriers),
        }
        for composition, barriers in grouped.items()
    }

    if save_results:
        with open(results_filename, "w") as out_file:
            json.dump(barrier_results, out_file)

    return barrier_results



In [9]:
# How many compositions to plot at each end of the std-dev ranking.
X = 5
root_path = '../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/NEB'
barrier_results = gather_barrier_data(root_path)
# Rank compositions by the standard deviation of their barriers (ascending).
sorted_compositions = sorted(barrier_results.items(), key=lambda x: x[1]['std_barrier'])


def _save_barrier_histogram(data, title, filename):
    """Draw a 20-bin histogram of ``data["barriers"]`` and save it to ``filename``."""
    plt.figure()
    plt.hist(data["barriers"], bins=20, alpha=0.7, label=f'Mean: {data["mean_barrier"]:.2f}, Std: {data["std_barrier"]:.2f}')
    plt.title(title)
    plt.xlabel('Barrier Energy')
    plt.ylabel('Frequency')
    plt.legend()
    plt.savefig(filename)
    plt.close()


# The X compositions with the smallest spread.
for composition, data in sorted_compositions[:X]:
    _save_barrier_histogram(
        data,
        f'Barrier Distribution for Composition {composition} (Lowest Std Dev)',
        f'barrier_histogram_lowest_std_{composition}.png',
    )

# The X compositions with the largest spread.
for composition, data in sorted_compositions[-X:]:
    _save_barrier_histogram(
        data,
        f'Barrier Distribution for Composition {composition} (Highest Std Dev)',
        f'barrier_histogram_highest_std_{composition}.png',
    )


Error decoding JSON in file: ../Visualization/Job_Structures/Pre_VASP/VCrTi_Fixed_125/NEB/structure_2_vac_site_6/neb_vac_site_6_to_121/results.json
Extra data: line 1 column 273594 (char 273593)
